summaryrefslogtreecommitdiffstats
path: root/arch
diff options
context:
space:
mode:
Diffstat (limited to 'arch')
-rw-r--r--arch/arm/boot/dts/rk3036.dtsi1
-rw-r--r--arch/arm/boot/dts/rk3288.dtsi1
-rw-r--r--arch/arm/boot/dts/rk3xxx.dtsi1
-rw-r--r--arch/arm/include/asm/xen/hypercall.h88
-rw-r--r--arch/arm/include/asm/xen/hypervisor.h40
-rw-r--r--arch/arm/include/asm/xen/interface.h86
-rw-r--r--arch/arm/include/asm/xen/page-coherent.h99
-rw-r--r--arch/arm/include/asm/xen/page.h123
-rw-r--r--arch/arm/include/uapi/asm/kvm.h2
-rw-r--r--arch/arm/kvm/Kconfig1
-rw-r--r--arch/arm/kvm/Makefile1
-rw-r--r--arch/arm/kvm/arm.c6
-rw-r--r--arch/arm/xen/enlighten.c3
-rw-r--r--arch/arm/xen/mm.c1
-rw-r--r--arch/arm64/Kconfig12
-rw-r--r--arch/arm64/Kconfig.debug35
-rw-r--r--arch/arm64/Makefile10
-rw-r--r--arch/arm64/boot/dts/hisilicon/hi6220.dtsi1
-rw-r--r--arch/arm64/boot/dts/mediatek/mt8173-evb.dts63
-rw-r--r--arch/arm64/boot/dts/mediatek/mt8173.dtsi29
-rw-r--r--arch/arm64/boot/dts/rockchip/rk3368.dtsi1
-rw-r--r--arch/arm64/configs/defconfig1
-rw-r--r--arch/arm64/include/asm/Kbuild1
-rw-r--r--arch/arm64/include/asm/assembler.h48
-rw-r--r--arch/arm64/include/asm/cacheflush.h7
-rw-r--r--arch/arm64/include/asm/cpucaps.h3
-rw-r--r--arch/arm64/include/asm/cpufeature.h30
-rw-r--r--arch/arm64/include/asm/current.h22
-rw-r--r--arch/arm64/include/asm/debug-monitors.h3
-rw-r--r--arch/arm64/include/asm/efi.h26
-rw-r--r--arch/arm64/include/asm/elf.h12
-rw-r--r--arch/arm64/include/asm/futex.h17
-rw-r--r--arch/arm64/include/asm/hw_breakpoint.h6
-rw-r--r--arch/arm64/include/asm/kernel-pgtable.h7
-rw-r--r--arch/arm64/include/asm/mmu.h3
-rw-r--r--arch/arm64/include/asm/mmu_context.h53
-rw-r--r--arch/arm64/include/asm/neon.h3
-rw-r--r--arch/arm64/include/asm/opcodes.h5
-rw-r--r--arch/arm64/include/asm/percpu.h18
-rw-r--r--arch/arm64/include/asm/perf_event.h2
-rw-r--r--arch/arm64/include/asm/probes.h21
-rw-r--r--arch/arm64/include/asm/ptdump.h22
-rw-r--r--arch/arm64/include/asm/ptrace.h8
-rw-r--r--arch/arm64/include/asm/smp.h14
-rw-r--r--arch/arm64/include/asm/stack_pointer.h9
-rw-r--r--arch/arm64/include/asm/suspend.h2
-rw-r--r--arch/arm64/include/asm/sysreg.h45
-rw-r--r--arch/arm64/include/asm/thread_info.h40
-rw-r--r--arch/arm64/include/asm/uaccess.h175
-rw-r--r--arch/arm64/include/asm/uprobes.h36
-rw-r--r--arch/arm64/include/asm/xen/hypercall.h2
-rw-r--r--arch/arm64/include/asm/xen/hypervisor.h2
-rw-r--r--arch/arm64/include/asm/xen/interface.h2
-rw-r--r--arch/arm64/include/asm/xen/page-coherent.h2
-rw-r--r--arch/arm64/include/asm/xen/page.h2
-rw-r--r--arch/arm64/kernel/armv8_deprecated.c16
-rw-r--r--arch/arm64/kernel/asm-offsets.c13
-rw-r--r--arch/arm64/kernel/cpufeature.c18
-rw-r--r--arch/arm64/kernel/debug-monitors.c40
-rw-r--r--arch/arm64/kernel/efi.c8
-rw-r--r--arch/arm64/kernel/entry.S110
-rw-r--r--arch/arm64/kernel/fpsimd.c14
-rw-r--r--arch/arm64/kernel/head.S30
-rw-r--r--arch/arm64/kernel/hibernate.c4
-rw-r--r--arch/arm64/kernel/hw_breakpoint.c153
-rw-r--r--arch/arm64/kernel/insn.c1
-rw-r--r--arch/arm64/kernel/kgdb.c3
-rw-r--r--arch/arm64/kernel/probes/Makefile2
-rw-r--r--arch/arm64/kernel/probes/decode-insn.c33
-rw-r--r--arch/arm64/kernel/probes/decode-insn.h8
-rw-r--r--arch/arm64/kernel/probes/kprobes.c36
-rw-r--r--arch/arm64/kernel/probes/simulate-insn.c16
-rw-r--r--arch/arm64/kernel/probes/uprobes.c216
-rw-r--r--arch/arm64/kernel/process.c38
-rw-r--r--arch/arm64/kernel/ptrace.c7
-rw-r--r--arch/arm64/kernel/return_address.c1
-rw-r--r--arch/arm64/kernel/setup.c9
-rw-r--r--arch/arm64/kernel/signal.c3
-rw-r--r--arch/arm64/kernel/sleep.S3
-rw-r--r--arch/arm64/kernel/smp.c14
-rw-r--r--arch/arm64/kernel/stacktrace.c7
-rw-r--r--arch/arm64/kernel/suspend.c6
-rw-r--r--arch/arm64/kernel/topology.c223
-rw-r--r--arch/arm64/kernel/traps.c28
-rw-r--r--arch/arm64/kernel/vmlinux.lds.S5
-rw-r--r--arch/arm64/kvm/Kconfig4
-rw-r--r--arch/arm64/kvm/handle_exit.c11
-rw-r--r--arch/arm64/kvm/hyp/hyp-entry.S9
-rw-r--r--arch/arm64/kvm/hyp/switch.c13
-rw-r--r--arch/arm64/kvm/reset.c6
-rw-r--r--arch/arm64/lib/clear_user.S11
-rw-r--r--arch/arm64/lib/copy_from_user.S11
-rw-r--r--arch/arm64/lib/copy_in_user.S11
-rw-r--r--arch/arm64/lib/copy_to_user.S11
-rw-r--r--arch/arm64/mm/Makefile3
-rw-r--r--arch/arm64/mm/cache.S6
-rw-r--r--arch/arm64/mm/context.c7
-rw-r--r--arch/arm64/mm/dma-mapping.c5
-rw-r--r--arch/arm64/mm/dump.c106
-rw-r--r--arch/arm64/mm/fault.c22
-rw-r--r--arch/arm64/mm/flush.c9
-rw-r--r--arch/arm64/mm/hugetlbpage.c22
-rw-r--r--arch/arm64/mm/mmu.c137
-rw-r--r--arch/arm64/mm/proc.S12
-rw-r--r--arch/arm64/mm/ptdump_debugfs.c31
-rw-r--r--arch/arm64/xen/hypercall.S15
-rw-r--r--arch/blackfin/mach-bf561/coreb.c10
-rw-r--r--arch/mips/kernel/asm-offsets.c2
-rw-r--r--arch/powerpc/include/asm/book3s/64/mmu-hash.h47
-rw-r--r--arch/powerpc/include/asm/kvm_asm.h1
-rw-r--r--arch/powerpc/include/asm/kvm_host.h27
-rw-r--r--arch/powerpc/include/asm/kvm_ppc.h49
-rw-r--r--arch/powerpc/include/asm/mmu.h5
-rw-r--r--arch/powerpc/include/asm/opal.h3
-rw-r--r--arch/powerpc/include/asm/reg.h14
-rw-r--r--arch/powerpc/include/uapi/asm/kvm.h5
-rw-r--r--arch/powerpc/kernel/asm-offsets.c4
-rw-r--r--arch/powerpc/kernel/cpu_setup_power.S2
-rw-r--r--arch/powerpc/kvm/book3s_64_mmu_hv.c63
-rw-r--r--arch/powerpc/kvm/book3s_64_vio_hv.c1
-rw-r--r--arch/powerpc/kvm/book3s_hv.c254
-rw-r--r--arch/powerpc/kvm/book3s_hv_builtin.c72
-rw-r--r--arch/powerpc/kvm/book3s_hv_ras.c1
-rw-r--r--arch/powerpc/kvm/book3s_hv_rm_mmu.c223
-rw-r--r--arch/powerpc/kvm/book3s_hv_rm_xics.c23
-rw-r--r--arch/powerpc/kvm/book3s_hv_rmhandlers.S140
-rw-r--r--arch/powerpc/kvm/powerpc.c16
-rw-r--r--arch/powerpc/kvm/trace_hv.h2
-rw-r--r--arch/powerpc/mm/hash_native_64.c30
-rw-r--r--arch/powerpc/mm/hash_utils_64.c28
-rw-r--r--arch/powerpc/mm/pgtable-radix.c18
-rw-r--r--arch/powerpc/mm/pgtable_64.c34
-rw-r--r--arch/powerpc/platforms/powernv/opal-wrappers.S3
-rw-r--r--arch/powerpc/platforms/powernv/opal.c2
-rw-r--r--arch/powerpc/platforms/ps3/htab.c2
-rw-r--r--arch/powerpc/platforms/pseries/lpar.c2
-rw-r--r--arch/s390/Kconfig2
-rw-r--r--arch/s390/boot/compressed/Makefile2
-rw-r--r--arch/s390/boot/compressed/head.S2
-rw-r--r--arch/s390/configs/default_defconfig6
-rw-r--r--arch/s390/configs/gcov_defconfig4
-rw-r--r--arch/s390/configs/performance_defconfig4
-rw-r--r--arch/s390/crypto/prng.c6
-rw-r--r--arch/s390/hypfs/inode.c24
-rw-r--r--arch/s390/include/asm/Kbuild4
-rw-r--r--arch/s390/include/asm/asm-offsets.h1
-rw-r--r--arch/s390/include/asm/atomic.h207
-rw-r--r--arch/s390/include/asm/atomic_ops.h130
-rw-r--r--arch/s390/include/asm/bitops.h62
-rw-r--r--arch/s390/include/asm/cpu_mf.h3
-rw-r--r--arch/s390/include/asm/elf.h6
-rw-r--r--arch/s390/include/asm/facilities_src.h82
-rw-r--r--arch/s390/include/asm/ipl.h2
-rw-r--r--arch/s390/include/asm/lowcore.h5
-rw-r--r--arch/s390/include/asm/pci_clp.h5
-rw-r--r--arch/s390/include/asm/pgalloc.h22
-rw-r--r--arch/s390/include/asm/preempt.h137
-rw-r--r--arch/s390/include/asm/processor.h6
-rw-r--r--arch/s390/include/asm/sclp.h10
-rw-r--r--arch/s390/include/asm/scsw.h6
-rw-r--r--arch/s390/include/asm/smp.h8
-rw-r--r--arch/s390/include/asm/string.h3
-rw-r--r--arch/s390/include/asm/sysinfo.h7
-rw-r--r--arch/s390/include/asm/thread_info.h24
-rw-r--r--arch/s390/include/asm/timex.h41
-rw-r--r--arch/s390/include/asm/topology.h28
-rw-r--r--arch/s390/include/asm/uaccess.h6
-rw-r--r--arch/s390/include/asm/vdso.h2
-rw-r--r--arch/s390/include/uapi/asm/Kbuild5
-rw-r--r--arch/s390/kernel/Makefile65
-rw-r--r--arch/s390/kernel/asm-offsets.c17
-rw-r--r--arch/s390/kernel/compat_signal.c4
-rw-r--r--arch/s390/kernel/early.c50
-rw-r--r--arch/s390/kernel/entry.S51
-rw-r--r--arch/s390/kernel/head.S2
-rw-r--r--arch/s390/kernel/head64.S7
-rw-r--r--arch/s390/kernel/ipl.c7
-rw-r--r--arch/s390/kernel/irq.c2
-rw-r--r--arch/s390/kernel/lgr.c5
-rw-r--r--arch/s390/kernel/perf_cpum_sf.c53
-rw-r--r--arch/s390/kernel/process.c6
-rw-r--r--arch/s390/kernel/ptrace.c14
-rw-r--r--arch/s390/kernel/setup.c22
-rw-r--r--arch/s390/kernel/signal.c14
-rw-r--r--arch/s390/kernel/smp.c32
-rw-r--r--arch/s390/kernel/swsusp.S2
-rw-r--r--arch/s390/kernel/sysinfo.c33
-rw-r--r--arch/s390/kernel/time.c191
-rw-r--r--arch/s390/kernel/topology.c53
-rw-r--r--arch/s390/kernel/vdso32/clock_gettime.S23
-rw-r--r--arch/s390/kernel/vdso32/gettimeofday.S23
-rw-r--r--arch/s390/kernel/vdso64/clock_gettime.S11
-rw-r--r--arch/s390/kernel/vdso64/gettimeofday.S11
-rw-r--r--arch/s390/kernel/vtime.c26
-rw-r--r--arch/s390/kvm/interrupt.c2
-rw-r--r--arch/s390/kvm/kvm-s390.c57
-rw-r--r--arch/s390/lib/mem.S39
-rw-r--r--arch/s390/mm/fault.c1
-rw-r--r--arch/s390/mm/vmem.c9
-rw-r--r--arch/s390/numa/mode_emu.c38
-rw-r--r--arch/s390/numa/toptree.c16
-rw-r--r--arch/s390/pci/pci.c8
-rw-r--r--arch/s390/pci/pci_clp.c3
-rw-r--r--arch/s390/pci/pci_debug.c2
-rw-r--r--arch/s390/pci/pci_dma.c36
-rw-r--r--arch/s390/tools/Makefile2
-rw-r--r--arch/s390/tools/gen_facilities.c76
-rw-r--r--arch/sh/kernel/cpu/Makefile2
-rw-r--r--arch/sh/kernel/cpu/irq/Makefile2
-rw-r--r--arch/x86/boot/compressed/Makefile2
-rw-r--r--arch/x86/include/asm/e820.h12
-rw-r--r--arch/x86/include/asm/kvm_host.h18
-rw-r--r--arch/x86/include/asm/vmx.h37
-rw-r--r--arch/x86/include/uapi/asm/vmx.h5
-rw-r--r--arch/x86/kernel/cpu/intel_cacheinfo.c2
-rw-r--r--arch/x86/kernel/cpu/mcheck/mce.c2
-rw-r--r--arch/x86/kvm/cpuid.c26
-rw-r--r--arch/x86/kvm/emulate.c200
-rw-r--r--arch/x86/kvm/hyperv.c2
-rw-r--r--arch/x86/kvm/i8254.c15
-rw-r--r--arch/x86/kvm/i8254.h3
-rw-r--r--arch/x86/kvm/lapic.c212
-rw-r--r--arch/x86/kvm/lapic.h2
-rw-r--r--arch/x86/kvm/mmu.c32
-rw-r--r--arch/x86/kvm/svm.c21
-rw-r--r--arch/x86/kvm/vmx.c1092
-rw-r--r--arch/x86/kvm/x86.c92
-rw-r--r--arch/x86/pci/xen.c4
-rw-r--r--arch/x86/platform/ce4100/ce4100.c2
-rw-r--r--arch/x86/xen/pci-swiotlb-xen.c1
-rw-r--r--arch/x86/xen/setup.c6
231 files changed, 4974 insertions, 2556 deletions
diff --git a/arch/arm/boot/dts/rk3036.dtsi b/arch/arm/boot/dts/rk3036.dtsi
index a935523a1eb8..7c2dc19925a1 100644
--- a/arch/arm/boot/dts/rk3036.dtsi
+++ b/arch/arm/boot/dts/rk3036.dtsi
@@ -204,7 +204,6 @@
g-np-tx-fifo-size = <16>;
g-rx-fifo-size = <275>;
g-tx-fifo-size = <256 128 128 64 64 32>;
- g-use-dma;
status = "disabled";
};
diff --git a/arch/arm/boot/dts/rk3288.dtsi b/arch/arm/boot/dts/rk3288.dtsi
index 17ec2e2d7a60..74a749c566ee 100644
--- a/arch/arm/boot/dts/rk3288.dtsi
+++ b/arch/arm/boot/dts/rk3288.dtsi
@@ -596,7 +596,6 @@
g-np-tx-fifo-size = <16>;
g-rx-fifo-size = <275>;
g-tx-fifo-size = <256 128 128 64 64 32>;
- g-use-dma;
phys = <&usbphy0>;
phy-names = "usb2-phy";
status = "disabled";
diff --git a/arch/arm/boot/dts/rk3xxx.dtsi b/arch/arm/boot/dts/rk3xxx.dtsi
index e15beb3c671e..8fbd3c806fa0 100644
--- a/arch/arm/boot/dts/rk3xxx.dtsi
+++ b/arch/arm/boot/dts/rk3xxx.dtsi
@@ -181,7 +181,6 @@
g-np-tx-fifo-size = <16>;
g-rx-fifo-size = <275>;
g-tx-fifo-size = <256 128 128 64 64 32>;
- g-use-dma;
phys = <&usbphy0>;
phy-names = "usb2-phy";
status = "disabled";
diff --git a/arch/arm/include/asm/xen/hypercall.h b/arch/arm/include/asm/xen/hypercall.h
index 9d874db13c0e..3522cbaed316 100644
--- a/arch/arm/include/asm/xen/hypercall.h
+++ b/arch/arm/include/asm/xen/hypercall.h
@@ -1,87 +1 @@
-/******************************************************************************
- * hypercall.h
- *
- * Linux-specific hypervisor handling.
- *
- * Stefano Stabellini <stefano.stabellini@eu.citrix.com>, Citrix, 2012
- *
- * This program is free software; you can redistribute it and/or
- * modify it under the terms of the GNU General Public License version 2
- * as published by the Free Software Foundation; or, when distributed
- * separately from the Linux kernel or incorporated into other
- * software packages, subject to the following license:
- *
- * Permission is hereby granted, free of charge, to any person obtaining a copy
- * of this source file (the "Software"), to deal in the Software without
- * restriction, including without limitation the rights to use, copy, modify,
- * merge, publish, distribute, sublicense, and/or sell copies of the Software,
- * and to permit persons to whom the Software is furnished to do so, subject to
- * the following conditions:
- *
- * The above copyright notice and this permission notice shall be included in
- * all copies or substantial portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
- * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
- * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
- * IN THE SOFTWARE.
- */
-
-#ifndef _ASM_ARM_XEN_HYPERCALL_H
-#define _ASM_ARM_XEN_HYPERCALL_H
-
-#include <linux/bug.h>
-
-#include <xen/interface/xen.h>
-#include <xen/interface/sched.h>
-#include <xen/interface/platform.h>
-
-long privcmd_call(unsigned call, unsigned long a1,
- unsigned long a2, unsigned long a3,
- unsigned long a4, unsigned long a5);
-int HYPERVISOR_xen_version(int cmd, void *arg);
-int HYPERVISOR_console_io(int cmd, int count, char *str);
-int HYPERVISOR_grant_table_op(unsigned int cmd, void *uop, unsigned int count);
-int HYPERVISOR_sched_op(int cmd, void *arg);
-int HYPERVISOR_event_channel_op(int cmd, void *arg);
-unsigned long HYPERVISOR_hvm_op(int op, void *arg);
-int HYPERVISOR_memory_op(unsigned int cmd, void *arg);
-int HYPERVISOR_physdev_op(int cmd, void *arg);
-int HYPERVISOR_vcpu_op(int cmd, int vcpuid, void *extra_args);
-int HYPERVISOR_tmem_op(void *arg);
-int HYPERVISOR_vm_assist(unsigned int cmd, unsigned int type);
-int HYPERVISOR_platform_op_raw(void *arg);
-static inline int HYPERVISOR_platform_op(struct xen_platform_op *op)
-{
- op->interface_version = XENPF_INTERFACE_VERSION;
- return HYPERVISOR_platform_op_raw(op);
-}
-int HYPERVISOR_multicall(struct multicall_entry *calls, uint32_t nr);
-
-static inline int
-HYPERVISOR_suspend(unsigned long start_info_mfn)
-{
- struct sched_shutdown r = { .reason = SHUTDOWN_suspend };
-
- /* start_info_mfn is unused on ARM */
- return HYPERVISOR_sched_op(SCHEDOP_shutdown, &r);
-}
-
-static inline void
-MULTI_update_va_mapping(struct multicall_entry *mcl, unsigned long va,
- unsigned int new_val, unsigned long flags)
-{
- BUG();
-}
-
-static inline void
-MULTI_mmu_update(struct multicall_entry *mcl, struct mmu_update *req,
- int count, int *success_count, domid_t domid)
-{
- BUG();
-}
-
-#endif /* _ASM_ARM_XEN_HYPERCALL_H */
+#include <xen/arm/hypercall.h>
diff --git a/arch/arm/include/asm/xen/hypervisor.h b/arch/arm/include/asm/xen/hypervisor.h
index 95251512e2c4..d6e7709d0688 100644
--- a/arch/arm/include/asm/xen/hypervisor.h
+++ b/arch/arm/include/asm/xen/hypervisor.h
@@ -1,39 +1 @@
-#ifndef _ASM_ARM_XEN_HYPERVISOR_H
-#define _ASM_ARM_XEN_HYPERVISOR_H
-
-#include <linux/init.h>
-
-extern struct shared_info *HYPERVISOR_shared_info;
-extern struct start_info *xen_start_info;
-
-/* Lazy mode for batching updates / context switch */
-enum paravirt_lazy_mode {
- PARAVIRT_LAZY_NONE,
- PARAVIRT_LAZY_MMU,
- PARAVIRT_LAZY_CPU,
-};
-
-static inline enum paravirt_lazy_mode paravirt_get_lazy_mode(void)
-{
- return PARAVIRT_LAZY_NONE;
-}
-
-extern struct dma_map_ops *xen_dma_ops;
-
-#ifdef CONFIG_XEN
-void __init xen_early_init(void);
-#else
-static inline void xen_early_init(void) { return; }
-#endif
-
-#ifdef CONFIG_HOTPLUG_CPU
-static inline void xen_arch_register_cpu(int num)
-{
-}
-
-static inline void xen_arch_unregister_cpu(int num)
-{
-}
-#endif
-
-#endif /* _ASM_ARM_XEN_HYPERVISOR_H */
+#include <xen/arm/hypervisor.h>
diff --git a/arch/arm/include/asm/xen/interface.h b/arch/arm/include/asm/xen/interface.h
index 75d596862892..88c0d75da190 100644
--- a/arch/arm/include/asm/xen/interface.h
+++ b/arch/arm/include/asm/xen/interface.h
@@ -1,85 +1 @@
-/******************************************************************************
- * Guest OS interface to ARM Xen.
- *
- * Stefano Stabellini <stefano.stabellini@eu.citrix.com>, Citrix, 2012
- */
-
-#ifndef _ASM_ARM_XEN_INTERFACE_H
-#define _ASM_ARM_XEN_INTERFACE_H
-
-#include <linux/types.h>
-
-#define uint64_aligned_t uint64_t __attribute__((aligned(8)))
-
-#define __DEFINE_GUEST_HANDLE(name, type) \
- typedef struct { union { type *p; uint64_aligned_t q; }; } \
- __guest_handle_ ## name
-
-#define DEFINE_GUEST_HANDLE_STRUCT(name) \
- __DEFINE_GUEST_HANDLE(name, struct name)
-#define DEFINE_GUEST_HANDLE(name) __DEFINE_GUEST_HANDLE(name, name)
-#define GUEST_HANDLE(name) __guest_handle_ ## name
-
-#define set_xen_guest_handle(hnd, val) \
- do { \
- if (sizeof(hnd) == 8) \
- *(uint64_t *)&(hnd) = 0; \
- (hnd).p = val; \
- } while (0)
-
-#define __HYPERVISOR_platform_op_raw __HYPERVISOR_platform_op
-
-#ifndef __ASSEMBLY__
-/* Explicitly size integers that represent pfns in the interface with
- * Xen so that we can have one ABI that works for 32 and 64 bit guests.
- * Note that this means that the xen_pfn_t type may be capable of
- * representing pfn's which the guest cannot represent in its own pfn
- * type. However since pfn space is controlled by the guest this is
- * fine since it simply wouldn't be able to create any sure pfns in
- * the first place.
- */
-typedef uint64_t xen_pfn_t;
-#define PRI_xen_pfn "llx"
-typedef uint64_t xen_ulong_t;
-#define PRI_xen_ulong "llx"
-typedef int64_t xen_long_t;
-#define PRI_xen_long "llx"
-/* Guest handles for primitive C types. */
-__DEFINE_GUEST_HANDLE(uchar, unsigned char);
-__DEFINE_GUEST_HANDLE(uint, unsigned int);
-DEFINE_GUEST_HANDLE(char);
-DEFINE_GUEST_HANDLE(int);
-DEFINE_GUEST_HANDLE(void);
-DEFINE_GUEST_HANDLE(uint64_t);
-DEFINE_GUEST_HANDLE(uint32_t);
-DEFINE_GUEST_HANDLE(xen_pfn_t);
-DEFINE_GUEST_HANDLE(xen_ulong_t);
-
-/* Maximum number of virtual CPUs in multi-processor guests. */
-#define MAX_VIRT_CPUS 1
-
-struct arch_vcpu_info { };
-struct arch_shared_info { };
-
-/* TODO: Move pvclock definitions some place arch independent */
-struct pvclock_vcpu_time_info {
- u32 version;
- u32 pad0;
- u64 tsc_timestamp;
- u64 system_time;
- u32 tsc_to_system_mul;
- s8 tsc_shift;
- u8 flags;
- u8 pad[2];
-} __attribute__((__packed__)); /* 32 bytes */
-
-/* It is OK to have a 12 bytes struct with no padding because it is packed */
-struct pvclock_wall_clock {
- u32 version;
- u32 sec;
- u32 nsec;
- u32 sec_hi;
-} __attribute__((__packed__));
-#endif
-
-#endif /* _ASM_ARM_XEN_INTERFACE_H */
+#include <xen/arm/interface.h>
diff --git a/arch/arm/include/asm/xen/page-coherent.h b/arch/arm/include/asm/xen/page-coherent.h
index 95ce6ac3a971..b3ef061d8b74 100644
--- a/arch/arm/include/asm/xen/page-coherent.h
+++ b/arch/arm/include/asm/xen/page-coherent.h
@@ -1,98 +1 @@
-#ifndef _ASM_ARM_XEN_PAGE_COHERENT_H
-#define _ASM_ARM_XEN_PAGE_COHERENT_H
-
-#include <asm/page.h>
-#include <linux/dma-mapping.h>
-
-void __xen_dma_map_page(struct device *hwdev, struct page *page,
- dma_addr_t dev_addr, unsigned long offset, size_t size,
- enum dma_data_direction dir, unsigned long attrs);
-void __xen_dma_unmap_page(struct device *hwdev, dma_addr_t handle,
- size_t size, enum dma_data_direction dir,
- unsigned long attrs);
-void __xen_dma_sync_single_for_cpu(struct device *hwdev,
- dma_addr_t handle, size_t size, enum dma_data_direction dir);
-
-void __xen_dma_sync_single_for_device(struct device *hwdev,
- dma_addr_t handle, size_t size, enum dma_data_direction dir);
-
-static inline void *xen_alloc_coherent_pages(struct device *hwdev, size_t size,
- dma_addr_t *dma_handle, gfp_t flags, unsigned long attrs)
-{
- return __generic_dma_ops(hwdev)->alloc(hwdev, size, dma_handle, flags, attrs);
-}
-
-static inline void xen_free_coherent_pages(struct device *hwdev, size_t size,
- void *cpu_addr, dma_addr_t dma_handle, unsigned long attrs)
-{
- __generic_dma_ops(hwdev)->free(hwdev, size, cpu_addr, dma_handle, attrs);
-}
-
-static inline void xen_dma_map_page(struct device *hwdev, struct page *page,
- dma_addr_t dev_addr, unsigned long offset, size_t size,
- enum dma_data_direction dir, unsigned long attrs)
-{
- unsigned long page_pfn = page_to_xen_pfn(page);
- unsigned long dev_pfn = XEN_PFN_DOWN(dev_addr);
- unsigned long compound_pages =
- (1<<compound_order(page)) * XEN_PFN_PER_PAGE;
- bool local = (page_pfn <= dev_pfn) &&
- (dev_pfn - page_pfn < compound_pages);
-
- /*
- * Dom0 is mapped 1:1, while the Linux page can span across
- * multiple Xen pages, it's not possible for it to contain a
- * mix of local and foreign Xen pages. So if the first xen_pfn
- * == mfn the page is local otherwise it's a foreign page
- * grant-mapped in dom0. If the page is local we can safely
- * call the native dma_ops function, otherwise we call the xen
- * specific function.
- */
- if (local)
- __generic_dma_ops(hwdev)->map_page(hwdev, page, offset, size, dir, attrs);
- else
- __xen_dma_map_page(hwdev, page, dev_addr, offset, size, dir, attrs);
-}
-
-static inline void xen_dma_unmap_page(struct device *hwdev, dma_addr_t handle,
- size_t size, enum dma_data_direction dir, unsigned long attrs)
-{
- unsigned long pfn = PFN_DOWN(handle);
- /*
- * Dom0 is mapped 1:1, while the Linux page can be spanned accross
- * multiple Xen page, it's not possible to have a mix of local and
- * foreign Xen page. Dom0 is mapped 1:1, so calling pfn_valid on a
- * foreign mfn will always return false. If the page is local we can
- * safely call the native dma_ops function, otherwise we call the xen
- * specific function.
- */
- if (pfn_valid(pfn)) {
- if (__generic_dma_ops(hwdev)->unmap_page)
- __generic_dma_ops(hwdev)->unmap_page(hwdev, handle, size, dir, attrs);
- } else
- __xen_dma_unmap_page(hwdev, handle, size, dir, attrs);
-}
-
-static inline void xen_dma_sync_single_for_cpu(struct device *hwdev,
- dma_addr_t handle, size_t size, enum dma_data_direction dir)
-{
- unsigned long pfn = PFN_DOWN(handle);
- if (pfn_valid(pfn)) {
- if (__generic_dma_ops(hwdev)->sync_single_for_cpu)
- __generic_dma_ops(hwdev)->sync_single_for_cpu(hwdev, handle, size, dir);
- } else
- __xen_dma_sync_single_for_cpu(hwdev, handle, size, dir);
-}
-
-static inline void xen_dma_sync_single_for_device(struct device *hwdev,
- dma_addr_t handle, size_t size, enum dma_data_direction dir)
-{
- unsigned long pfn = PFN_DOWN(handle);
- if (pfn_valid(pfn)) {
- if (__generic_dma_ops(hwdev)->sync_single_for_device)
- __generic_dma_ops(hwdev)->sync_single_for_device(hwdev, handle, size, dir);
- } else
- __xen_dma_sync_single_for_device(hwdev, handle, size, dir);
-}
-
-#endif /* _ASM_ARM_XEN_PAGE_COHERENT_H */
+#include <xen/arm/page-coherent.h>
diff --git a/arch/arm/include/asm/xen/page.h b/arch/arm/include/asm/xen/page.h
index 415dbc6e43fd..31bbc803cecb 100644
--- a/arch/arm/include/asm/xen/page.h
+++ b/arch/arm/include/asm/xen/page.h
@@ -1,122 +1 @@
-#ifndef _ASM_ARM_XEN_PAGE_H
-#define _ASM_ARM_XEN_PAGE_H
-
-#include <asm/page.h>
-#include <asm/pgtable.h>
-
-#include <linux/pfn.h>
-#include <linux/types.h>
-#include <linux/dma-mapping.h>
-
-#include <xen/xen.h>
-#include <xen/interface/grant_table.h>
-
-#define phys_to_machine_mapping_valid(pfn) (1)
-
-/* Xen machine address */
-typedef struct xmaddr {
- phys_addr_t maddr;
-} xmaddr_t;
-
-/* Xen pseudo-physical address */
-typedef struct xpaddr {
- phys_addr_t paddr;
-} xpaddr_t;
-
-#define XMADDR(x) ((xmaddr_t) { .maddr = (x) })
-#define XPADDR(x) ((xpaddr_t) { .paddr = (x) })
-
-#define INVALID_P2M_ENTRY (~0UL)
-
-/*
- * The pseudo-physical frame (pfn) used in all the helpers is always based
- * on Xen page granularity (i.e 4KB).
- *
- * A Linux page may be split across multiple non-contiguous Xen page so we
- * have to keep track with frame based on 4KB page granularity.
- *
- * PV drivers should never make a direct usage of those helpers (particularly
- * pfn_to_gfn and gfn_to_pfn).
- */
-
-unsigned long __pfn_to_mfn(unsigned long pfn);
-extern struct rb_root phys_to_mach;
-
-/* Pseudo-physical <-> Guest conversion */
-static inline unsigned long pfn_to_gfn(unsigned long pfn)
-{
- return pfn;
-}
-
-static inline unsigned long gfn_to_pfn(unsigned long gfn)
-{
- return gfn;
-}
-
-/* Pseudo-physical <-> BUS conversion */
-static inline unsigned long pfn_to_bfn(unsigned long pfn)
-{
- unsigned long mfn;
-
- if (phys_to_mach.rb_node != NULL) {
- mfn = __pfn_to_mfn(pfn);
- if (mfn != INVALID_P2M_ENTRY)
- return mfn;
- }
-
- return pfn;
-}
-
-static inline unsigned long bfn_to_pfn(unsigned long bfn)
-{
- return bfn;
-}
-
-#define bfn_to_local_pfn(bfn) bfn_to_pfn(bfn)
-
-/* VIRT <-> GUEST conversion */
-#define virt_to_gfn(v) (pfn_to_gfn(virt_to_phys(v) >> XEN_PAGE_SHIFT))
-#define gfn_to_virt(m) (__va(gfn_to_pfn(m) << XEN_PAGE_SHIFT))
-
-/* Only used in PV code. But ARM guests are always HVM. */
-static inline xmaddr_t arbitrary_virt_to_machine(void *vaddr)
-{
- BUG();
-}
-
-/* TODO: this shouldn't be here but it is because the frontend drivers
- * are using it (its rolled in headers) even though we won't hit the code path.
- * So for right now just punt with this.
- */
-static inline pte_t *lookup_address(unsigned long address, unsigned int *level)
-{
- BUG();
- return NULL;
-}
-
-extern int set_foreign_p2m_mapping(struct gnttab_map_grant_ref *map_ops,
- struct gnttab_map_grant_ref *kmap_ops,
- struct page **pages, unsigned int count);
-
-extern int clear_foreign_p2m_mapping(struct gnttab_unmap_grant_ref *unmap_ops,
- struct gnttab_unmap_grant_ref *kunmap_ops,
- struct page **pages, unsigned int count);
-
-bool __set_phys_to_machine(unsigned long pfn, unsigned long mfn);
-bool __set_phys_to_machine_multi(unsigned long pfn, unsigned long mfn,
- unsigned long nr_pages);
-
-static inline bool set_phys_to_machine(unsigned long pfn, unsigned long mfn)
-{
- return __set_phys_to_machine(pfn, mfn);
-}
-
-#define xen_remap(cookie, size) ioremap_cache((cookie), (size))
-#define xen_unmap(cookie) iounmap((cookie))
-
-bool xen_arch_need_swiotlb(struct device *dev,
- phys_addr_t phys,
- dma_addr_t dev_addr);
-unsigned long xen_get_swiotlb_free_pages(unsigned int order);
-
-#endif /* _ASM_ARM_XEN_PAGE_H */
+#include <xen/arm/page.h>
diff --git a/arch/arm/include/uapi/asm/kvm.h b/arch/arm/include/uapi/asm/kvm.h
index b38c10c73579..af05f8e0903e 100644
--- a/arch/arm/include/uapi/asm/kvm.h
+++ b/arch/arm/include/uapi/asm/kvm.h
@@ -87,9 +87,11 @@ struct kvm_regs {
/* Supported VGICv3 address types */
#define KVM_VGIC_V3_ADDR_TYPE_DIST 2
#define KVM_VGIC_V3_ADDR_TYPE_REDIST 3
+#define KVM_VGIC_ITS_ADDR_TYPE 4
#define KVM_VGIC_V3_DIST_SIZE SZ_64K
#define KVM_VGIC_V3_REDIST_SIZE (2 * SZ_64K)
+#define KVM_VGIC_V3_ITS_SIZE (2 * SZ_64K)
#define KVM_ARM_VCPU_POWER_OFF 0 /* CPU is started in OFF state */
#define KVM_ARM_VCPU_PSCI_0_2 1 /* CPU uses PSCI v0.2 */
diff --git a/arch/arm/kvm/Kconfig b/arch/arm/kvm/Kconfig
index 3e1cd0452d67..90d0176fb30d 100644
--- a/arch/arm/kvm/Kconfig
+++ b/arch/arm/kvm/Kconfig
@@ -34,6 +34,7 @@ config KVM
select HAVE_KVM_IRQFD
select HAVE_KVM_IRQCHIP
select HAVE_KVM_IRQ_ROUTING
+ select HAVE_KVM_MSI
depends on ARM_VIRT_EXT && ARM_LPAE && ARM_ARCH_TIMER
---help---
Support hosting virtualized guest machines.
diff --git a/arch/arm/kvm/Makefile b/arch/arm/kvm/Makefile
index f19842ea5418..d571243ab4d1 100644
--- a/arch/arm/kvm/Makefile
+++ b/arch/arm/kvm/Makefile
@@ -32,5 +32,6 @@ obj-y += $(KVM)/arm/vgic/vgic-mmio.o
obj-y += $(KVM)/arm/vgic/vgic-mmio-v2.o
obj-y += $(KVM)/arm/vgic/vgic-mmio-v3.o
obj-y += $(KVM)/arm/vgic/vgic-kvm-device.o
+obj-y += $(KVM)/arm/vgic/vgic-its.o
obj-y += $(KVM)/irqchip.o
obj-y += $(KVM)/arm/arch_timer.o
diff --git a/arch/arm/kvm/arm.c b/arch/arm/kvm/arm.c
index 19b5f5c1c0ff..8f92efa8460e 100644
--- a/arch/arm/kvm/arm.c
+++ b/arch/arm/kvm/arm.c
@@ -221,6 +221,12 @@ int kvm_vm_ioctl_check_extension(struct kvm *kvm, long ext)
case KVM_CAP_MAX_VCPUS:
r = KVM_MAX_VCPUS;
break;
+ case KVM_CAP_MSI_DEVID:
+ if (!kvm)
+ r = -EINVAL;
+ else
+ r = kvm->arch.vgic.msis_require_devid;
+ break;
default:
r = kvm_arch_dev_ioctl_check_extension(kvm, ext);
break;
diff --git a/arch/arm/xen/enlighten.c b/arch/arm/xen/enlighten.c
index f193414d0f6f..4986dc0c1dff 100644
--- a/arch/arm/xen/enlighten.c
+++ b/arch/arm/xen/enlighten.c
@@ -372,8 +372,7 @@ static int __init xen_guest_init(void)
* for secondary CPUs as they are brought up.
* For uniformity we use VCPUOP_register_vcpu_info even on cpu0.
*/
- xen_vcpu_info = __alloc_percpu(sizeof(struct vcpu_info),
- sizeof(struct vcpu_info));
+ xen_vcpu_info = alloc_percpu(struct vcpu_info);
if (xen_vcpu_info == NULL)
return -ENOMEM;
diff --git a/arch/arm/xen/mm.c b/arch/arm/xen/mm.c
index d062f08f5020..bd62d94f8ac5 100644
--- a/arch/arm/xen/mm.c
+++ b/arch/arm/xen/mm.c
@@ -186,7 +186,6 @@ struct dma_map_ops *xen_dma_ops;
EXPORT_SYMBOL(xen_dma_ops);
static struct dma_map_ops xen_swiotlb_dma_ops = {
- .mapping_error = xen_swiotlb_dma_mapping_error,
.alloc = xen_swiotlb_alloc_coherent,
.free = xen_swiotlb_free_coherent,
.sync_single_for_cpu = xen_swiotlb_sync_single_for_cpu,
diff --git a/arch/arm64/Kconfig b/arch/arm64/Kconfig
index 657be7f5014e..111742126897 100644
--- a/arch/arm64/Kconfig
+++ b/arch/arm64/Kconfig
@@ -110,6 +110,7 @@ config ARM64
select POWER_SUPPLY
select SPARSE_IRQ
select SYSCTL_EXCEPTION_TRACE
+ select THREAD_INFO_IN_TASK
help
ARM 64-bit (AArch64) Linux support.
@@ -239,6 +240,9 @@ config PGTABLE_LEVELS
default 3 if ARM64_16K_PAGES && ARM64_VA_BITS_47
default 4 if !ARM64_64K_PAGES && ARM64_VA_BITS_48
+config ARCH_SUPPORTS_UPROBES
+ def_bool y
+
source "init/Kconfig"
source "kernel/Kconfig.freezer"
@@ -791,6 +795,14 @@ config SETEND_EMULATION
If unsure, say Y
endif
+config ARM64_SW_TTBR0_PAN
+ bool "Emulate Privileged Access Never using TTBR0_EL1 switching"
+ help
+ Enabling this option prevents the kernel from accessing
+ user-space memory directly by pointing TTBR0_EL1 to a reserved
+ zeroed area and reserved ASID. The user access routines
+ restore the valid TTBR0_EL1 temporarily.
+
menu "ARMv8.1 architectural features"
config ARM64_HW_AFDBM
diff --git a/arch/arm64/Kconfig.debug b/arch/arm64/Kconfig.debug
index b661fe742615..d1ebd46872fd 100644
--- a/arch/arm64/Kconfig.debug
+++ b/arch/arm64/Kconfig.debug
@@ -2,9 +2,13 @@ menu "Kernel hacking"
source "lib/Kconfig.debug"
-config ARM64_PTDUMP
+config ARM64_PTDUMP_CORE
+ def_bool n
+
+config ARM64_PTDUMP_DEBUGFS
bool "Export kernel pagetable layout to userspace via debugfs"
depends on DEBUG_KERNEL
+ select ARM64_PTDUMP_CORE
select DEBUG_FS
help
Say Y here if you want to show the kernel pagetable layout in a
@@ -38,6 +42,35 @@ config ARM64_RANDOMIZE_TEXT_OFFSET
of TEXT_OFFSET and platforms must not require a specific
value.
+config DEBUG_WX
+ bool "Warn on W+X mappings at boot"
+ select ARM64_PTDUMP_CORE
+ ---help---
+ Generate a warning if any W+X mappings are found at boot.
+
+ This is useful for discovering cases where the kernel is leaving
+ W+X mappings after applying NX, as such mappings are a security risk.
+ This check also includes UXN, which should be set on all kernel
+ mappings.
+
+ Look for a message in dmesg output like this:
+
+ arm64/mm: Checked W+X mappings: passed, no W+X pages found.
+
+ or like this, if the check failed:
+
+ arm64/mm: Checked W+X mappings: FAILED, <N> W+X pages found.
+
+ Note that even if the check fails, your kernel is possibly
+ still fine, as W+X mappings are not a security hole in
+ themselves, what they do is that they make the exploitation
+ of other unfixed kernel bugs easier.
+
+ There is no runtime or memory usage effect of this option
+ once the kernel has booted up - it's a one time check.
+
+ If in doubt, say "Y".
+
config DEBUG_SET_MODULE_RONX
bool "Set loadable kernel module data as NX and text as RO"
depends on MODULES
diff --git a/arch/arm64/Makefile b/arch/arm64/Makefile
index 3635b8662724..b9a4a934ca05 100644
--- a/arch/arm64/Makefile
+++ b/arch/arm64/Makefile
@@ -37,10 +37,16 @@ $(warning LSE atomics not supported by binutils)
endif
endif
-KBUILD_CFLAGS += -mgeneral-regs-only $(lseinstr)
+brokengasinst := $(call as-instr,1:\n.inst 0\n.rept . - 1b\n\nnop\n.endr\n,,-DCONFIG_BROKEN_GAS_INST=1)
+
+ifneq ($(brokengasinst),)
+$(warning Detected assembler with broken .inst; disassembly will be unreliable)
+endif
+
+KBUILD_CFLAGS += -mgeneral-regs-only $(lseinstr) $(brokengasinst)
KBUILD_CFLAGS += -fno-asynchronous-unwind-tables
KBUILD_CFLAGS += $(call cc-option, -mpc-relative-literal-loads)
-KBUILD_AFLAGS += $(lseinstr)
+KBUILD_AFLAGS += $(lseinstr) $(brokengasinst)
ifeq ($(CONFIG_CPU_BIG_ENDIAN), y)
KBUILD_CPPFLAGS += -mbig-endian
diff --git a/arch/arm64/boot/dts/hisilicon/hi6220.dtsi b/arch/arm64/boot/dts/hisilicon/hi6220.dtsi
index 17839db585d5..e0ea60382087 100644
--- a/arch/arm64/boot/dts/hisilicon/hi6220.dtsi
+++ b/arch/arm64/boot/dts/hisilicon/hi6220.dtsi
@@ -747,7 +747,6 @@
clocks = <&sys_ctrl HI6220_USBOTG_HCLK>;
clock-names = "otg";
dr_mode = "otg";
- g-use-dma;
g-rx-fifo-size = <512>;
g-np-tx-fifo-size = <128>;
g-tx-fifo-size = <128 128 128 128 128 128>;
diff --git a/arch/arm64/boot/dts/mediatek/mt8173-evb.dts b/arch/arm64/boot/dts/mediatek/mt8173-evb.dts
index 2a7f731c7759..0ecaad4333a7 100644
--- a/arch/arm64/boot/dts/mediatek/mt8173-evb.dts
+++ b/arch/arm64/boot/dts/mediatek/mt8173-evb.dts
@@ -34,15 +34,6 @@
chosen { };
- usb_p1_vbus: regulator@0 {
- compatible = "regulator-fixed";
- regulator-name = "usb_vbus";
- regulator-min-microvolt = <5000000>;
- regulator-max-microvolt = <5000000>;
- gpio = <&pio 130 GPIO_ACTIVE_HIGH>;
- enable-active-high;
- };
-
connector {
compatible = "hdmi-connector";
label = "hdmi";
@@ -54,6 +45,29 @@
};
};
};
+
+ extcon_usb: extcon_iddig {
+ compatible = "linux,extcon-usb-gpio";
+ id-gpio = <&pio 16 GPIO_ACTIVE_HIGH>;
+ };
+
+ usb_p1_vbus: regulator@0 {
+ compatible = "regulator-fixed";
+ regulator-name = "usb_vbus";
+ regulator-min-microvolt = <5000000>;
+ regulator-max-microvolt = <5000000>;
+ gpio = <&pio 130 GPIO_ACTIVE_HIGH>;
+ enable-active-high;
+ };
+
+ usb_p0_vbus: regulator@1 {
+ compatible = "regulator-fixed";
+ regulator-name = "vbus";
+ regulator-min-microvolt = <5000000>;
+ regulator-max-microvolt = <5000000>;
+ gpio = <&pio 9 GPIO_ACTIVE_HIGH>;
+ enable-active-high;
+ };
};
&cec {
@@ -243,6 +257,20 @@
bias-pull-down = <MTK_PUPD_SET_R1R0_10>;
};
};
+
+ usb_id_pins_float: usb_iddig_pull_up {
+ pins_iddig {
+ pinmux = <MT8173_PIN_16_IDDIG__FUNC_IDDIG>;
+ bias-pull-up;
+ };
+ };
+
+ usb_id_pins_ground: usb_iddig_pull_down {
+ pins_iddig {
+ pinmux = <MT8173_PIN_16_IDDIG__FUNC_IDDIG>;
+ bias-pull-down;
+ };
+ };
};
&pwm0 {
@@ -469,12 +497,25 @@
status = "okay";
};
+&ssusb {
+ vusb33-supply = <&mt6397_vusb_reg>;
+ vbus-supply = <&usb_p0_vbus>;
+ extcon = <&extcon_usb>;
+ dr_mode = "otg";
+ mediatek,enable-wakeup;
+ pinctrl-names = "default", "id_float", "id_ground";
+ pinctrl-0 = <&usb_id_pins_float>;
+ pinctrl-1 = <&usb_id_pins_float>;
+ pinctrl-2 = <&usb_id_pins_ground>;
+ status = "okay";
+};
+
&uart0 {
status = "okay";
};
-&usb30 {
+&usb_host {
vusb33-supply = <&mt6397_vusb_reg>;
vbus-supply = <&usb_p1_vbus>;
- mediatek,wakeup-src = <1>;
+ status = "okay";
};
diff --git a/arch/arm64/boot/dts/mediatek/mt8173.dtsi b/arch/arm64/boot/dts/mediatek/mt8173.dtsi
index 1c71e256601d..c2d588ca59b7 100644
--- a/arch/arm64/boot/dts/mediatek/mt8173.dtsi
+++ b/arch/arm64/boot/dts/mediatek/mt8173.dtsi
@@ -707,11 +707,14 @@
status = "disabled";
};
- usb30: usb@11270000 {
- compatible = "mediatek,mt8173-xhci";
- reg = <0 0x11270000 0 0x1000>,
+ ssusb: usb@11271000 {
+ compatible = "mediatek,mt8173-mtu3";
+ reg = <0 0x11271000 0 0x3000>,
<0 0x11280700 0 0x0100>;
- interrupts = <GIC_SPI 115 IRQ_TYPE_LEVEL_LOW>;
+ reg-names = "mac", "ippc";
+ interrupts = <GIC_SPI 64 IRQ_TYPE_LEVEL_LOW>;
+ phys = <&phy_port0 PHY_TYPE_USB3>,
+ <&phy_port1 PHY_TYPE_USB2>;
power-domains = <&scpsys MT8173_POWER_DOMAIN_USB>;
clocks = <&topckgen CLK_TOP_USB30_SEL>,
<&pericfg CLK_PERI_USB0>,
@@ -719,10 +722,22 @@
clock-names = "sys_ck",
"wakeup_deb_p0",
"wakeup_deb_p1";
- phys = <&phy_port0 PHY_TYPE_USB3>,
- <&phy_port1 PHY_TYPE_USB2>;
mediatek,syscon-wakeup = <&pericfg>;
- status = "okay";
+ #address-cells = <2>;
+ #size-cells = <2>;
+ ranges;
+ status = "disabled";
+
+ usb_host: xhci@11270000 {
+ compatible = "mediatek,mt8173-xhci";
+ reg = <0 0x11270000 0 0x1000>;
+ reg-names = "mac";
+ interrupts = <GIC_SPI 115 IRQ_TYPE_LEVEL_LOW>;
+ power-domains = <&scpsys MT8173_POWER_DOMAIN_USB>;
+ clocks = <&topckgen CLK_TOP_USB30_SEL>;
+ clock-names = "sys_ck";
+ status = "disabled";
+ };
};
u3phy: usb-phy@11290000 {
diff --git a/arch/arm64/boot/dts/rockchip/rk3368.dtsi b/arch/arm64/boot/dts/rockchip/rk3368.dtsi
index 0fcb2147c9f9..df231c4df5a5 100644
--- a/arch/arm64/boot/dts/rockchip/rk3368.dtsi
+++ b/arch/arm64/boot/dts/rockchip/rk3368.dtsi
@@ -537,7 +537,6 @@
g-np-tx-fifo-size = <16>;
g-rx-fifo-size = <275>;
g-tx-fifo-size = <256 128 128 64 64 32>;
- g-use-dma;
status = "disabled";
};
diff --git a/arch/arm64/configs/defconfig b/arch/arm64/configs/defconfig
index 6be08113a96d..c3caaddde6cc 100644
--- a/arch/arm64/configs/defconfig
+++ b/arch/arm64/configs/defconfig
@@ -82,6 +82,7 @@ CONFIG_KEXEC=y
# CONFIG_CORE_DUMP_DEFAULT_ELF_HEADERS is not set
CONFIG_COMPAT=y
CONFIG_CPU_IDLE=y
+CONFIG_HIBERNATION=y
CONFIG_ARM_CPUIDLE=y
CONFIG_CPU_FREQ=y
CONFIG_CPUFREQ_DT=y
diff --git a/arch/arm64/include/asm/Kbuild b/arch/arm64/include/asm/Kbuild
index b4ab238a59ec..8365a84c2640 100644
--- a/arch/arm64/include/asm/Kbuild
+++ b/arch/arm64/include/asm/Kbuild
@@ -1,7 +1,6 @@
generic-y += bugs.h
generic-y += clkdev.h
generic-y += cputime.h
-generic-y += current.h
generic-y += delay.h
generic-y += div64.h
generic-y += dma.h
diff --git a/arch/arm64/include/asm/assembler.h b/arch/arm64/include/asm/assembler.h
index 28bfe6132eb6..446f6c46d4b1 100644
--- a/arch/arm64/include/asm/assembler.h
+++ b/arch/arm64/include/asm/assembler.h
@@ -41,6 +41,15 @@
msr daifclr, #2
.endm
+ .macro save_and_disable_irq, flags
+ mrs \flags, daif
+ msr daifset, #2
+ .endm
+
+ .macro restore_irq, flags
+ msr daif, \flags
+ .endm
+
/*
* Enable and disable debug exceptions.
*/
@@ -202,14 +211,25 @@ lr .req x30 // link register
.endm
/*
+ * @dst: Result of per_cpu(sym, smp_processor_id())
* @sym: The name of the per-cpu variable
- * @reg: Result of per_cpu(sym, smp_processor_id())
* @tmp: scratch register
*/
- .macro this_cpu_ptr, sym, reg, tmp
- adr_l \reg, \sym
+ .macro adr_this_cpu, dst, sym, tmp
+ adr_l \dst, \sym
mrs \tmp, tpidr_el1
- add \reg, \reg, \tmp
+ add \dst, \dst, \tmp
+ .endm
+
+ /*
+ * @dst: Result of READ_ONCE(per_cpu(sym, smp_processor_id()))
+ * @sym: The name of the per-cpu variable
+ * @tmp: scratch register
+ */
+ .macro ldr_this_cpu dst, sym, tmp
+ adr_l \dst, \sym
+ mrs \tmp, tpidr_el1
+ ldr \dst, [\dst, \tmp]
.endm
/*
@@ -395,4 +415,24 @@ alternative_endif
movk \reg, :abs_g0_nc:\val
.endm
+/*
+ * Return the current thread_info.
+ */
+ .macro get_thread_info, rd
+ mrs \rd, sp_el0
+ .endm
+
+/*
+ * Errata workaround post TTBR0_EL1 update.
+ */
+ .macro post_ttbr0_update_workaround
+#ifdef CONFIG_CAVIUM_ERRATUM_27456
+alternative_if ARM64_WORKAROUND_CAVIUM_27456
+ ic iallu
+ dsb nsh
+ isb
+alternative_else_nop_endif
+#endif
+ .endm
+
#endif /* __ASM_ASSEMBLER_H */
diff --git a/arch/arm64/include/asm/cacheflush.h b/arch/arm64/include/asm/cacheflush.h
index 2e5fb976a572..5a2a6ee65f65 100644
--- a/arch/arm64/include/asm/cacheflush.h
+++ b/arch/arm64/include/asm/cacheflush.h
@@ -65,12 +65,12 @@
* - kaddr - page address
* - size - region size
*/
-extern void flush_cache_range(struct vm_area_struct *vma, unsigned long start, unsigned long end);
extern void flush_icache_range(unsigned long start, unsigned long end);
extern void __flush_dcache_area(void *addr, size_t len);
extern void __clean_dcache_area_poc(void *addr, size_t len);
extern void __clean_dcache_area_pou(void *addr, size_t len);
extern long __flush_cache_user_range(unsigned long start, unsigned long end);
+extern void sync_icache_aliases(void *kaddr, unsigned long len);
static inline void flush_cache_mm(struct mm_struct *mm)
{
@@ -81,6 +81,11 @@ static inline void flush_cache_page(struct vm_area_struct *vma,
{
}
+static inline void flush_cache_range(struct vm_area_struct *vma,
+ unsigned long start, unsigned long end)
+{
+}
+
/*
* Cache maintenance functions used by the DMA API. No to be used directly.
*/
diff --git a/arch/arm64/include/asm/cpucaps.h b/arch/arm64/include/asm/cpucaps.h
index 87b446535185..4174f09678c4 100644
--- a/arch/arm64/include/asm/cpucaps.h
+++ b/arch/arm64/include/asm/cpucaps.h
@@ -34,7 +34,8 @@
#define ARM64_HAS_32BIT_EL0 13
#define ARM64_HYP_OFFSET_LOW 14
#define ARM64_MISMATCHED_CACHE_LINE_SIZE 15
+#define ARM64_HAS_NO_FPSIMD 16
-#define ARM64_NCAPS 16
+#define ARM64_NCAPS 17
#endif /* __ASM_CPUCAPS_H */
diff --git a/arch/arm64/include/asm/cpufeature.h b/arch/arm64/include/asm/cpufeature.h
index 0bc0b1de90c4..b4989df48670 100644
--- a/arch/arm64/include/asm/cpufeature.h
+++ b/arch/arm64/include/asm/cpufeature.h
@@ -9,8 +9,6 @@
#ifndef __ASM_CPUFEATURE_H
#define __ASM_CPUFEATURE_H
-#include <linux/jump_label.h>
-
#include <asm/cpucaps.h>
#include <asm/hwcap.h>
#include <asm/sysreg.h>
@@ -27,6 +25,8 @@
#ifndef __ASSEMBLY__
+#include <linux/bug.h>
+#include <linux/jump_label.h>
#include <linux/kernel.h>
/* CPU feature register tracking */
@@ -104,14 +104,19 @@ static inline bool cpu_have_feature(unsigned int num)
return elf_hwcap & (1UL << num);
}
+/* System capability check for constant caps */
+static inline bool cpus_have_const_cap(int num)
+{
+ if (num >= ARM64_NCAPS)
+ return false;
+ return static_branch_unlikely(&cpu_hwcap_keys[num]);
+}
+
static inline bool cpus_have_cap(unsigned int num)
{
if (num >= ARM64_NCAPS)
return false;
- if (__builtin_constant_p(num))
- return static_branch_unlikely(&cpu_hwcap_keys[num]);
- else
- return test_bit(num, cpu_hwcaps);
+ return test_bit(num, cpu_hwcaps);
}
static inline void cpus_set_cap(unsigned int num)
@@ -200,7 +205,7 @@ static inline bool cpu_supports_mixed_endian_el0(void)
static inline bool system_supports_32bit_el0(void)
{
- return cpus_have_cap(ARM64_HAS_32BIT_EL0);
+ return cpus_have_const_cap(ARM64_HAS_32BIT_EL0);
}
static inline bool system_supports_mixed_endian_el0(void)
@@ -208,6 +213,17 @@ static inline bool system_supports_mixed_endian_el0(void)
return id_aa64mmfr0_mixed_endian_el0(read_system_reg(SYS_ID_AA64MMFR0_EL1));
}
+static inline bool system_supports_fpsimd(void)
+{
+ return !cpus_have_const_cap(ARM64_HAS_NO_FPSIMD);
+}
+
+static inline bool system_uses_ttbr0_pan(void)
+{
+ return IS_ENABLED(CONFIG_ARM64_SW_TTBR0_PAN) &&
+ !cpus_have_cap(ARM64_HAS_PAN);
+}
+
#endif /* __ASSEMBLY__ */
#endif
diff --git a/arch/arm64/include/asm/current.h b/arch/arm64/include/asm/current.h
new file mode 100644
index 000000000000..f2bcbe2d9889
--- /dev/null
+++ b/arch/arm64/include/asm/current.h
@@ -0,0 +1,22 @@
+#ifndef __ASM_CURRENT_H
+#define __ASM_CURRENT_H
+
+#include <linux/compiler.h>
+
+#include <asm/sysreg.h>
+
+#ifndef __ASSEMBLY__
+
+struct task_struct;
+
+static __always_inline struct task_struct *get_current(void)
+{
+ return (struct task_struct *)read_sysreg(sp_el0);
+}
+
+#define current get_current()
+
+#endif /* __ASSEMBLY__ */
+
+#endif /* __ASM_CURRENT_H */
+
diff --git a/arch/arm64/include/asm/debug-monitors.h b/arch/arm64/include/asm/debug-monitors.h
index b71420a12f26..a44cf5225429 100644
--- a/arch/arm64/include/asm/debug-monitors.h
+++ b/arch/arm64/include/asm/debug-monitors.h
@@ -68,6 +68,9 @@
#define BRK64_ESR_MASK 0xFFFF
#define BRK64_ESR_KPROBES 0x0004
#define BRK64_OPCODE_KPROBES (AARCH64_BREAK_MON | (BRK64_ESR_KPROBES << 5))
+/* uprobes BRK opcodes with ESR encoding */
+#define BRK64_ESR_UPROBES 0x0005
+#define BRK64_OPCODE_UPROBES (AARCH64_BREAK_MON | (BRK64_ESR_UPROBES << 5))
/* AArch32 */
#define DBG_ESR_EVT_BKPT 0x4
diff --git a/arch/arm64/include/asm/efi.h b/arch/arm64/include/asm/efi.h
index 771b3f0bc757..0b6b1633017f 100644
--- a/arch/arm64/include/asm/efi.h
+++ b/arch/arm64/include/asm/efi.h
@@ -1,6 +1,7 @@
#ifndef _ASM_EFI_H
#define _ASM_EFI_H
+#include <asm/cpufeature.h>
#include <asm/io.h>
#include <asm/mmu_context.h>
#include <asm/neon.h>
@@ -78,7 +79,30 @@ static inline void efifb_setup_from_dmi(struct screen_info *si, const char *opt)
static inline void efi_set_pgd(struct mm_struct *mm)
{
- switch_mm(NULL, mm, NULL);
+ __switch_mm(mm);
+
+ if (system_uses_ttbr0_pan()) {
+ if (mm != current->active_mm) {
+ /*
+ * Update the current thread's saved ttbr0 since it is
+ * restored as part of a return from exception. Set
+ * the hardware TTBR0_EL1 using cpu_switch_mm()
+ * directly to enable potential errata workarounds.
+ */
+ update_saved_ttbr0(current, mm);
+ cpu_switch_mm(mm->pgd, mm);
+ } else {
+ /*
+ * Defer the switch to the current thread's TTBR0_EL1
+ * until uaccess_enable(). Restore the current
+ * thread's saved ttbr0 corresponding to its active_mm
+ * (if different from init_mm).
+ */
+ cpu_set_reserved_ttbr0();
+ if (current->active_mm != &init_mm)
+ update_saved_ttbr0(current, current->active_mm);
+ }
+ }
}
void efi_virtmap_load(void);
diff --git a/arch/arm64/include/asm/elf.h b/arch/arm64/include/asm/elf.h
index a55384f4a5d7..5d1700425efe 100644
--- a/arch/arm64/include/asm/elf.h
+++ b/arch/arm64/include/asm/elf.h
@@ -138,7 +138,11 @@ typedef struct user_fpsimd_state elf_fpregset_t;
*/
#define ELF_PLAT_INIT(_r, load_addr) (_r)->regs[0] = 0
-#define SET_PERSONALITY(ex) clear_thread_flag(TIF_32BIT);
+#define SET_PERSONALITY(ex) \
+({ \
+ clear_bit(TIF_32BIT, &current->mm->context.flags); \
+ clear_thread_flag(TIF_32BIT); \
+})
/* update AT_VECTOR_SIZE_ARCH if the number of NEW_AUX_ENT entries changes */
#define ARCH_DLINFO \
@@ -183,7 +187,11 @@ typedef compat_elf_greg_t compat_elf_gregset_t[COMPAT_ELF_NGREG];
((x)->e_flags & EF_ARM_EABI_MASK))
#define compat_start_thread compat_start_thread
-#define COMPAT_SET_PERSONALITY(ex) set_thread_flag(TIF_32BIT);
+#define COMPAT_SET_PERSONALITY(ex) \
+({ \
+ set_bit(TIF_32BIT, &current->mm->context.flags); \
+ set_thread_flag(TIF_32BIT); \
+ })
#define COMPAT_ARCH_DLINFO
extern int aarch32_setup_vectors_page(struct linux_binprm *bprm,
int uses_interp);
diff --git a/arch/arm64/include/asm/futex.h b/arch/arm64/include/asm/futex.h
index f2585cdd32c2..85c4a8981d47 100644
--- a/arch/arm64/include/asm/futex.h
+++ b/arch/arm64/include/asm/futex.h
@@ -21,15 +21,12 @@
#include <linux/futex.h>
#include <linux/uaccess.h>
-#include <asm/alternative.h>
-#include <asm/cpufeature.h>
#include <asm/errno.h>
-#include <asm/sysreg.h>
#define __futex_atomic_op(insn, ret, oldval, uaddr, tmp, oparg) \
+do { \
+ uaccess_enable(); \
asm volatile( \
- ALTERNATIVE("nop", SET_PSTATE_PAN(0), ARM64_HAS_PAN, \
- CONFIG_ARM64_PAN) \
" prfm pstl1strm, %2\n" \
"1: ldxr %w1, %2\n" \
insn "\n" \
@@ -44,11 +41,11 @@
" .popsection\n" \
_ASM_EXTABLE(1b, 4b) \
_ASM_EXTABLE(2b, 4b) \
- ALTERNATIVE("nop", SET_PSTATE_PAN(1), ARM64_HAS_PAN, \
- CONFIG_ARM64_PAN) \
: "=&r" (ret), "=&r" (oldval), "+Q" (*uaddr), "=&r" (tmp) \
: "r" (oparg), "Ir" (-EFAULT) \
- : "memory")
+ : "memory"); \
+ uaccess_disable(); \
+} while (0)
static inline int
futex_atomic_op_inuser (int encoded_op, u32 __user *uaddr)
@@ -118,8 +115,8 @@ futex_atomic_cmpxchg_inatomic(u32 *uval, u32 __user *uaddr,
if (!access_ok(VERIFY_WRITE, uaddr, sizeof(u32)))
return -EFAULT;
+ uaccess_enable();
asm volatile("// futex_atomic_cmpxchg_inatomic\n"
-ALTERNATIVE("nop", SET_PSTATE_PAN(0), ARM64_HAS_PAN, CONFIG_ARM64_PAN)
" prfm pstl1strm, %2\n"
"1: ldxr %w1, %2\n"
" sub %w3, %w1, %w4\n"
@@ -134,10 +131,10 @@ ALTERNATIVE("nop", SET_PSTATE_PAN(0), ARM64_HAS_PAN, CONFIG_ARM64_PAN)
" .popsection\n"
_ASM_EXTABLE(1b, 4b)
_ASM_EXTABLE(2b, 4b)
-ALTERNATIVE("nop", SET_PSTATE_PAN(1), ARM64_HAS_PAN, CONFIG_ARM64_PAN)
: "+r" (ret), "=&r" (val), "+Q" (*uaddr), "=&r" (tmp)
: "r" (oldval), "r" (newval), "Ir" (-EFAULT)
: "memory");
+ uaccess_disable();
*uval = val;
return ret;
diff --git a/arch/arm64/include/asm/hw_breakpoint.h b/arch/arm64/include/asm/hw_breakpoint.h
index 9510ace570e2..b6b167ac082b 100644
--- a/arch/arm64/include/asm/hw_breakpoint.h
+++ b/arch/arm64/include/asm/hw_breakpoint.h
@@ -77,7 +77,11 @@ static inline void decode_ctrl_reg(u32 reg,
/* Lengths */
#define ARM_BREAKPOINT_LEN_1 0x1
#define ARM_BREAKPOINT_LEN_2 0x3
+#define ARM_BREAKPOINT_LEN_3 0x7
#define ARM_BREAKPOINT_LEN_4 0xf
+#define ARM_BREAKPOINT_LEN_5 0x1f
+#define ARM_BREAKPOINT_LEN_6 0x3f
+#define ARM_BREAKPOINT_LEN_7 0x7f
#define ARM_BREAKPOINT_LEN_8 0xff
/* Kernel stepping */
@@ -119,7 +123,7 @@ struct perf_event;
struct pmu;
extern int arch_bp_generic_fields(struct arch_hw_breakpoint_ctrl ctrl,
- int *gen_len, int *gen_type);
+ int *gen_len, int *gen_type, int *offset);
extern int arch_check_bp_in_kernelspace(struct perf_event *bp);
extern int arch_validate_hwbkpt_settings(struct perf_event *bp);
extern int hw_breakpoint_exceptions_notify(struct notifier_block *unused,
diff --git a/arch/arm64/include/asm/kernel-pgtable.h b/arch/arm64/include/asm/kernel-pgtable.h
index 7e51d1b57c0c..7803343e5881 100644
--- a/arch/arm64/include/asm/kernel-pgtable.h
+++ b/arch/arm64/include/asm/kernel-pgtable.h
@@ -19,6 +19,7 @@
#ifndef __ASM_KERNEL_PGTABLE_H
#define __ASM_KERNEL_PGTABLE_H
+#include <asm/pgtable.h>
#include <asm/sparsemem.h>
/*
@@ -54,6 +55,12 @@
#define SWAPPER_DIR_SIZE (SWAPPER_PGTABLE_LEVELS * PAGE_SIZE)
#define IDMAP_DIR_SIZE (IDMAP_PGTABLE_LEVELS * PAGE_SIZE)
+#ifdef CONFIG_ARM64_SW_TTBR0_PAN
+#define RESERVED_TTBR0_SIZE (PAGE_SIZE)
+#else
+#define RESERVED_TTBR0_SIZE (0)
+#endif
+
/* Initial memory map size */
#if ARM64_SWAPPER_USES_SECTION_MAPS
#define SWAPPER_BLOCK_SHIFT SECTION_SHIFT
diff --git a/arch/arm64/include/asm/mmu.h b/arch/arm64/include/asm/mmu.h
index 8d9fce037b2f..47619411f0ff 100644
--- a/arch/arm64/include/asm/mmu.h
+++ b/arch/arm64/include/asm/mmu.h
@@ -19,6 +19,7 @@
typedef struct {
atomic64_t id;
void *vdso;
+ unsigned long flags;
} mm_context_t;
/*
@@ -34,7 +35,7 @@ extern void __iomem *early_io_map(phys_addr_t phys, unsigned long virt);
extern void init_mem_pgprot(void);
extern void create_pgd_mapping(struct mm_struct *mm, phys_addr_t phys,
unsigned long virt, phys_addr_t size,
- pgprot_t prot, bool allow_block_mappings);
+ pgprot_t prot, bool page_mappings_only);
extern void *fixmap_remap_fdt(phys_addr_t dt_phys);
#endif
diff --git a/arch/arm64/include/asm/mmu_context.h b/arch/arm64/include/asm/mmu_context.h
index a50185375f09..0363fe80455c 100644
--- a/arch/arm64/include/asm/mmu_context.h
+++ b/arch/arm64/include/asm/mmu_context.h
@@ -23,6 +23,7 @@
#include <linux/sched.h>
#include <asm/cacheflush.h>
+#include <asm/cpufeature.h>
#include <asm/proc-fns.h>
#include <asm-generic/mm_hooks.h>
#include <asm/cputype.h>
@@ -103,7 +104,7 @@ static inline void cpu_uninstall_idmap(void)
local_flush_tlb_all();
cpu_set_default_tcr_t0sz();
- if (mm != &init_mm)
+ if (mm != &init_mm && !system_uses_ttbr0_pan())
cpu_switch_mm(mm->pgd, mm);
}
@@ -163,20 +164,26 @@ enter_lazy_tlb(struct mm_struct *mm, struct task_struct *tsk)
{
}
-/*
- * This is the actual mm switch as far as the scheduler
- * is concerned. No registers are touched. We avoid
- * calling the CPU specific function when the mm hasn't
- * actually changed.
- */
-static inline void
-switch_mm(struct mm_struct *prev, struct mm_struct *next,
- struct task_struct *tsk)
+#ifdef CONFIG_ARM64_SW_TTBR0_PAN
+static inline void update_saved_ttbr0(struct task_struct *tsk,
+ struct mm_struct *mm)
{
- unsigned int cpu = smp_processor_id();
+ if (system_uses_ttbr0_pan()) {
+ BUG_ON(mm->pgd == swapper_pg_dir);
+ task_thread_info(tsk)->ttbr0 =
+ virt_to_phys(mm->pgd) | ASID(mm) << 48;
+ }
+}
+#else
+static inline void update_saved_ttbr0(struct task_struct *tsk,
+ struct mm_struct *mm)
+{
+}
+#endif
- if (prev == next)
- return;
+static inline void __switch_mm(struct mm_struct *next)
+{
+ unsigned int cpu = smp_processor_id();
/*
* init_mm.pgd does not contain any user mappings and it is always
@@ -190,8 +197,26 @@ switch_mm(struct mm_struct *prev, struct mm_struct *next,
check_and_switch_context(next, cpu);
}
+static inline void
+switch_mm(struct mm_struct *prev, struct mm_struct *next,
+ struct task_struct *tsk)
+{
+ if (prev != next)
+ __switch_mm(next);
+
+ /*
+ * Update the saved TTBR0_EL1 of the scheduled-in task as the previous
+ * value may have not been initialised yet (activate_mm caller) or the
+ * ASID has changed since the last run (following the context switch
+ * of another thread of the same process). Avoid setting the reserved
+ * TTBR0_EL1 to swapper_pg_dir (init_mm; e.g. via idle_task_exit).
+ */
+ if (next != &init_mm)
+ update_saved_ttbr0(tsk, next);
+}
+
#define deactivate_mm(tsk,mm) do { } while (0)
-#define activate_mm(prev,next) switch_mm(prev, next, NULL)
+#define activate_mm(prev,next) switch_mm(prev, next, current)
void verify_cpu_asid_bits(void);
diff --git a/arch/arm64/include/asm/neon.h b/arch/arm64/include/asm/neon.h
index 13ce4cc18e26..ad4cdc966c0f 100644
--- a/arch/arm64/include/asm/neon.h
+++ b/arch/arm64/include/asm/neon.h
@@ -9,8 +9,9 @@
*/
#include <linux/types.h>
+#include <asm/fpsimd.h>
-#define cpu_has_neon() (1)
+#define cpu_has_neon() system_supports_fpsimd()
#define kernel_neon_begin() kernel_neon_begin_partial(32)
diff --git a/arch/arm64/include/asm/opcodes.h b/arch/arm64/include/asm/opcodes.h
deleted file mode 100644
index 123f45d92cd1..000000000000
--- a/arch/arm64/include/asm/opcodes.h
+++ /dev/null
@@ -1,5 +0,0 @@
-#ifdef CONFIG_CPU_BIG_ENDIAN
-#define CONFIG_CPU_ENDIAN_BE8 CONFIG_CPU_BIG_ENDIAN
-#endif
-
-#include <../../arm/include/asm/opcodes.h>
diff --git a/arch/arm64/include/asm/percpu.h b/arch/arm64/include/asm/percpu.h
index 5394c8405e66..3bd498e4de4c 100644
--- a/arch/arm64/include/asm/percpu.h
+++ b/arch/arm64/include/asm/percpu.h
@@ -16,6 +16,8 @@
#ifndef __ASM_PERCPU_H
#define __ASM_PERCPU_H
+#include <asm/stack_pointer.h>
+
static inline void set_my_cpu_offset(unsigned long off)
{
asm volatile("msr tpidr_el1, %0" :: "r" (off) : "memory");
@@ -101,16 +103,16 @@ static inline unsigned long __percpu_read(void *ptr, int size)
switch (size) {
case 1:
- ret = ACCESS_ONCE(*(u8 *)ptr);
+ ret = READ_ONCE(*(u8 *)ptr);
break;
case 2:
- ret = ACCESS_ONCE(*(u16 *)ptr);
+ ret = READ_ONCE(*(u16 *)ptr);
break;
case 4:
- ret = ACCESS_ONCE(*(u32 *)ptr);
+ ret = READ_ONCE(*(u32 *)ptr);
break;
case 8:
- ret = ACCESS_ONCE(*(u64 *)ptr);
+ ret = READ_ONCE(*(u64 *)ptr);
break;
default:
BUILD_BUG();
@@ -123,16 +125,16 @@ static inline void __percpu_write(void *ptr, unsigned long val, int size)
{
switch (size) {
case 1:
- ACCESS_ONCE(*(u8 *)ptr) = (u8)val;
+ WRITE_ONCE(*(u8 *)ptr, (u8)val);
break;
case 2:
- ACCESS_ONCE(*(u16 *)ptr) = (u16)val;
+ WRITE_ONCE(*(u16 *)ptr, (u16)val);
break;
case 4:
- ACCESS_ONCE(*(u32 *)ptr) = (u32)val;
+ WRITE_ONCE(*(u32 *)ptr, (u32)val);
break;
case 8:
- ACCESS_ONCE(*(u64 *)ptr) = (u64)val;
+ WRITE_ONCE(*(u64 *)ptr, (u64)val);
break;
default:
BUILD_BUG();
diff --git a/arch/arm64/include/asm/perf_event.h b/arch/arm64/include/asm/perf_event.h
index 38b6a2b49d68..8d5cbec17d80 100644
--- a/arch/arm64/include/asm/perf_event.h
+++ b/arch/arm64/include/asm/perf_event.h
@@ -17,6 +17,8 @@
#ifndef __ASM_PERF_EVENT_H
#define __ASM_PERF_EVENT_H
+#include <asm/stack_pointer.h>
+
#define ARMV8_PMU_MAX_COUNTERS 32
#define ARMV8_PMU_COUNTER_MASK (ARMV8_PMU_MAX_COUNTERS - 1)
diff --git a/arch/arm64/include/asm/probes.h b/arch/arm64/include/asm/probes.h
index 5af574d632fa..6a5b28904c33 100644
--- a/arch/arm64/include/asm/probes.h
+++ b/arch/arm64/include/asm/probes.h
@@ -15,21 +15,22 @@
#ifndef _ARM_PROBES_H
#define _ARM_PROBES_H
-#include <asm/opcodes.h>
-
-struct kprobe;
-struct arch_specific_insn;
-
-typedef u32 kprobe_opcode_t;
-typedef void (kprobes_handler_t) (u32 opcode, long addr, struct pt_regs *);
+typedef u32 probe_opcode_t;
+typedef void (probes_handler_t) (u32 opcode, long addr, struct pt_regs *);
/* architecture specific copy of original instruction */
-struct arch_specific_insn {
- kprobe_opcode_t *insn;
+struct arch_probe_insn {
+ probe_opcode_t *insn;
pstate_check_t *pstate_cc;
- kprobes_handler_t *handler;
+ probes_handler_t *handler;
/* restore address after step xol */
unsigned long restore;
};
+#ifdef CONFIG_KPROBES
+typedef u32 kprobe_opcode_t;
+struct arch_specific_insn {
+ struct arch_probe_insn api;
+};
+#endif
#endif
diff --git a/arch/arm64/include/asm/ptdump.h b/arch/arm64/include/asm/ptdump.h
index 07b8ed037dee..6afd8476c60c 100644
--- a/arch/arm64/include/asm/ptdump.h
+++ b/arch/arm64/include/asm/ptdump.h
@@ -16,9 +16,10 @@
#ifndef __ASM_PTDUMP_H
#define __ASM_PTDUMP_H
-#ifdef CONFIG_ARM64_PTDUMP
+#ifdef CONFIG_ARM64_PTDUMP_CORE
#include <linux/mm_types.h>
+#include <linux/seq_file.h>
struct addr_marker {
unsigned long start_address;
@@ -29,16 +30,25 @@ struct ptdump_info {
struct mm_struct *mm;
const struct addr_marker *markers;
unsigned long base_addr;
- unsigned long max_addr;
};
-int ptdump_register(struct ptdump_info *info, const char *name);
-
+void ptdump_walk_pgd(struct seq_file *s, struct ptdump_info *info);
+#ifdef CONFIG_ARM64_PTDUMP_DEBUGFS
+int ptdump_debugfs_register(struct ptdump_info *info, const char *name);
#else
-static inline int ptdump_register(struct ptdump_info *info, const char *name)
+static inline int ptdump_debugfs_register(struct ptdump_info *info,
+ const char *name)
{
return 0;
}
-#endif /* CONFIG_ARM64_PTDUMP */
+#endif
+void ptdump_check_wx(void);
+#endif /* CONFIG_ARM64_PTDUMP_CORE */
+
+#ifdef CONFIG_DEBUG_WX
+#define debug_checkwx() ptdump_check_wx()
+#else
+#define debug_checkwx() do { } while (0)
+#endif
#endif /* __ASM_PTDUMP_H */
diff --git a/arch/arm64/include/asm/ptrace.h b/arch/arm64/include/asm/ptrace.h
index ada08b5b036d..513daf050e84 100644
--- a/arch/arm64/include/asm/ptrace.h
+++ b/arch/arm64/include/asm/ptrace.h
@@ -217,6 +217,14 @@ int valid_user_regs(struct user_pt_regs *regs, struct task_struct *task);
#include <asm-generic/ptrace.h>
+#define procedure_link_pointer(regs) ((regs)->regs[30])
+
+static inline void procedure_link_pointer_set(struct pt_regs *regs,
+ unsigned long val)
+{
+ procedure_link_pointer(regs) = val;
+}
+
#undef profile_pc
extern unsigned long profile_pc(struct pt_regs *regs);
diff --git a/arch/arm64/include/asm/smp.h b/arch/arm64/include/asm/smp.h
index 022644704a93..d050d720a1b4 100644
--- a/arch/arm64/include/asm/smp.h
+++ b/arch/arm64/include/asm/smp.h
@@ -29,11 +29,22 @@
#ifndef __ASSEMBLY__
+#include <asm/percpu.h>
+
#include <linux/threads.h>
#include <linux/cpumask.h>
#include <linux/thread_info.h>
-#define raw_smp_processor_id() (current_thread_info()->cpu)
+DECLARE_PER_CPU_READ_MOSTLY(int, cpu_number);
+
+/*
+ * We don't use this_cpu_read(cpu_number) as that has implicit writes to
+ * preempt_count, and associated (compiler) barriers, that we'd like to avoid
+ * the expense of. If we're preemptible, the value can be stale at use anyway.
+ * And we can't use this_cpu_ptr() either, as that winds up recursing back
+ * here under CONFIG_DEBUG_PREEMPT=y.
+ */
+#define raw_smp_processor_id() (*raw_cpu_ptr(&cpu_number))
struct seq_file;
@@ -73,6 +84,7 @@ asmlinkage void secondary_start_kernel(void);
*/
struct secondary_data {
void *stack;
+ struct task_struct *task;
long status;
};
diff --git a/arch/arm64/include/asm/stack_pointer.h b/arch/arm64/include/asm/stack_pointer.h
new file mode 100644
index 000000000000..ffcdf742cddf
--- /dev/null
+++ b/arch/arm64/include/asm/stack_pointer.h
@@ -0,0 +1,9 @@
+#ifndef __ASM_STACK_POINTER_H
+#define __ASM_STACK_POINTER_H
+
+/*
+ * how to get the current stack pointer from C
+ */
+register unsigned long current_stack_pointer asm ("sp");
+
+#endif /* __ASM_STACK_POINTER_H */
diff --git a/arch/arm64/include/asm/suspend.h b/arch/arm64/include/asm/suspend.h
index b8a313fd7a09..de5600f40adf 100644
--- a/arch/arm64/include/asm/suspend.h
+++ b/arch/arm64/include/asm/suspend.h
@@ -1,7 +1,7 @@
#ifndef __ASM_SUSPEND_H
#define __ASM_SUSPEND_H
-#define NR_CTX_REGS 10
+#define NR_CTX_REGS 12
#define NR_CALLEE_SAVED_REGS 12
/*
diff --git a/arch/arm64/include/asm/sysreg.h b/arch/arm64/include/asm/sysreg.h
index 6c80b3699cb8..98ae03f8eedd 100644
--- a/arch/arm64/include/asm/sysreg.h
+++ b/arch/arm64/include/asm/sysreg.h
@@ -22,8 +22,6 @@
#include <linux/stringify.h>
-#include <asm/opcodes.h>
-
/*
* ARMv8 ARM reserves the following encoding for system registers:
* (Ref: ARMv8 ARM, Section: "System instruction class encoding overview",
@@ -37,6 +35,33 @@
#define sys_reg(op0, op1, crn, crm, op2) \
((((op0)&3)<<19)|((op1)<<16)|((crn)<<12)|((crm)<<8)|((op2)<<5))
+#ifndef CONFIG_BROKEN_GAS_INST
+
+#ifdef __ASSEMBLY__
+#define __emit_inst(x) .inst (x)
+#else
+#define __emit_inst(x) ".inst " __stringify((x)) "\n\t"
+#endif
+
+#else /* CONFIG_BROKEN_GAS_INST */
+
+#ifndef CONFIG_CPU_BIG_ENDIAN
+#define __INSTR_BSWAP(x) (x)
+#else /* CONFIG_CPU_BIG_ENDIAN */
+#define __INSTR_BSWAP(x) ((((x) << 24) & 0xff000000) | \
+ (((x) << 8) & 0x00ff0000) | \
+ (((x) >> 8) & 0x0000ff00) | \
+ (((x) >> 24) & 0x000000ff))
+#endif /* CONFIG_CPU_BIG_ENDIAN */
+
+#ifdef __ASSEMBLY__
+#define __emit_inst(x) .long __INSTR_BSWAP(x)
+#else /* __ASSEMBLY__ */
+#define __emit_inst(x) ".long " __stringify(__INSTR_BSWAP(x)) "\n\t"
+#endif /* __ASSEMBLY__ */
+
+#endif /* CONFIG_BROKEN_GAS_INST */
+
#define SYS_MIDR_EL1 sys_reg(3, 0, 0, 0, 0)
#define SYS_MPIDR_EL1 sys_reg(3, 0, 0, 0, 5)
#define SYS_REVIDR_EL1 sys_reg(3, 0, 0, 0, 6)
@@ -81,10 +106,10 @@
#define REG_PSTATE_PAN_IMM sys_reg(0, 0, 4, 0, 4)
#define REG_PSTATE_UAO_IMM sys_reg(0, 0, 4, 0, 3)
-#define SET_PSTATE_PAN(x) __inst_arm(0xd5000000 | REG_PSTATE_PAN_IMM |\
- (!!x)<<8 | 0x1f)
-#define SET_PSTATE_UAO(x) __inst_arm(0xd5000000 | REG_PSTATE_UAO_IMM |\
- (!!x)<<8 | 0x1f)
+#define SET_PSTATE_PAN(x) __emit_inst(0xd5000000 | REG_PSTATE_PAN_IMM | \
+ (!!x)<<8 | 0x1f)
+#define SET_PSTATE_UAO(x) __emit_inst(0xd5000000 | REG_PSTATE_UAO_IMM | \
+ (!!x)<<8 | 0x1f)
/* Common SCTLR_ELx flags. */
#define SCTLR_ELx_EE (1 << 25)
@@ -228,11 +253,11 @@
.equ .L__reg_num_xzr, 31
.macro mrs_s, rt, sreg
- .inst 0xd5200000|(\sreg)|(.L__reg_num_\rt)
+ __emit_inst(0xd5200000|(\sreg)|(.L__reg_num_\rt))
.endm
.macro msr_s, sreg, rt
- .inst 0xd5000000|(\sreg)|(.L__reg_num_\rt)
+ __emit_inst(0xd5000000|(\sreg)|(.L__reg_num_\rt))
.endm
#else
@@ -246,11 +271,11 @@ asm(
" .equ .L__reg_num_xzr, 31\n"
"\n"
" .macro mrs_s, rt, sreg\n"
-" .inst 0xd5200000|(\\sreg)|(.L__reg_num_\\rt)\n"
+ __emit_inst(0xd5200000|(\\sreg)|(.L__reg_num_\\rt))
" .endm\n"
"\n"
" .macro msr_s, sreg, rt\n"
-" .inst 0xd5000000|(\\sreg)|(.L__reg_num_\\rt)\n"
+ __emit_inst(0xd5000000|(\\sreg)|(.L__reg_num_\\rt))
" .endm\n"
);
diff --git a/arch/arm64/include/asm/thread_info.h b/arch/arm64/include/asm/thread_info.h
index e9ea5a6bd449..46c3b93cf865 100644
--- a/arch/arm64/include/asm/thread_info.h
+++ b/arch/arm64/include/asm/thread_info.h
@@ -36,58 +36,31 @@
struct task_struct;
+#include <asm/stack_pointer.h>
#include <asm/types.h>
typedef unsigned long mm_segment_t;
/*
* low level task data that entry.S needs immediate access to.
- * __switch_to() assumes cpu_context follows immediately after cpu_domain.
*/
struct thread_info {
unsigned long flags; /* low level flags */
mm_segment_t addr_limit; /* address limit */
- struct task_struct *task; /* main task structure */
+#ifdef CONFIG_ARM64_SW_TTBR0_PAN
+ u64 ttbr0; /* saved TTBR0_EL1 */
+#endif
int preempt_count; /* 0 => preemptable, <0 => bug */
- int cpu; /* cpu */
};
#define INIT_THREAD_INFO(tsk) \
{ \
- .task = &tsk, \
- .flags = 0, \
.preempt_count = INIT_PREEMPT_COUNT, \
.addr_limit = KERNEL_DS, \
}
-#define init_thread_info (init_thread_union.thread_info)
#define init_stack (init_thread_union.stack)
-/*
- * how to get the current stack pointer from C
- */
-register unsigned long current_stack_pointer asm ("sp");
-
-/*
- * how to get the thread information struct from C
- */
-static inline struct thread_info *current_thread_info(void) __attribute_const__;
-
-/*
- * struct thread_info can be accessed directly via sp_el0.
- *
- * We don't use read_sysreg() as we want the compiler to cache the value where
- * possible.
- */
-static inline struct thread_info *current_thread_info(void)
-{
- unsigned long sp_el0;
-
- asm ("mrs %0, sp_el0" : "=r" (sp_el0));
-
- return (struct thread_info *)sp_el0;
-}
-
#define thread_saved_pc(tsk) \
((unsigned long)(tsk->thread.cpu_context.pc))
#define thread_saved_sp(tsk) \
@@ -112,6 +85,7 @@ static inline struct thread_info *current_thread_info(void)
#define TIF_NEED_RESCHED 1
#define TIF_NOTIFY_RESUME 2 /* callback before returning to user */
#define TIF_FOREIGN_FPSTATE 3 /* CPU's FP state is not current's */
+#define TIF_UPROBE 4 /* uprobe breakpoint or singlestep */
#define TIF_NOHZ 7
#define TIF_SYSCALL_TRACE 8
#define TIF_SYSCALL_AUDIT 9
@@ -132,10 +106,12 @@ static inline struct thread_info *current_thread_info(void)
#define _TIF_SYSCALL_AUDIT (1 << TIF_SYSCALL_AUDIT)
#define _TIF_SYSCALL_TRACEPOINT (1 << TIF_SYSCALL_TRACEPOINT)
#define _TIF_SECCOMP (1 << TIF_SECCOMP)
+#define _TIF_UPROBE (1 << TIF_UPROBE)
#define _TIF_32BIT (1 << TIF_32BIT)
#define _TIF_WORK_MASK (_TIF_NEED_RESCHED | _TIF_SIGPENDING | \
- _TIF_NOTIFY_RESUME | _TIF_FOREIGN_FPSTATE)
+ _TIF_NOTIFY_RESUME | _TIF_FOREIGN_FPSTATE | \
+ _TIF_UPROBE)
#define _TIF_SYSCALL_WORK (_TIF_SYSCALL_TRACE | _TIF_SYSCALL_AUDIT | \
_TIF_SYSCALL_TRACEPOINT | _TIF_SECCOMP | \
diff --git a/arch/arm64/include/asm/uaccess.h b/arch/arm64/include/asm/uaccess.h
index 55d0adbf6509..d26750ca6e06 100644
--- a/arch/arm64/include/asm/uaccess.h
+++ b/arch/arm64/include/asm/uaccess.h
@@ -18,6 +18,12 @@
#ifndef __ASM_UACCESS_H
#define __ASM_UACCESS_H
+#include <asm/alternative.h>
+#include <asm/kernel-pgtable.h>
+#include <asm/sysreg.h>
+
+#ifndef __ASSEMBLY__
+
/*
* User space memory access functions
*/
@@ -26,10 +32,8 @@
#include <linux/string.h>
#include <linux/thread_info.h>
-#include <asm/alternative.h>
#include <asm/cpufeature.h>
#include <asm/ptrace.h>
-#include <asm/sysreg.h>
#include <asm/errno.h>
#include <asm/memory.h>
#include <asm/compiler.h>
@@ -120,6 +124,99 @@ static inline void set_fs(mm_segment_t fs)
" .popsection\n"
/*
+ * User access enabling/disabling.
+ */
+#ifdef CONFIG_ARM64_SW_TTBR0_PAN
+static inline void __uaccess_ttbr0_disable(void)
+{
+ unsigned long ttbr;
+
+ /* reserved_ttbr0 placed at the end of swapper_pg_dir */
+ ttbr = read_sysreg(ttbr1_el1) + SWAPPER_DIR_SIZE;
+ write_sysreg(ttbr, ttbr0_el1);
+ isb();
+}
+
+static inline void __uaccess_ttbr0_enable(void)
+{
+ unsigned long flags;
+
+ /*
+ * Disable interrupts to avoid preemption between reading the 'ttbr0'
+ * variable and the MSR. A context switch could trigger an ASID
+ * roll-over and an update of 'ttbr0'.
+ */
+ local_irq_save(flags);
+ write_sysreg(current_thread_info()->ttbr0, ttbr0_el1);
+ isb();
+ local_irq_restore(flags);
+}
+
+static inline bool uaccess_ttbr0_disable(void)
+{
+ if (!system_uses_ttbr0_pan())
+ return false;
+ __uaccess_ttbr0_disable();
+ return true;
+}
+
+static inline bool uaccess_ttbr0_enable(void)
+{
+ if (!system_uses_ttbr0_pan())
+ return false;
+ __uaccess_ttbr0_enable();
+ return true;
+}
+#else
+static inline bool uaccess_ttbr0_disable(void)
+{
+ return false;
+}
+
+static inline bool uaccess_ttbr0_enable(void)
+{
+ return false;
+}
+#endif
+
+#define __uaccess_disable(alt) \
+do { \
+ if (!uaccess_ttbr0_disable()) \
+ asm(ALTERNATIVE("nop", SET_PSTATE_PAN(1), alt, \
+ CONFIG_ARM64_PAN)); \
+} while (0)
+
+#define __uaccess_enable(alt) \
+do { \
+ if (!uaccess_ttbr0_enable()) \
+ asm(ALTERNATIVE("nop", SET_PSTATE_PAN(0), alt, \
+ CONFIG_ARM64_PAN)); \
+} while (0)
+
+static inline void uaccess_disable(void)
+{
+ __uaccess_disable(ARM64_HAS_PAN);
+}
+
+static inline void uaccess_enable(void)
+{
+ __uaccess_enable(ARM64_HAS_PAN);
+}
+
+/*
+ * These functions are no-ops when UAO is present.
+ */
+static inline void uaccess_disable_not_uao(void)
+{
+ __uaccess_disable(ARM64_ALT_PAN_NOT_UAO);
+}
+
+static inline void uaccess_enable_not_uao(void)
+{
+ __uaccess_enable(ARM64_ALT_PAN_NOT_UAO);
+}
+
+/*
* The "__xxx" versions of the user access functions do not verify the address
* space - it must have been done previously with a separate "access_ok()"
* call.
@@ -146,8 +243,7 @@ static inline void set_fs(mm_segment_t fs)
do { \
unsigned long __gu_val; \
__chk_user_ptr(ptr); \
- asm(ALTERNATIVE("nop", SET_PSTATE_PAN(0), ARM64_ALT_PAN_NOT_UAO,\
- CONFIG_ARM64_PAN)); \
+ uaccess_enable_not_uao(); \
switch (sizeof(*(ptr))) { \
case 1: \
__get_user_asm("ldrb", "ldtrb", "%w", __gu_val, (ptr), \
@@ -168,9 +264,8 @@ do { \
default: \
BUILD_BUG(); \
} \
+ uaccess_disable_not_uao(); \
(x) = (__force __typeof__(*(ptr)))__gu_val; \
- asm(ALTERNATIVE("nop", SET_PSTATE_PAN(1), ARM64_ALT_PAN_NOT_UAO,\
- CONFIG_ARM64_PAN)); \
} while (0)
#define __get_user(x, ptr) \
@@ -215,8 +310,7 @@ do { \
do { \
__typeof__(*(ptr)) __pu_val = (x); \
__chk_user_ptr(ptr); \
- asm(ALTERNATIVE("nop", SET_PSTATE_PAN(0), ARM64_ALT_PAN_NOT_UAO,\
- CONFIG_ARM64_PAN)); \
+ uaccess_enable_not_uao(); \
switch (sizeof(*(ptr))) { \
case 1: \
__put_user_asm("strb", "sttrb", "%w", __pu_val, (ptr), \
@@ -237,8 +331,7 @@ do { \
default: \
BUILD_BUG(); \
} \
- asm(ALTERNATIVE("nop", SET_PSTATE_PAN(1), ARM64_ALT_PAN_NOT_UAO,\
- CONFIG_ARM64_PAN)); \
+ uaccess_disable_not_uao(); \
} while (0)
#define __put_user(x, ptr) \
@@ -331,4 +424,66 @@ extern long strncpy_from_user(char *dest, const char __user *src, long count);
extern __must_check long strlen_user(const char __user *str);
extern __must_check long strnlen_user(const char __user *str, long n);
+#else /* __ASSEMBLY__ */
+
+#include <asm/assembler.h>
+
+/*
+ * User access enabling/disabling macros.
+ */
+#ifdef CONFIG_ARM64_SW_TTBR0_PAN
+ .macro __uaccess_ttbr0_disable, tmp1
+ mrs \tmp1, ttbr1_el1 // swapper_pg_dir
+ add \tmp1, \tmp1, #SWAPPER_DIR_SIZE // reserved_ttbr0 at the end of swapper_pg_dir
+ msr ttbr0_el1, \tmp1 // set reserved TTBR0_EL1
+ isb
+ .endm
+
+ .macro __uaccess_ttbr0_enable, tmp1
+ get_thread_info \tmp1
+ ldr \tmp1, [\tmp1, #TSK_TI_TTBR0] // load saved TTBR0_EL1
+ msr ttbr0_el1, \tmp1 // set the non-PAN TTBR0_EL1
+ isb
+ .endm
+
+ .macro uaccess_ttbr0_disable, tmp1
+alternative_if_not ARM64_HAS_PAN
+ __uaccess_ttbr0_disable \tmp1
+alternative_else_nop_endif
+ .endm
+
+ .macro uaccess_ttbr0_enable, tmp1, tmp2
+alternative_if_not ARM64_HAS_PAN
+ save_and_disable_irq \tmp2 // avoid preemption
+ __uaccess_ttbr0_enable \tmp1
+ restore_irq \tmp2
+alternative_else_nop_endif
+ .endm
+#else
+ .macro uaccess_ttbr0_disable, tmp1
+ .endm
+
+ .macro uaccess_ttbr0_enable, tmp1, tmp2
+ .endm
+#endif
+
+/*
+ * These macros are no-ops when UAO is present.
+ */
+ .macro uaccess_disable_not_uao, tmp1
+ uaccess_ttbr0_disable \tmp1
+alternative_if ARM64_ALT_PAN_NOT_UAO
+ SET_PSTATE_PAN(1)
+alternative_else_nop_endif
+ .endm
+
+ .macro uaccess_enable_not_uao, tmp1, tmp2
+ uaccess_ttbr0_enable \tmp1, \tmp2
+alternative_if ARM64_ALT_PAN_NOT_UAO
+ SET_PSTATE_PAN(0)
+alternative_else_nop_endif
+ .endm
+
+#endif /* __ASSEMBLY__ */
+
#endif /* __ASM_UACCESS_H */
diff --git a/arch/arm64/include/asm/uprobes.h b/arch/arm64/include/asm/uprobes.h
new file mode 100644
index 000000000000..8d004073d0e8
--- /dev/null
+++ b/arch/arm64/include/asm/uprobes.h
@@ -0,0 +1,36 @@
+/*
+ * Copyright (C) 2014-2016 Pratyush Anand <panand@redhat.com>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ */
+
+#ifndef _ASM_UPROBES_H
+#define _ASM_UPROBES_H
+
+#include <asm/debug-monitors.h>
+#include <asm/insn.h>
+#include <asm/probes.h>
+
+#define MAX_UINSN_BYTES AARCH64_INSN_SIZE
+
+#define UPROBE_SWBP_INSN BRK64_OPCODE_UPROBES
+#define UPROBE_SWBP_INSN_SIZE AARCH64_INSN_SIZE
+#define UPROBE_XOL_SLOT_BYTES MAX_UINSN_BYTES
+
+typedef u32 uprobe_opcode_t;
+
+struct arch_uprobe_task {
+};
+
+struct arch_uprobe {
+ union {
+ u8 insn[MAX_UINSN_BYTES];
+ u8 ixol[MAX_UINSN_BYTES];
+ };
+ struct arch_probe_insn api;
+ bool simulate;
+};
+
+#endif
diff --git a/arch/arm64/include/asm/xen/hypercall.h b/arch/arm64/include/asm/xen/hypercall.h
index 74b0c423ff5b..3522cbaed316 100644
--- a/arch/arm64/include/asm/xen/hypercall.h
+++ b/arch/arm64/include/asm/xen/hypercall.h
@@ -1 +1 @@
-#include <../../arm/include/asm/xen/hypercall.h>
+#include <xen/arm/hypercall.h>
diff --git a/arch/arm64/include/asm/xen/hypervisor.h b/arch/arm64/include/asm/xen/hypervisor.h
index f263da8e8769..d6e7709d0688 100644
--- a/arch/arm64/include/asm/xen/hypervisor.h
+++ b/arch/arm64/include/asm/xen/hypervisor.h
@@ -1 +1 @@
-#include <../../arm/include/asm/xen/hypervisor.h>
+#include <xen/arm/hypervisor.h>
diff --git a/arch/arm64/include/asm/xen/interface.h b/arch/arm64/include/asm/xen/interface.h
index 44457aebeed4..88c0d75da190 100644
--- a/arch/arm64/include/asm/xen/interface.h
+++ b/arch/arm64/include/asm/xen/interface.h
@@ -1 +1 @@
-#include <../../arm/include/asm/xen/interface.h>
+#include <xen/arm/interface.h>
diff --git a/arch/arm64/include/asm/xen/page-coherent.h b/arch/arm64/include/asm/xen/page-coherent.h
index 2052102b4e02..b3ef061d8b74 100644
--- a/arch/arm64/include/asm/xen/page-coherent.h
+++ b/arch/arm64/include/asm/xen/page-coherent.h
@@ -1 +1 @@
-#include <../../arm/include/asm/xen/page-coherent.h>
+#include <xen/arm/page-coherent.h>
diff --git a/arch/arm64/include/asm/xen/page.h b/arch/arm64/include/asm/xen/page.h
index bed87ec36780..31bbc803cecb 100644
--- a/arch/arm64/include/asm/xen/page.h
+++ b/arch/arm64/include/asm/xen/page.h
@@ -1 +1 @@
-#include <../../arm/include/asm/xen/page.h>
+#include <xen/arm/page.h>
diff --git a/arch/arm64/kernel/armv8_deprecated.c b/arch/arm64/kernel/armv8_deprecated.c
index b0988bb1bf64..04de188a36c9 100644
--- a/arch/arm64/kernel/armv8_deprecated.c
+++ b/arch/arm64/kernel/armv8_deprecated.c
@@ -14,10 +14,8 @@
#include <linux/slab.h>
#include <linux/sysctl.h>
-#include <asm/alternative.h>
#include <asm/cpufeature.h>
#include <asm/insn.h>
-#include <asm/opcodes.h>
#include <asm/sysreg.h>
#include <asm/system_misc.h>
#include <asm/traps.h>
@@ -285,10 +283,10 @@ static void __init register_insn_emulation_sysctl(struct ctl_table *table)
#define __SWP_LL_SC_LOOPS 4
#define __user_swpX_asm(data, addr, res, temp, temp2, B) \
+do { \
+ uaccess_enable(); \
__asm__ __volatile__( \
" mov %w3, %w7\n" \
- ALTERNATIVE("nop", SET_PSTATE_PAN(0), ARM64_HAS_PAN, \
- CONFIG_ARM64_PAN) \
"0: ldxr"B" %w2, [%4]\n" \
"1: stxr"B" %w0, %w1, [%4]\n" \
" cbz %w0, 2f\n" \
@@ -306,12 +304,12 @@ static void __init register_insn_emulation_sysctl(struct ctl_table *table)
" .popsection" \
_ASM_EXTABLE(0b, 4b) \
_ASM_EXTABLE(1b, 4b) \
- ALTERNATIVE("nop", SET_PSTATE_PAN(1), ARM64_HAS_PAN, \
- CONFIG_ARM64_PAN) \
: "=&r" (res), "+r" (data), "=&r" (temp), "=&r" (temp2) \
: "r" (addr), "i" (-EAGAIN), "i" (-EFAULT), \
"i" (__SWP_LL_SC_LOOPS) \
- : "memory")
+ : "memory"); \
+ uaccess_disable(); \
+} while (0)
#define __user_swp_asm(data, addr, res, temp, temp2) \
__user_swpX_asm(data, addr, res, temp, temp2, "")
@@ -352,6 +350,10 @@ static int emulate_swpX(unsigned int address, unsigned int *data,
return res;
}
+#define ARM_OPCODE_CONDTEST_FAIL 0
+#define ARM_OPCODE_CONDTEST_PASS 1
+#define ARM_OPCODE_CONDTEST_UNCOND 2
+
#define ARM_OPCODE_CONDITION_UNCOND 0xf
static unsigned int __kprobes aarch32_check_condition(u32 opcode, u32 psr)
diff --git a/arch/arm64/kernel/asm-offsets.c b/arch/arm64/kernel/asm-offsets.c
index 4a2f0f0fef32..bc049afc73a7 100644
--- a/arch/arm64/kernel/asm-offsets.c
+++ b/arch/arm64/kernel/asm-offsets.c
@@ -36,11 +36,13 @@ int main(void)
{
DEFINE(TSK_ACTIVE_MM, offsetof(struct task_struct, active_mm));
BLANK();
- DEFINE(TI_FLAGS, offsetof(struct thread_info, flags));
- DEFINE(TI_PREEMPT, offsetof(struct thread_info, preempt_count));
- DEFINE(TI_ADDR_LIMIT, offsetof(struct thread_info, addr_limit));
- DEFINE(TI_TASK, offsetof(struct thread_info, task));
- DEFINE(TI_CPU, offsetof(struct thread_info, cpu));
+ DEFINE(TSK_TI_FLAGS, offsetof(struct task_struct, thread_info.flags));
+ DEFINE(TSK_TI_PREEMPT, offsetof(struct task_struct, thread_info.preempt_count));
+ DEFINE(TSK_TI_ADDR_LIMIT, offsetof(struct task_struct, thread_info.addr_limit));
+#ifdef CONFIG_ARM64_SW_TTBR0_PAN
+ DEFINE(TSK_TI_TTBR0, offsetof(struct task_struct, thread_info.ttbr0));
+#endif
+ DEFINE(TSK_STACK, offsetof(struct task_struct, stack));
BLANK();
DEFINE(THREAD_CPU_CONTEXT, offsetof(struct task_struct, thread.cpu_context));
BLANK();
@@ -123,6 +125,7 @@ int main(void)
DEFINE(TZ_DSTTIME, offsetof(struct timezone, tz_dsttime));
BLANK();
DEFINE(CPU_BOOT_STACK, offsetof(struct secondary_data, stack));
+ DEFINE(CPU_BOOT_TASK, offsetof(struct secondary_data, task));
BLANK();
#ifdef CONFIG_KVM_ARM_HOST
DEFINE(VCPU_CONTEXT, offsetof(struct kvm_vcpu, arch.ctxt));
diff --git a/arch/arm64/kernel/cpufeature.c b/arch/arm64/kernel/cpufeature.c
index c02504ea304b..fdf8f045929f 100644
--- a/arch/arm64/kernel/cpufeature.c
+++ b/arch/arm64/kernel/cpufeature.c
@@ -47,6 +47,7 @@ unsigned int compat_elf_hwcap2 __read_mostly;
#endif
DECLARE_BITMAP(cpu_hwcaps, ARM64_NCAPS);
+EXPORT_SYMBOL(cpu_hwcaps);
DEFINE_STATIC_KEY_ARRAY_FALSE(cpu_hwcap_keys, ARM64_NCAPS);
EXPORT_SYMBOL(cpu_hwcap_keys);
@@ -746,6 +747,14 @@ static bool hyp_offset_low(const struct arm64_cpu_capabilities *entry,
return idmap_addr > GENMASK(VA_BITS - 2, 0) && !is_kernel_in_hyp_mode();
}
+static bool has_no_fpsimd(const struct arm64_cpu_capabilities *entry, int __unused)
+{
+ u64 pfr0 = read_system_reg(SYS_ID_AA64PFR0_EL1);
+
+ return cpuid_feature_extract_signed_field(pfr0,
+ ID_AA64PFR0_FP_SHIFT) < 0;
+}
+
static const struct arm64_cpu_capabilities arm64_features[] = {
{
.desc = "GIC system register CPU interface",
@@ -829,6 +838,13 @@ static const struct arm64_cpu_capabilities arm64_features[] = {
.def_scope = SCOPE_SYSTEM,
.matches = hyp_offset_low,
},
+ {
+ /* FP/SIMD is not implemented */
+ .capability = ARM64_HAS_NO_FPSIMD,
+ .def_scope = SCOPE_SYSTEM,
+ .min_field_value = 0,
+ .matches = has_no_fpsimd,
+ },
{},
};
@@ -1102,5 +1118,5 @@ void __init setup_cpu_features(void)
static bool __maybe_unused
cpufeature_pan_not_uao(const struct arm64_cpu_capabilities *entry, int __unused)
{
- return (cpus_have_cap(ARM64_HAS_PAN) && !cpus_have_cap(ARM64_HAS_UAO));
+ return (cpus_have_const_cap(ARM64_HAS_PAN) && !cpus_have_const_cap(ARM64_HAS_UAO));
}
diff --git a/arch/arm64/kernel/debug-monitors.c b/arch/arm64/kernel/debug-monitors.c
index 73ae90ef434c..605df76f0a06 100644
--- a/arch/arm64/kernel/debug-monitors.c
+++ b/arch/arm64/kernel/debug-monitors.c
@@ -226,6 +226,8 @@ static void send_user_sigtrap(int si_code)
static int single_step_handler(unsigned long addr, unsigned int esr,
struct pt_regs *regs)
{
+ bool handler_found = false;
+
/*
* If we are stepping a pending breakpoint, call the hw_breakpoint
* handler first.
@@ -233,7 +235,14 @@ static int single_step_handler(unsigned long addr, unsigned int esr,
if (!reinstall_suspended_bps(regs))
return 0;
- if (user_mode(regs)) {
+#ifdef CONFIG_KPROBES
+ if (kprobe_single_step_handler(regs, esr) == DBG_HOOK_HANDLED)
+ handler_found = true;
+#endif
+ if (!handler_found && call_step_hook(regs, esr) == DBG_HOOK_HANDLED)
+ handler_found = true;
+
+ if (!handler_found && user_mode(regs)) {
send_user_sigtrap(TRAP_TRACE);
/*
@@ -243,15 +252,8 @@ static int single_step_handler(unsigned long addr, unsigned int esr,
* to the active-not-pending state).
*/
user_rewind_single_step(current);
- } else {
-#ifdef CONFIG_KPROBES
- if (kprobe_single_step_handler(regs, esr) == DBG_HOOK_HANDLED)
- return 0;
-#endif
- if (call_step_hook(regs, esr) == DBG_HOOK_HANDLED)
- return 0;
-
- pr_warning("Unexpected kernel single-step exception at EL1\n");
+ } else if (!handler_found) {
+ pr_warn("Unexpected kernel single-step exception at EL1\n");
/*
* Re-enable stepping since we know that we will be
* returning to regs.
@@ -304,16 +306,20 @@ NOKPROBE_SYMBOL(call_break_hook);
static int brk_handler(unsigned long addr, unsigned int esr,
struct pt_regs *regs)
{
- if (user_mode(regs)) {
- send_user_sigtrap(TRAP_BRKPT);
- }
+ bool handler_found = false;
+
#ifdef CONFIG_KPROBES
- else if ((esr & BRK64_ESR_MASK) == BRK64_ESR_KPROBES) {
- if (kprobe_breakpoint_handler(regs, esr) != DBG_HOOK_HANDLED)
- return -EFAULT;
+ if ((esr & BRK64_ESR_MASK) == BRK64_ESR_KPROBES) {
+ if (kprobe_breakpoint_handler(regs, esr) == DBG_HOOK_HANDLED)
+ handler_found = true;
}
#endif
- else if (call_break_hook(regs, esr) != DBG_HOOK_HANDLED) {
+ if (!handler_found && call_break_hook(regs, esr) == DBG_HOOK_HANDLED)
+ handler_found = true;
+
+ if (!handler_found && user_mode(regs)) {
+ send_user_sigtrap(TRAP_BRKPT);
+ } else if (!handler_found) {
pr_warn("Unexpected kernel BRK exception at EL1\n");
return -EFAULT;
}
diff --git a/arch/arm64/kernel/efi.c b/arch/arm64/kernel/efi.c
index ba9bee389fd5..5d17f377d905 100644
--- a/arch/arm64/kernel/efi.c
+++ b/arch/arm64/kernel/efi.c
@@ -62,8 +62,8 @@ struct screen_info screen_info __section(.data);
int __init efi_create_mapping(struct mm_struct *mm, efi_memory_desc_t *md)
{
pteval_t prot_val = create_mapping_protection(md);
- bool allow_block_mappings = (md->type != EFI_RUNTIME_SERVICES_CODE &&
- md->type != EFI_RUNTIME_SERVICES_DATA);
+ bool page_mappings_only = (md->type == EFI_RUNTIME_SERVICES_CODE ||
+ md->type == EFI_RUNTIME_SERVICES_DATA);
if (!PAGE_ALIGNED(md->phys_addr) ||
!PAGE_ALIGNED(md->num_pages << EFI_PAGE_SHIFT)) {
@@ -76,12 +76,12 @@ int __init efi_create_mapping(struct mm_struct *mm, efi_memory_desc_t *md)
* from the MMU routines. So avoid block mappings altogether in
* that case.
*/
- allow_block_mappings = false;
+ page_mappings_only = true;
}
create_pgd_mapping(mm, md->phys_addr, md->virt_addr,
md->num_pages << EFI_PAGE_SHIFT,
- __pgprot(prot_val | PTE_NG), allow_block_mappings);
+ __pgprot(prot_val | PTE_NG), page_mappings_only);
return 0;
}
diff --git a/arch/arm64/kernel/entry.S b/arch/arm64/kernel/entry.S
index 223d54a4d66b..4f0d76339414 100644
--- a/arch/arm64/kernel/entry.S
+++ b/arch/arm64/kernel/entry.S
@@ -29,7 +29,9 @@
#include <asm/esr.h>
#include <asm/irq.h>
#include <asm/memory.h>
+#include <asm/ptrace.h>
#include <asm/thread_info.h>
+#include <asm/uaccess.h>
#include <asm/unistd.h>
/*
@@ -90,9 +92,8 @@
.if \el == 0
mrs x21, sp_el0
- mov tsk, sp
- and tsk, tsk, #~(THREAD_SIZE - 1) // Ensure MDSCR_EL1.SS is clear,
- ldr x19, [tsk, #TI_FLAGS] // since we can unmask debug
+ ldr_this_cpu tsk, __entry_task, x20 // Ensure MDSCR_EL1.SS is clear,
+ ldr x19, [tsk, #TSK_TI_FLAGS] // since we can unmask debug
disable_step_tsk x19, x20 // exceptions when scheduling.
mov x29, xzr // fp pointed to user-space
@@ -100,15 +101,41 @@
add x21, sp, #S_FRAME_SIZE
get_thread_info tsk
/* Save the task's original addr_limit and set USER_DS (TASK_SIZE_64) */
- ldr x20, [tsk, #TI_ADDR_LIMIT]
+ ldr x20, [tsk, #TSK_TI_ADDR_LIMIT]
str x20, [sp, #S_ORIG_ADDR_LIMIT]
mov x20, #TASK_SIZE_64
- str x20, [tsk, #TI_ADDR_LIMIT]
+ str x20, [tsk, #TSK_TI_ADDR_LIMIT]
/* No need to reset PSTATE.UAO, hardware's already set it to 0 for us */
.endif /* \el == 0 */
mrs x22, elr_el1
mrs x23, spsr_el1
stp lr, x21, [sp, #S_LR]
+
+#ifdef CONFIG_ARM64_SW_TTBR0_PAN
+ /*
+ * Set the TTBR0 PAN bit in SPSR. When the exception is taken from
+ * EL0, there is no need to check the state of TTBR0_EL1 since
+ * accesses are always enabled.
+ * Note that the meaning of this bit differs from the ARMv8.1 PAN
+ * feature as all TTBR0_EL1 accesses are disabled, not just those to
+ * user mappings.
+ */
+alternative_if ARM64_HAS_PAN
+ b 1f // skip TTBR0 PAN
+alternative_else_nop_endif
+
+ .if \el != 0
+ mrs x21, ttbr0_el1
+ tst x21, #0xffff << 48 // Check for the reserved ASID
+ orr x23, x23, #PSR_PAN_BIT // Set the emulated PAN in the saved SPSR
+ b.eq 1f // TTBR0 access already disabled
+ and x23, x23, #~PSR_PAN_BIT // Clear the emulated PAN in the saved SPSR
+ .endif
+
+ __uaccess_ttbr0_disable x21
+1:
+#endif
+
stp x22, x23, [sp, #S_PC]
/*
@@ -139,7 +166,7 @@
.if \el != 0
/* Restore the task's original addr_limit. */
ldr x20, [sp, #S_ORIG_ADDR_LIMIT]
- str x20, [tsk, #TI_ADDR_LIMIT]
+ str x20, [tsk, #TSK_TI_ADDR_LIMIT]
/* No need to restore UAO, it will be restored from SPSR_EL1 */
.endif
@@ -147,6 +174,40 @@
ldp x21, x22, [sp, #S_PC] // load ELR, SPSR
.if \el == 0
ct_user_enter
+ .endif
+
+#ifdef CONFIG_ARM64_SW_TTBR0_PAN
+ /*
+ * Restore access to TTBR0_EL1. If returning to EL0, no need for SPSR
+ * PAN bit checking.
+ */
+alternative_if ARM64_HAS_PAN
+ b 2f // skip TTBR0 PAN
+alternative_else_nop_endif
+
+ .if \el != 0
+ tbnz x22, #22, 1f // Skip re-enabling TTBR0 access if the PSR_PAN_BIT is set
+ .endif
+
+ __uaccess_ttbr0_enable x0
+
+ .if \el == 0
+ /*
+ * Enable errata workarounds only if returning to user. The only
+ * workaround currently required for TTBR0_EL1 changes are for the
+ * Cavium erratum 27456 (broadcast TLBI instructions may cause I-cache
+ * corruption).
+ */
+ post_ttbr0_update_workaround
+ .endif
+1:
+ .if \el != 0
+ and x22, x22, #~PSR_PAN_BIT // ARMv8.0 CPUs do not understand this bit
+ .endif
+2:
+#endif
+
+ .if \el == 0
ldr x23, [sp, #S_SP] // load return stack pointer
msr sp_el0, x23
#ifdef CONFIG_ARM64_ERRATUM_845719
@@ -162,6 +223,7 @@ alternative_if ARM64_WORKAROUND_845719
alternative_else_nop_endif
#endif
.endif
+
msr elr_el1, x21 // set up the return data
msr spsr_el1, x22
ldp x0, x1, [sp, #16 * 0]
@@ -184,23 +246,20 @@ alternative_else_nop_endif
eret // return to kernel
.endm
- .macro get_thread_info, rd
- mrs \rd, sp_el0
- .endm
-
.macro irq_stack_entry
mov x19, sp // preserve the original sp
/*
- * Compare sp with the current thread_info, if the top
- * ~(THREAD_SIZE - 1) bits match, we are on a task stack, and
- * should switch to the irq stack.
+ * Compare sp with the base of the task stack.
+ * If the top ~(THREAD_SIZE - 1) bits match, we are on a task stack,
+ * and should switch to the irq stack.
*/
- and x25, x19, #~(THREAD_SIZE - 1)
- cmp x25, tsk
- b.ne 9998f
+ ldr x25, [tsk, TSK_STACK]
+ eor x25, x25, x19
+ and x25, x25, #~(THREAD_SIZE - 1)
+ cbnz x25, 9998f
- this_cpu_ptr irq_stack, x25, x26
+ adr_this_cpu x25, irq_stack, x26
mov x26, #IRQ_STACK_START_SP
add x26, x25, x26
@@ -427,9 +486,9 @@ el1_irq:
irq_handler
#ifdef CONFIG_PREEMPT
- ldr w24, [tsk, #TI_PREEMPT] // get preempt count
+ ldr w24, [tsk, #TSK_TI_PREEMPT] // get preempt count
cbnz w24, 1f // preempt count != 0
- ldr x0, [tsk, #TI_FLAGS] // get flags
+ ldr x0, [tsk, #TSK_TI_FLAGS] // get flags
tbz x0, #TIF_NEED_RESCHED, 1f // needs rescheduling?
bl el1_preempt
1:
@@ -444,7 +503,7 @@ ENDPROC(el1_irq)
el1_preempt:
mov x24, lr
1: bl preempt_schedule_irq // irq en/disable is done inside
- ldr x0, [tsk, #TI_FLAGS] // get new tasks TI_FLAGS
+ ldr x0, [tsk, #TSK_TI_FLAGS] // get new tasks TI_FLAGS
tbnz x0, #TIF_NEED_RESCHED, 1b // needs rescheduling?
ret x24
#endif
@@ -674,8 +733,7 @@ ENTRY(cpu_switch_to)
ldp x29, x9, [x8], #16
ldr lr, [x8]
mov sp, x9
- and x9, x9, #~(THREAD_SIZE - 1)
- msr sp_el0, x9
+ msr sp_el0, x1
ret
ENDPROC(cpu_switch_to)
@@ -686,7 +744,7 @@ ENDPROC(cpu_switch_to)
ret_fast_syscall:
disable_irq // disable interrupts
str x0, [sp, #S_X0] // returned x0
- ldr x1, [tsk, #TI_FLAGS] // re-check for syscall tracing
+ ldr x1, [tsk, #TSK_TI_FLAGS] // re-check for syscall tracing
and x2, x1, #_TIF_SYSCALL_WORK
cbnz x2, ret_fast_syscall_trace
and x2, x1, #_TIF_WORK_MASK
@@ -706,14 +764,14 @@ work_pending:
#ifdef CONFIG_TRACE_IRQFLAGS
bl trace_hardirqs_on // enabled while in userspace
#endif
- ldr x1, [tsk, #TI_FLAGS] // re-check for single-step
+ ldr x1, [tsk, #TSK_TI_FLAGS] // re-check for single-step
b finish_ret_to_user
/*
* "slow" syscall return path.
*/
ret_to_user:
disable_irq // disable interrupts
- ldr x1, [tsk, #TI_FLAGS]
+ ldr x1, [tsk, #TSK_TI_FLAGS]
and x2, x1, #_TIF_WORK_MASK
cbnz x2, work_pending
finish_ret_to_user:
@@ -746,7 +804,7 @@ el0_svc_naked: // compat entry point
enable_dbg_and_irq
ct_user_exit 1
- ldr x16, [tsk, #TI_FLAGS] // check for syscall hooks
+ ldr x16, [tsk, #TSK_TI_FLAGS] // check for syscall hooks
tst x16, #_TIF_SYSCALL_WORK
b.ne __sys_trace
cmp scno, sc_nr // check upper syscall limit
diff --git a/arch/arm64/kernel/fpsimd.c b/arch/arm64/kernel/fpsimd.c
index 394c61db5566..b883f1f75216 100644
--- a/arch/arm64/kernel/fpsimd.c
+++ b/arch/arm64/kernel/fpsimd.c
@@ -127,6 +127,8 @@ void do_fpsimd_exc(unsigned int esr, struct pt_regs *regs)
void fpsimd_thread_switch(struct task_struct *next)
{
+ if (!system_supports_fpsimd())
+ return;
/*
* Save the current FPSIMD state to memory, but only if whatever is in
* the registers is in fact the most recent userland FPSIMD state of
@@ -157,6 +159,8 @@ void fpsimd_thread_switch(struct task_struct *next)
void fpsimd_flush_thread(void)
{
+ if (!system_supports_fpsimd())
+ return;
memset(&current->thread.fpsimd_state, 0, sizeof(struct fpsimd_state));
fpsimd_flush_task_state(current);
set_thread_flag(TIF_FOREIGN_FPSTATE);
@@ -168,6 +172,8 @@ void fpsimd_flush_thread(void)
*/
void fpsimd_preserve_current_state(void)
{
+ if (!system_supports_fpsimd())
+ return;
preempt_disable();
if (!test_thread_flag(TIF_FOREIGN_FPSTATE))
fpsimd_save_state(&current->thread.fpsimd_state);
@@ -181,6 +187,8 @@ void fpsimd_preserve_current_state(void)
*/
void fpsimd_restore_current_state(void)
{
+ if (!system_supports_fpsimd())
+ return;
preempt_disable();
if (test_and_clear_thread_flag(TIF_FOREIGN_FPSTATE)) {
struct fpsimd_state *st = &current->thread.fpsimd_state;
@@ -199,6 +207,8 @@ void fpsimd_restore_current_state(void)
*/
void fpsimd_update_current_state(struct fpsimd_state *state)
{
+ if (!system_supports_fpsimd())
+ return;
preempt_disable();
fpsimd_load_state(state);
if (test_and_clear_thread_flag(TIF_FOREIGN_FPSTATE)) {
@@ -228,6 +238,8 @@ static DEFINE_PER_CPU(struct fpsimd_partial_state, softirq_fpsimdstate);
*/
void kernel_neon_begin_partial(u32 num_regs)
{
+ if (WARN_ON(!system_supports_fpsimd()))
+ return;
if (in_interrupt()) {
struct fpsimd_partial_state *s = this_cpu_ptr(
in_irq() ? &hardirq_fpsimdstate : &softirq_fpsimdstate);
@@ -252,6 +264,8 @@ EXPORT_SYMBOL(kernel_neon_begin_partial);
void kernel_neon_end(void)
{
+ if (!system_supports_fpsimd())
+ return;
if (in_interrupt()) {
struct fpsimd_partial_state *s = this_cpu_ptr(
in_irq() ? &hardirq_fpsimdstate : &softirq_fpsimdstate);
diff --git a/arch/arm64/kernel/head.S b/arch/arm64/kernel/head.S
index 332e33193ccf..4b1abac3485a 100644
--- a/arch/arm64/kernel/head.S
+++ b/arch/arm64/kernel/head.S
@@ -326,14 +326,14 @@ __create_page_tables:
* dirty cache lines being evicted.
*/
adrp x0, idmap_pg_dir
- adrp x1, swapper_pg_dir + SWAPPER_DIR_SIZE
+ adrp x1, swapper_pg_dir + SWAPPER_DIR_SIZE + RESERVED_TTBR0_SIZE
bl __inval_cache_range
/*
* Clear the idmap and swapper page tables.
*/
adrp x0, idmap_pg_dir
- adrp x6, swapper_pg_dir + SWAPPER_DIR_SIZE
+ adrp x6, swapper_pg_dir + SWAPPER_DIR_SIZE + RESERVED_TTBR0_SIZE
1: stp xzr, xzr, [x0], #16
stp xzr, xzr, [x0], #16
stp xzr, xzr, [x0], #16
@@ -412,7 +412,7 @@ __create_page_tables:
* tables again to remove any speculatively loaded cache lines.
*/
adrp x0, idmap_pg_dir
- adrp x1, swapper_pg_dir + SWAPPER_DIR_SIZE
+ adrp x1, swapper_pg_dir + SWAPPER_DIR_SIZE + RESERVED_TTBR0_SIZE
dmb sy
bl __inval_cache_range
@@ -428,7 +428,8 @@ ENDPROC(__create_page_tables)
__primary_switched:
adrp x4, init_thread_union
add sp, x4, #THREAD_SIZE
- msr sp_el0, x4 // Save thread_info
+ adr_l x5, init_task
+ msr sp_el0, x5 // Save thread_info
adr_l x8, vectors // load VBAR_EL1 with virtual
msr vbar_el1, x8 // vector table address
@@ -524,10 +525,21 @@ set_hcr:
msr hcr_el2, x0
isb
- /* Generic timers. */
+ /*
+ * Allow Non-secure EL1 and EL0 to access physical timer and counter.
+ * This is not necessary for VHE, since the host kernel runs in EL2,
+ * and EL0 accesses are configured in the later stage of boot process.
+ * Note that when HCR_EL2.E2H == 1, CNTHCTL_EL2 has the same bit layout
+ * as CNTKCTL_EL1, and CNTKCTL_EL1 accessing instructions are redefined
+ * to access CNTHCTL_EL2. This allows the kernel designed to run at EL1
+ * to transparently mess with the EL0 bits via CNTKCTL_EL1 access in
+ * EL2.
+ */
+ cbnz x2, 1f
mrs x0, cnthctl_el2
orr x0, x0, #3 // Enable EL1 physical timers
msr cnthctl_el2, x0
+1:
msr cntvoff_el2, xzr // Clear virtual offset
#ifdef CONFIG_ARM_GIC_V3
@@ -699,10 +711,10 @@ __secondary_switched:
isb
adr_l x0, secondary_data
- ldr x0, [x0, #CPU_BOOT_STACK] // get secondary_data.stack
- mov sp, x0
- and x0, x0, #~(THREAD_SIZE - 1)
- msr sp_el0, x0 // save thread_info
+ ldr x1, [x0, #CPU_BOOT_STACK] // get secondary_data.stack
+ mov sp, x1
+ ldr x2, [x0, #CPU_BOOT_TASK]
+ msr sp_el0, x2
mov x29, #0
b secondary_start_kernel
ENDPROC(__secondary_switched)
diff --git a/arch/arm64/kernel/hibernate.c b/arch/arm64/kernel/hibernate.c
index d55a7b09959b..fe301cbcb442 100644
--- a/arch/arm64/kernel/hibernate.c
+++ b/arch/arm64/kernel/hibernate.c
@@ -136,7 +136,7 @@ int arch_hibernation_header_save(void *addr, unsigned int max_size)
/* Save the mpidr of the cpu we called cpu_suspend() on... */
if (sleep_cpu < 0) {
- pr_err("Failing to hibernate on an unkown CPU.\n");
+ pr_err("Failing to hibernate on an unknown CPU.\n");
return -ENODEV;
}
hdr->sleep_cpu_mpidr = cpu_logical_map(sleep_cpu);
@@ -547,7 +547,7 @@ out:
int hibernate_resume_nonboot_cpu_disable(void)
{
if (sleep_cpu < 0) {
- pr_err("Failing to resume from hibernate on an unkown CPU.\n");
+ pr_err("Failing to resume from hibernate on an unknown CPU.\n");
return -ENODEV;
}
diff --git a/arch/arm64/kernel/hw_breakpoint.c b/arch/arm64/kernel/hw_breakpoint.c
index 948b73148d56..1b3c747fedda 100644
--- a/arch/arm64/kernel/hw_breakpoint.c
+++ b/arch/arm64/kernel/hw_breakpoint.c
@@ -317,9 +317,21 @@ static int get_hbp_len(u8 hbp_len)
case ARM_BREAKPOINT_LEN_2:
len_in_bytes = 2;
break;
+ case ARM_BREAKPOINT_LEN_3:
+ len_in_bytes = 3;
+ break;
case ARM_BREAKPOINT_LEN_4:
len_in_bytes = 4;
break;
+ case ARM_BREAKPOINT_LEN_5:
+ len_in_bytes = 5;
+ break;
+ case ARM_BREAKPOINT_LEN_6:
+ len_in_bytes = 6;
+ break;
+ case ARM_BREAKPOINT_LEN_7:
+ len_in_bytes = 7;
+ break;
case ARM_BREAKPOINT_LEN_8:
len_in_bytes = 8;
break;
@@ -349,7 +361,7 @@ int arch_check_bp_in_kernelspace(struct perf_event *bp)
* to generic breakpoint descriptions.
*/
int arch_bp_generic_fields(struct arch_hw_breakpoint_ctrl ctrl,
- int *gen_len, int *gen_type)
+ int *gen_len, int *gen_type, int *offset)
{
/* Type */
switch (ctrl.type) {
@@ -369,17 +381,33 @@ int arch_bp_generic_fields(struct arch_hw_breakpoint_ctrl ctrl,
return -EINVAL;
}
+ if (!ctrl.len)
+ return -EINVAL;
+ *offset = __ffs(ctrl.len);
+
/* Len */
- switch (ctrl.len) {
+ switch (ctrl.len >> *offset) {
case ARM_BREAKPOINT_LEN_1:
*gen_len = HW_BREAKPOINT_LEN_1;
break;
case ARM_BREAKPOINT_LEN_2:
*gen_len = HW_BREAKPOINT_LEN_2;
break;
+ case ARM_BREAKPOINT_LEN_3:
+ *gen_len = HW_BREAKPOINT_LEN_3;
+ break;
case ARM_BREAKPOINT_LEN_4:
*gen_len = HW_BREAKPOINT_LEN_4;
break;
+ case ARM_BREAKPOINT_LEN_5:
+ *gen_len = HW_BREAKPOINT_LEN_5;
+ break;
+ case ARM_BREAKPOINT_LEN_6:
+ *gen_len = HW_BREAKPOINT_LEN_6;
+ break;
+ case ARM_BREAKPOINT_LEN_7:
+ *gen_len = HW_BREAKPOINT_LEN_7;
+ break;
case ARM_BREAKPOINT_LEN_8:
*gen_len = HW_BREAKPOINT_LEN_8;
break;
@@ -423,9 +451,21 @@ static int arch_build_bp_info(struct perf_event *bp)
case HW_BREAKPOINT_LEN_2:
info->ctrl.len = ARM_BREAKPOINT_LEN_2;
break;
+ case HW_BREAKPOINT_LEN_3:
+ info->ctrl.len = ARM_BREAKPOINT_LEN_3;
+ break;
case HW_BREAKPOINT_LEN_4:
info->ctrl.len = ARM_BREAKPOINT_LEN_4;
break;
+ case HW_BREAKPOINT_LEN_5:
+ info->ctrl.len = ARM_BREAKPOINT_LEN_5;
+ break;
+ case HW_BREAKPOINT_LEN_6:
+ info->ctrl.len = ARM_BREAKPOINT_LEN_6;
+ break;
+ case HW_BREAKPOINT_LEN_7:
+ info->ctrl.len = ARM_BREAKPOINT_LEN_7;
+ break;
case HW_BREAKPOINT_LEN_8:
info->ctrl.len = ARM_BREAKPOINT_LEN_8;
break;
@@ -517,18 +557,17 @@ int arch_validate_hwbkpt_settings(struct perf_event *bp)
default:
return -EINVAL;
}
-
- info->address &= ~alignment_mask;
- info->ctrl.len <<= offset;
} else {
if (info->ctrl.type == ARM_BREAKPOINT_EXECUTE)
alignment_mask = 0x3;
else
alignment_mask = 0x7;
- if (info->address & alignment_mask)
- return -EINVAL;
+ offset = info->address & alignment_mask;
}
+ info->address &= ~alignment_mask;
+ info->ctrl.len <<= offset;
+
/*
* Disallow per-task kernel breakpoints since these would
* complicate the stepping code.
@@ -661,12 +700,47 @@ unlock:
}
NOKPROBE_SYMBOL(breakpoint_handler);
+/*
+ * Arm64 hardware does not always report a watchpoint hit address that matches
+ * one of the watchpoints set. It can also report an address "near" the
+ * watchpoint if a single instruction access both watched and unwatched
+ * addresses. There is no straight-forward way, short of disassembling the
+ * offending instruction, to map that address back to the watchpoint. This
+ * function computes the distance of the memory access from the watchpoint as a
+ * heuristic for the likelyhood that a given access triggered the watchpoint.
+ *
+ * See Section D2.10.5 "Determining the memory location that caused a Watchpoint
+ * exception" of ARMv8 Architecture Reference Manual for details.
+ *
+ * The function returns the distance of the address from the bytes watched by
+ * the watchpoint. In case of an exact match, it returns 0.
+ */
+static u64 get_distance_from_watchpoint(unsigned long addr, u64 val,
+ struct arch_hw_breakpoint_ctrl *ctrl)
+{
+ u64 wp_low, wp_high;
+ u32 lens, lene;
+
+ lens = __ffs(ctrl->len);
+ lene = __fls(ctrl->len);
+
+ wp_low = val + lens;
+ wp_high = val + lene;
+ if (addr < wp_low)
+ return wp_low - addr;
+ else if (addr > wp_high)
+ return addr - wp_high;
+ else
+ return 0;
+}
+
static int watchpoint_handler(unsigned long addr, unsigned int esr,
struct pt_regs *regs)
{
- int i, step = 0, *kernel_step, access;
+ int i, step = 0, *kernel_step, access, closest_match = 0;
+ u64 min_dist = -1, dist;
u32 ctrl_reg;
- u64 val, alignment_mask;
+ u64 val;
struct perf_event *wp, **slots;
struct debug_info *debug_info;
struct arch_hw_breakpoint *info;
@@ -675,35 +749,15 @@ static int watchpoint_handler(unsigned long addr, unsigned int esr,
slots = this_cpu_ptr(wp_on_reg);
debug_info = &current->thread.debug;
+ /*
+ * Find all watchpoints that match the reported address. If no exact
+ * match is found. Attribute the hit to the closest watchpoint.
+ */
+ rcu_read_lock();
for (i = 0; i < core_num_wrps; ++i) {
- rcu_read_lock();
-
wp = slots[i];
-
if (wp == NULL)
- goto unlock;
-
- info = counter_arch_bp(wp);
- /* AArch32 watchpoints are either 4 or 8 bytes aligned. */
- if (is_compat_task()) {
- if (info->ctrl.len == ARM_BREAKPOINT_LEN_8)
- alignment_mask = 0x7;
- else
- alignment_mask = 0x3;
- } else {
- alignment_mask = 0x7;
- }
-
- /* Check if the watchpoint value matches. */
- val = read_wb_reg(AARCH64_DBG_REG_WVR, i);
- if (val != (addr & ~alignment_mask))
- goto unlock;
-
- /* Possible match, check the byte address select to confirm. */
- ctrl_reg = read_wb_reg(AARCH64_DBG_REG_WCR, i);
- decode_ctrl_reg(ctrl_reg, &ctrl);
- if (!((1 << (addr & alignment_mask)) & ctrl.len))
- goto unlock;
+ continue;
/*
* Check that the access type matches.
@@ -712,18 +766,41 @@ static int watchpoint_handler(unsigned long addr, unsigned int esr,
access = (esr & AARCH64_ESR_ACCESS_MASK) ? HW_BREAKPOINT_W :
HW_BREAKPOINT_R;
if (!(access & hw_breakpoint_type(wp)))
- goto unlock;
+ continue;
+ /* Check if the watchpoint value and byte select match. */
+ val = read_wb_reg(AARCH64_DBG_REG_WVR, i);
+ ctrl_reg = read_wb_reg(AARCH64_DBG_REG_WCR, i);
+ decode_ctrl_reg(ctrl_reg, &ctrl);
+ dist = get_distance_from_watchpoint(addr, val, &ctrl);
+ if (dist < min_dist) {
+ min_dist = dist;
+ closest_match = i;
+ }
+ /* Is this an exact match? */
+ if (dist != 0)
+ continue;
+
+ info = counter_arch_bp(wp);
info->trigger = addr;
perf_bp_event(wp, regs);
/* Do we need to handle the stepping? */
if (is_default_overflow_handler(wp))
step = 1;
+ }
+ if (min_dist > 0 && min_dist != -1) {
+ /* No exact match found. */
+ wp = slots[closest_match];
+ info = counter_arch_bp(wp);
+ info->trigger = addr;
+ perf_bp_event(wp, regs);
-unlock:
- rcu_read_unlock();
+ /* Do we need to handle the stepping? */
+ if (is_default_overflow_handler(wp))
+ step = 1;
}
+ rcu_read_unlock();
if (!step)
return 0;
diff --git a/arch/arm64/kernel/insn.c b/arch/arm64/kernel/insn.c
index 6f2ac4fc66ca..94b62c1fa4df 100644
--- a/arch/arm64/kernel/insn.c
+++ b/arch/arm64/kernel/insn.c
@@ -30,7 +30,6 @@
#include <asm/cacheflush.h>
#include <asm/debug-monitors.h>
#include <asm/fixmap.h>
-#include <asm/opcodes.h>
#include <asm/insn.h>
#define AARCH64_INSN_SF_BIT BIT(31)
diff --git a/arch/arm64/kernel/kgdb.c b/arch/arm64/kernel/kgdb.c
index e017a9493b92..d217c9e95b06 100644
--- a/arch/arm64/kernel/kgdb.c
+++ b/arch/arm64/kernel/kgdb.c
@@ -247,6 +247,9 @@ NOKPROBE_SYMBOL(kgdb_compiled_brk_fn);
static int kgdb_step_brk_fn(struct pt_regs *regs, unsigned int esr)
{
+ if (!kgdb_single_step)
+ return DBG_HOOK_ERROR;
+
kgdb_handle_exception(1, SIGTRAP, 0, regs);
return 0;
}
diff --git a/arch/arm64/kernel/probes/Makefile b/arch/arm64/kernel/probes/Makefile
index ce06312e3d34..89b6df613dde 100644
--- a/arch/arm64/kernel/probes/Makefile
+++ b/arch/arm64/kernel/probes/Makefile
@@ -1,3 +1,5 @@
obj-$(CONFIG_KPROBES) += kprobes.o decode-insn.o \
kprobes_trampoline.o \
simulate-insn.o
+obj-$(CONFIG_UPROBES) += uprobes.o decode-insn.o \
+ simulate-insn.o
diff --git a/arch/arm64/kernel/probes/decode-insn.c b/arch/arm64/kernel/probes/decode-insn.c
index d1731bf977ef..6bf6657a5a52 100644
--- a/arch/arm64/kernel/probes/decode-insn.c
+++ b/arch/arm64/kernel/probes/decode-insn.c
@@ -17,7 +17,6 @@
#include <linux/kprobes.h>
#include <linux/module.h>
#include <linux/kallsyms.h>
-#include <asm/kprobes.h>
#include <asm/insn.h>
#include <asm/sections.h>
@@ -78,8 +77,8 @@ static bool __kprobes aarch64_insn_is_steppable(u32 insn)
* INSN_GOOD If instruction is supported and uses instruction slot,
* INSN_GOOD_NO_SLOT If instruction is supported but doesn't use its slot.
*/
-static enum kprobe_insn __kprobes
-arm_probe_decode_insn(kprobe_opcode_t insn, struct arch_specific_insn *asi)
+enum probe_insn __kprobes
+arm_probe_decode_insn(probe_opcode_t insn, struct arch_probe_insn *api)
{
/*
* Instructions reading or modifying the PC won't work from the XOL
@@ -89,26 +88,26 @@ arm_probe_decode_insn(kprobe_opcode_t insn, struct arch_specific_insn *asi)
return INSN_GOOD;
if (aarch64_insn_is_bcond(insn)) {
- asi->handler = simulate_b_cond;
+ api->handler = simulate_b_cond;
} else if (aarch64_insn_is_cbz(insn) ||
aarch64_insn_is_cbnz(insn)) {
- asi->handler = simulate_cbz_cbnz;
+ api->handler = simulate_cbz_cbnz;
} else if (aarch64_insn_is_tbz(insn) ||
aarch64_insn_is_tbnz(insn)) {
- asi->handler = simulate_tbz_tbnz;
+ api->handler = simulate_tbz_tbnz;
} else if (aarch64_insn_is_adr_adrp(insn)) {
- asi->handler = simulate_adr_adrp;
+ api->handler = simulate_adr_adrp;
} else if (aarch64_insn_is_b(insn) ||
aarch64_insn_is_bl(insn)) {
- asi->handler = simulate_b_bl;
+ api->handler = simulate_b_bl;
} else if (aarch64_insn_is_br(insn) ||
aarch64_insn_is_blr(insn) ||
aarch64_insn_is_ret(insn)) {
- asi->handler = simulate_br_blr_ret;
+ api->handler = simulate_br_blr_ret;
} else if (aarch64_insn_is_ldr_lit(insn)) {
- asi->handler = simulate_ldr_literal;
+ api->handler = simulate_ldr_literal;
} else if (aarch64_insn_is_ldrsw_lit(insn)) {
- asi->handler = simulate_ldrsw_literal;
+ api->handler = simulate_ldrsw_literal;
} else {
/*
* Instruction cannot be stepped out-of-line and we don't
@@ -120,6 +119,7 @@ arm_probe_decode_insn(kprobe_opcode_t insn, struct arch_specific_insn *asi)
return INSN_GOOD_NO_SLOT;
}
+#ifdef CONFIG_KPROBES
static bool __kprobes
is_probed_address_atomic(kprobe_opcode_t *scan_start, kprobe_opcode_t *scan_end)
{
@@ -138,12 +138,12 @@ is_probed_address_atomic(kprobe_opcode_t *scan_start, kprobe_opcode_t *scan_end)
return false;
}
-enum kprobe_insn __kprobes
+enum probe_insn __kprobes
arm_kprobe_decode_insn(kprobe_opcode_t *addr, struct arch_specific_insn *asi)
{
- enum kprobe_insn decoded;
- kprobe_opcode_t insn = le32_to_cpu(*addr);
- kprobe_opcode_t *scan_end = NULL;
+ enum probe_insn decoded;
+ probe_opcode_t insn = le32_to_cpu(*addr);
+ probe_opcode_t *scan_end = NULL;
unsigned long size = 0, offset = 0;
/*
@@ -162,7 +162,7 @@ arm_kprobe_decode_insn(kprobe_opcode_t *addr, struct arch_specific_insn *asi)
else
scan_end = addr - MAX_ATOMIC_CONTEXT_SIZE;
}
- decoded = arm_probe_decode_insn(insn, asi);
+ decoded = arm_probe_decode_insn(insn, &asi->api);
if (decoded != INSN_REJECTED && scan_end)
if (is_probed_address_atomic(addr - 1, scan_end))
@@ -170,3 +170,4 @@ arm_kprobe_decode_insn(kprobe_opcode_t *addr, struct arch_specific_insn *asi)
return decoded;
}
+#endif
diff --git a/arch/arm64/kernel/probes/decode-insn.h b/arch/arm64/kernel/probes/decode-insn.h
index d438289646a6..76d3f315407f 100644
--- a/arch/arm64/kernel/probes/decode-insn.h
+++ b/arch/arm64/kernel/probes/decode-insn.h
@@ -23,13 +23,17 @@
*/
#define MAX_ATOMIC_CONTEXT_SIZE (128 / sizeof(kprobe_opcode_t))
-enum kprobe_insn {
+enum probe_insn {
INSN_REJECTED,
INSN_GOOD_NO_SLOT,
INSN_GOOD,
};
-enum kprobe_insn __kprobes
+#ifdef CONFIG_KPROBES
+enum probe_insn __kprobes
arm_kprobe_decode_insn(kprobe_opcode_t *addr, struct arch_specific_insn *asi);
+#endif
+enum probe_insn __kprobes
+arm_probe_decode_insn(probe_opcode_t insn, struct arch_probe_insn *asi);
#endif /* _ARM_KERNEL_KPROBES_ARM64_H */
diff --git a/arch/arm64/kernel/probes/kprobes.c b/arch/arm64/kernel/probes/kprobes.c
index f5077ea7af6d..1decd2b2c730 100644
--- a/arch/arm64/kernel/probes/kprobes.c
+++ b/arch/arm64/kernel/probes/kprobes.c
@@ -44,31 +44,31 @@ post_kprobe_handler(struct kprobe_ctlblk *, struct pt_regs *);
static void __kprobes arch_prepare_ss_slot(struct kprobe *p)
{
/* prepare insn slot */
- p->ainsn.insn[0] = cpu_to_le32(p->opcode);
+ p->ainsn.api.insn[0] = cpu_to_le32(p->opcode);
- flush_icache_range((uintptr_t) (p->ainsn.insn),
- (uintptr_t) (p->ainsn.insn) +
+ flush_icache_range((uintptr_t) (p->ainsn.api.insn),
+ (uintptr_t) (p->ainsn.api.insn) +
MAX_INSN_SIZE * sizeof(kprobe_opcode_t));
/*
* Needs restoring of return address after stepping xol.
*/
- p->ainsn.restore = (unsigned long) p->addr +
+ p->ainsn.api.restore = (unsigned long) p->addr +
sizeof(kprobe_opcode_t);
}
static void __kprobes arch_prepare_simulate(struct kprobe *p)
{
/* This instructions is not executed xol. No need to adjust the PC */
- p->ainsn.restore = 0;
+ p->ainsn.api.restore = 0;
}
static void __kprobes arch_simulate_insn(struct kprobe *p, struct pt_regs *regs)
{
struct kprobe_ctlblk *kcb = get_kprobe_ctlblk();
- if (p->ainsn.handler)
- p->ainsn.handler((u32)p->opcode, (long)p->addr, regs);
+ if (p->ainsn.api.handler)
+ p->ainsn.api.handler((u32)p->opcode, (long)p->addr, regs);
/* single step simulated, now go for post processing */
post_kprobe_handler(kcb, regs);
@@ -98,18 +98,18 @@ int __kprobes arch_prepare_kprobe(struct kprobe *p)
return -EINVAL;
case INSN_GOOD_NO_SLOT: /* insn need simulation */
- p->ainsn.insn = NULL;
+ p->ainsn.api.insn = NULL;
break;
case INSN_GOOD: /* instruction uses slot */
- p->ainsn.insn = get_insn_slot();
- if (!p->ainsn.insn)
+ p->ainsn.api.insn = get_insn_slot();
+ if (!p->ainsn.api.insn)
return -ENOMEM;
break;
};
/* prepare the instruction */
- if (p->ainsn.insn)
+ if (p->ainsn.api.insn)
arch_prepare_ss_slot(p);
else
arch_prepare_simulate(p);
@@ -142,9 +142,9 @@ void __kprobes arch_disarm_kprobe(struct kprobe *p)
void __kprobes arch_remove_kprobe(struct kprobe *p)
{
- if (p->ainsn.insn) {
- free_insn_slot(p->ainsn.insn, 0);
- p->ainsn.insn = NULL;
+ if (p->ainsn.api.insn) {
+ free_insn_slot(p->ainsn.api.insn, 0);
+ p->ainsn.api.insn = NULL;
}
}
@@ -244,9 +244,9 @@ static void __kprobes setup_singlestep(struct kprobe *p,
}
- if (p->ainsn.insn) {
+ if (p->ainsn.api.insn) {
/* prepare for single stepping */
- slot = (unsigned long)p->ainsn.insn;
+ slot = (unsigned long)p->ainsn.api.insn;
set_ss_context(kcb, slot); /* mark pending ss */
@@ -295,8 +295,8 @@ post_kprobe_handler(struct kprobe_ctlblk *kcb, struct pt_regs *regs)
return;
/* return addr restore if non-branching insn */
- if (cur->ainsn.restore != 0)
- instruction_pointer_set(regs, cur->ainsn.restore);
+ if (cur->ainsn.api.restore != 0)
+ instruction_pointer_set(regs, cur->ainsn.api.restore);
/* restore back original saved kprobe variables and continue */
if (kcb->kprobe_status == KPROBE_REENTER) {
diff --git a/arch/arm64/kernel/probes/simulate-insn.c b/arch/arm64/kernel/probes/simulate-insn.c
index 8977ce9d009d..357d3efe1366 100644
--- a/arch/arm64/kernel/probes/simulate-insn.c
+++ b/arch/arm64/kernel/probes/simulate-insn.c
@@ -13,28 +13,26 @@
* General Public License for more details.
*/
+#include <linux/bitops.h>
#include <linux/kernel.h>
#include <linux/kprobes.h>
#include "simulate-insn.h"
-#define sign_extend(x, signbit) \
- ((x) | (0 - ((x) & (1 << (signbit)))))
-
#define bbl_displacement(insn) \
- sign_extend(((insn) & 0x3ffffff) << 2, 27)
+ sign_extend32(((insn) & 0x3ffffff) << 2, 27)
#define bcond_displacement(insn) \
- sign_extend(((insn >> 5) & 0x7ffff) << 2, 20)
+ sign_extend32(((insn >> 5) & 0x7ffff) << 2, 20)
#define cbz_displacement(insn) \
- sign_extend(((insn >> 5) & 0x7ffff) << 2, 20)
+ sign_extend32(((insn >> 5) & 0x7ffff) << 2, 20)
#define tbz_displacement(insn) \
- sign_extend(((insn >> 5) & 0x3fff) << 2, 15)
+ sign_extend32(((insn >> 5) & 0x3fff) << 2, 15)
#define ldr_displacement(insn) \
- sign_extend(((insn >> 5) & 0x7ffff) << 2, 20)
+ sign_extend32(((insn >> 5) & 0x7ffff) << 2, 20)
static inline void set_x_reg(struct pt_regs *regs, int reg, u64 val)
{
@@ -106,7 +104,7 @@ simulate_adr_adrp(u32 opcode, long addr, struct pt_regs *regs)
xn = opcode & 0x1f;
imm = ((opcode >> 3) & 0x1ffffc) | ((opcode >> 29) & 0x3);
- imm = sign_extend(imm, 20);
+ imm = sign_extend64(imm, 20);
if (opcode & 0x80000000)
val = (imm<<12) + (addr & 0xfffffffffffff000);
else
diff --git a/arch/arm64/kernel/probes/uprobes.c b/arch/arm64/kernel/probes/uprobes.c
new file mode 100644
index 000000000000..26c998534dca
--- /dev/null
+++ b/arch/arm64/kernel/probes/uprobes.c
@@ -0,0 +1,216 @@
+/*
+ * Copyright (C) 2014-2016 Pratyush Anand <panand@redhat.com>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ */
+#include <linux/highmem.h>
+#include <linux/ptrace.h>
+#include <linux/uprobes.h>
+#include <asm/cacheflush.h>
+
+#include "decode-insn.h"
+
+#define UPROBE_INV_FAULT_CODE UINT_MAX
+
+void arch_uprobe_copy_ixol(struct page *page, unsigned long vaddr,
+ void *src, unsigned long len)
+{
+ void *xol_page_kaddr = kmap_atomic(page);
+ void *dst = xol_page_kaddr + (vaddr & ~PAGE_MASK);
+
+ /* Initialize the slot */
+ memcpy(dst, src, len);
+
+ /* flush caches (dcache/icache) */
+ sync_icache_aliases(dst, len);
+
+ kunmap_atomic(xol_page_kaddr);
+}
+
+unsigned long uprobe_get_swbp_addr(struct pt_regs *regs)
+{
+ return instruction_pointer(regs);
+}
+
+int arch_uprobe_analyze_insn(struct arch_uprobe *auprobe, struct mm_struct *mm,
+ unsigned long addr)
+{
+ probe_opcode_t insn;
+
+ /* TODO: Currently we do not support AARCH32 instruction probing */
+ if (test_bit(TIF_32BIT, &mm->context.flags))
+ return -ENOTSUPP;
+ else if (!IS_ALIGNED(addr, AARCH64_INSN_SIZE))
+ return -EINVAL;
+
+ insn = *(probe_opcode_t *)(&auprobe->insn[0]);
+
+ switch (arm_probe_decode_insn(insn, &auprobe->api)) {
+ case INSN_REJECTED:
+ return -EINVAL;
+
+ case INSN_GOOD_NO_SLOT:
+ auprobe->simulate = true;
+ break;
+
+ default:
+ break;
+ }
+
+ return 0;
+}
+
+int arch_uprobe_pre_xol(struct arch_uprobe *auprobe, struct pt_regs *regs)
+{
+ struct uprobe_task *utask = current->utask;
+
+ /* Initialize with an invalid fault code to detect if ol insn trapped */
+ current->thread.fault_code = UPROBE_INV_FAULT_CODE;
+
+ /* Instruction points to execute ol */
+ instruction_pointer_set(regs, utask->xol_vaddr);
+
+ user_enable_single_step(current);
+
+ return 0;
+}
+
+int arch_uprobe_post_xol(struct arch_uprobe *auprobe, struct pt_regs *regs)
+{
+ struct uprobe_task *utask = current->utask;
+
+ WARN_ON_ONCE(current->thread.fault_code != UPROBE_INV_FAULT_CODE);
+
+ /* Instruction points to execute next to breakpoint address */
+ instruction_pointer_set(regs, utask->vaddr + 4);
+
+ user_disable_single_step(current);
+
+ return 0;
+}
+bool arch_uprobe_xol_was_trapped(struct task_struct *t)
+{
+ /*
+ * Between arch_uprobe_pre_xol and arch_uprobe_post_xol, if an xol
+ * insn itself is trapped, then detect the case with the help of
+ * invalid fault code which is being set in arch_uprobe_pre_xol
+ */
+ if (t->thread.fault_code != UPROBE_INV_FAULT_CODE)
+ return true;
+
+ return false;
+}
+
+bool arch_uprobe_skip_sstep(struct arch_uprobe *auprobe, struct pt_regs *regs)
+{
+ probe_opcode_t insn;
+ unsigned long addr;
+
+ if (!auprobe->simulate)
+ return false;
+
+ insn = *(probe_opcode_t *)(&auprobe->insn[0]);
+ addr = instruction_pointer(regs);
+
+ if (auprobe->api.handler)
+ auprobe->api.handler(insn, addr, regs);
+
+ return true;
+}
+
+void arch_uprobe_abort_xol(struct arch_uprobe *auprobe, struct pt_regs *regs)
+{
+ struct uprobe_task *utask = current->utask;
+
+ /*
+ * Task has received a fatal signal, so reset back to probbed
+ * address.
+ */
+ instruction_pointer_set(regs, utask->vaddr);
+
+ user_disable_single_step(current);
+}
+
+bool arch_uretprobe_is_alive(struct return_instance *ret, enum rp_check ctx,
+ struct pt_regs *regs)
+{
+ /*
+ * If a simple branch instruction (B) was called for retprobed
+ * assembly label then return true even when regs->sp and ret->stack
+ * are same. It will ensure that cleanup and reporting of return
+ * instances corresponding to callee label is done when
+ * handle_trampoline for called function is executed.
+ */
+ if (ctx == RP_CHECK_CHAIN_CALL)
+ return regs->sp <= ret->stack;
+ else
+ return regs->sp < ret->stack;
+}
+
+unsigned long
+arch_uretprobe_hijack_return_addr(unsigned long trampoline_vaddr,
+ struct pt_regs *regs)
+{
+ unsigned long orig_ret_vaddr;
+
+ orig_ret_vaddr = procedure_link_pointer(regs);
+ /* Replace the return addr with trampoline addr */
+ procedure_link_pointer_set(regs, trampoline_vaddr);
+
+ return orig_ret_vaddr;
+}
+
+int arch_uprobe_exception_notify(struct notifier_block *self,
+ unsigned long val, void *data)
+{
+ return NOTIFY_DONE;
+}
+
+static int uprobe_breakpoint_handler(struct pt_regs *regs,
+ unsigned int esr)
+{
+ if (user_mode(regs) && uprobe_pre_sstep_notifier(regs))
+ return DBG_HOOK_HANDLED;
+
+ return DBG_HOOK_ERROR;
+}
+
+static int uprobe_single_step_handler(struct pt_regs *regs,
+ unsigned int esr)
+{
+ struct uprobe_task *utask = current->utask;
+
+ if (user_mode(regs)) {
+ WARN_ON(utask &&
+ (instruction_pointer(regs) != utask->xol_vaddr + 4));
+
+ if (uprobe_post_sstep_notifier(regs))
+ return DBG_HOOK_HANDLED;
+ }
+
+ return DBG_HOOK_ERROR;
+}
+
+/* uprobe breakpoint handler hook */
+static struct break_hook uprobes_break_hook = {
+ .esr_mask = BRK64_ESR_MASK,
+ .esr_val = BRK64_ESR_UPROBES,
+ .fn = uprobe_breakpoint_handler,
+};
+
+/* uprobe single step handler hook */
+static struct step_hook uprobes_step_hook = {
+ .fn = uprobe_single_step_handler,
+};
+
+static int __init arch_init_uprobes(void)
+{
+ register_break_hook(&uprobes_break_hook);
+ register_step_hook(&uprobes_step_hook);
+
+ return 0;
+}
+
+device_initcall(arch_init_uprobes);
diff --git a/arch/arm64/kernel/process.c b/arch/arm64/kernel/process.c
index 01753cd7d3f0..a3a2816ba73a 100644
--- a/arch/arm64/kernel/process.c
+++ b/arch/arm64/kernel/process.c
@@ -45,6 +45,7 @@
#include <linux/personality.h>
#include <linux/notifier.h>
#include <trace/events/power.h>
+#include <linux/percpu.h>
#include <asm/alternative.h>
#include <asm/compat.h>
@@ -282,7 +283,7 @@ int copy_thread(unsigned long clone_flags, unsigned long stack_start,
memset(childregs, 0, sizeof(struct pt_regs));
childregs->pstate = PSR_MODE_EL1h;
if (IS_ENABLED(CONFIG_ARM64_UAO) &&
- cpus_have_cap(ARM64_HAS_UAO))
+ cpus_have_const_cap(ARM64_HAS_UAO))
childregs->pstate |= PSR_UAO_BIT;
p->thread.cpu_context.x19 = stack_start;
p->thread.cpu_context.x20 = stk_sz;
@@ -322,6 +323,20 @@ void uao_thread_switch(struct task_struct *next)
}
/*
+ * We store our current task in sp_el0, which is clobbered by userspace. Keep a
+ * shadow copy so that we can restore this upon entry from userspace.
+ *
+ * This is *only* for exception entry from EL0, and is not valid until we
+ * __switch_to() a user task.
+ */
+DEFINE_PER_CPU(struct task_struct *, __entry_task);
+
+static void entry_task_switch(struct task_struct *next)
+{
+ __this_cpu_write(__entry_task, next);
+}
+
+/*
* Thread switching.
*/
struct task_struct *__switch_to(struct task_struct *prev,
@@ -333,6 +348,7 @@ struct task_struct *__switch_to(struct task_struct *prev,
tls_thread_switch(next);
hw_breakpoint_thread_switch(next);
contextidr_thread_switch(next);
+ entry_task_switch(next);
uao_thread_switch(next);
/*
@@ -350,27 +366,35 @@ struct task_struct *__switch_to(struct task_struct *prev,
unsigned long get_wchan(struct task_struct *p)
{
struct stackframe frame;
- unsigned long stack_page;
+ unsigned long stack_page, ret = 0;
int count = 0;
if (!p || p == current || p->state == TASK_RUNNING)
return 0;
+ stack_page = (unsigned long)try_get_task_stack(p);
+ if (!stack_page)
+ return 0;
+
frame.fp = thread_saved_fp(p);
frame.sp = thread_saved_sp(p);
frame.pc = thread_saved_pc(p);
#ifdef CONFIG_FUNCTION_GRAPH_TRACER
frame.graph = p->curr_ret_stack;
#endif
- stack_page = (unsigned long)task_stack_page(p);
do {
if (frame.sp < stack_page ||
frame.sp >= stack_page + THREAD_SIZE ||
unwind_frame(p, &frame))
- return 0;
- if (!in_sched_functions(frame.pc))
- return frame.pc;
+ goto out;
+ if (!in_sched_functions(frame.pc)) {
+ ret = frame.pc;
+ goto out;
+ }
} while (count ++ < 16);
- return 0;
+
+out:
+ put_task_stack(p);
+ return ret;
}
unsigned long arch_align_stack(unsigned long sp)
diff --git a/arch/arm64/kernel/ptrace.c b/arch/arm64/kernel/ptrace.c
index e0c81da60f76..fc35e06ccaac 100644
--- a/arch/arm64/kernel/ptrace.c
+++ b/arch/arm64/kernel/ptrace.c
@@ -327,13 +327,13 @@ static int ptrace_hbp_fill_attr_ctrl(unsigned int note_type,
struct arch_hw_breakpoint_ctrl ctrl,
struct perf_event_attr *attr)
{
- int err, len, type, disabled = !ctrl.enabled;
+ int err, len, type, offset, disabled = !ctrl.enabled;
attr->disabled = disabled;
if (disabled)
return 0;
- err = arch_bp_generic_fields(ctrl, &len, &type);
+ err = arch_bp_generic_fields(ctrl, &len, &type, &offset);
if (err)
return err;
@@ -352,6 +352,7 @@ static int ptrace_hbp_fill_attr_ctrl(unsigned int note_type,
attr->bp_len = len;
attr->bp_type = type;
+ attr->bp_addr += offset;
return 0;
}
@@ -404,7 +405,7 @@ static int ptrace_hbp_get_addr(unsigned int note_type,
if (IS_ERR(bp))
return PTR_ERR(bp);
- *addr = bp ? bp->attr.bp_addr : 0;
+ *addr = bp ? counter_arch_bp(bp)->address : 0;
return 0;
}
diff --git a/arch/arm64/kernel/return_address.c b/arch/arm64/kernel/return_address.c
index 1718706fde83..12a87f2600f2 100644
--- a/arch/arm64/kernel/return_address.c
+++ b/arch/arm64/kernel/return_address.c
@@ -12,6 +12,7 @@
#include <linux/export.h>
#include <linux/ftrace.h>
+#include <asm/stack_pointer.h>
#include <asm/stacktrace.h>
struct return_address_data {
diff --git a/arch/arm64/kernel/setup.c b/arch/arm64/kernel/setup.c
index f534f492a268..a53f52ac81c6 100644
--- a/arch/arm64/kernel/setup.c
+++ b/arch/arm64/kernel/setup.c
@@ -291,6 +291,15 @@ void __init setup_arch(char **cmdline_p)
smp_init_cpus();
smp_build_mpidr_hash();
+#ifdef CONFIG_ARM64_SW_TTBR0_PAN
+ /*
+ * Make sure init_thread_info.ttbr0 always generates translation
+ * faults in case uaccess_enable() is inadvertently called by the init
+ * thread.
+ */
+ init_task.thread_info.ttbr0 = virt_to_phys(empty_zero_page);
+#endif
+
#ifdef CONFIG_VT
#if defined(CONFIG_VGA_CONSOLE)
conswitchp = &vga_con;
diff --git a/arch/arm64/kernel/signal.c b/arch/arm64/kernel/signal.c
index 404dd67080b9..c7b6de62f9d3 100644
--- a/arch/arm64/kernel/signal.c
+++ b/arch/arm64/kernel/signal.c
@@ -414,6 +414,9 @@ asmlinkage void do_notify_resume(struct pt_regs *regs,
} else {
local_irq_enable();
+ if (thread_flags & _TIF_UPROBE)
+ uprobe_notify_resume(regs);
+
if (thread_flags & _TIF_SIGPENDING)
do_signal(regs);
diff --git a/arch/arm64/kernel/sleep.S b/arch/arm64/kernel/sleep.S
index 1bec41b5fda3..df67652e46f0 100644
--- a/arch/arm64/kernel/sleep.S
+++ b/arch/arm64/kernel/sleep.S
@@ -125,9 +125,6 @@ ENTRY(_cpu_resume)
/* load sp from context */
ldr x2, [x0, #CPU_CTX_SP]
mov sp, x2
- /* save thread_info */
- and x2, x2, #~(THREAD_SIZE - 1)
- msr sp_el0, x2
/*
* cpu_do_resume expects x0 to contain context address pointer
*/
diff --git a/arch/arm64/kernel/smp.c b/arch/arm64/kernel/smp.c
index 8507703dabe4..cb87234cfcf2 100644
--- a/arch/arm64/kernel/smp.c
+++ b/arch/arm64/kernel/smp.c
@@ -58,6 +58,9 @@
#define CREATE_TRACE_POINTS
#include <trace/events/ipi.h>
+DEFINE_PER_CPU_READ_MOSTLY(int, cpu_number);
+EXPORT_PER_CPU_SYMBOL(cpu_number);
+
/*
* as from 2.5, kernels no longer have an init_tasks structure
* so we need some other way of telling a new secondary core
@@ -146,6 +149,7 @@ int __cpu_up(unsigned int cpu, struct task_struct *idle)
* We need to tell the secondary core where to find its stack and the
* page tables.
*/
+ secondary_data.task = idle;
secondary_data.stack = task_stack_page(idle) + THREAD_START_SP;
update_cpu_boot_status(CPU_MMU_OFF);
__flush_dcache_area(&secondary_data, sizeof(secondary_data));
@@ -170,6 +174,7 @@ int __cpu_up(unsigned int cpu, struct task_struct *idle)
pr_err("CPU%u: failed to boot: %d\n", cpu, ret);
}
+ secondary_data.task = NULL;
secondary_data.stack = NULL;
status = READ_ONCE(secondary_data.status);
if (ret && status) {
@@ -208,7 +213,10 @@ int __cpu_up(unsigned int cpu, struct task_struct *idle)
asmlinkage void secondary_start_kernel(void)
{
struct mm_struct *mm = &init_mm;
- unsigned int cpu = smp_processor_id();
+ unsigned int cpu;
+
+ cpu = task_cpu(current);
+ set_my_cpu_offset(per_cpu_offset(cpu));
/*
* All kernel threads share the same mm context; grab a
@@ -217,8 +225,6 @@ asmlinkage void secondary_start_kernel(void)
atomic_inc(&mm->mm_count);
current->active_mm = mm;
- set_my_cpu_offset(per_cpu_offset(smp_processor_id()));
-
/*
* TTBR0 is only used for the identity mapping at this stage. Make it
* point to zero page to avoid speculatively fetching new entries.
@@ -718,6 +724,8 @@ void __init smp_prepare_cpus(unsigned int max_cpus)
*/
for_each_possible_cpu(cpu) {
+ per_cpu(cpu_number, cpu) = cpu;
+
if (cpu == smp_processor_id())
continue;
diff --git a/arch/arm64/kernel/stacktrace.c b/arch/arm64/kernel/stacktrace.c
index c2efddfca18c..8a552a33c6ef 100644
--- a/arch/arm64/kernel/stacktrace.c
+++ b/arch/arm64/kernel/stacktrace.c
@@ -22,6 +22,7 @@
#include <linux/stacktrace.h>
#include <asm/irq.h>
+#include <asm/stack_pointer.h>
#include <asm/stacktrace.h>
/*
@@ -128,7 +129,6 @@ void notrace walk_stackframe(struct task_struct *tsk, struct stackframe *frame,
break;
}
}
-EXPORT_SYMBOL(walk_stackframe);
#ifdef CONFIG_STACKTRACE
struct stack_trace_data {
@@ -181,6 +181,9 @@ void save_stack_trace_tsk(struct task_struct *tsk, struct stack_trace *trace)
struct stack_trace_data data;
struct stackframe frame;
+ if (!try_get_task_stack(tsk))
+ return;
+
data.trace = trace;
data.skip = trace->skip;
@@ -202,6 +205,8 @@ void save_stack_trace_tsk(struct task_struct *tsk, struct stack_trace *trace)
walk_stackframe(tsk, &frame, save_trace, &data);
if (trace->nr_entries < trace->max_entries)
trace->entries[trace->nr_entries++] = ULONG_MAX;
+
+ put_task_stack(tsk);
}
void save_stack_trace(struct stack_trace *trace)
diff --git a/arch/arm64/kernel/suspend.c b/arch/arm64/kernel/suspend.c
index bb0cd787a9d3..1e3be9064cfa 100644
--- a/arch/arm64/kernel/suspend.c
+++ b/arch/arm64/kernel/suspend.c
@@ -47,12 +47,6 @@ void notrace __cpu_suspend_exit(void)
cpu_uninstall_idmap();
/*
- * Restore per-cpu offset before any kernel
- * subsystem relying on it has a chance to run.
- */
- set_my_cpu_offset(per_cpu_offset(cpu));
-
- /*
* PSTATE was not saved over suspend/resume, re-enable any detected
* features that might not have been set correctly.
*/
diff --git a/arch/arm64/kernel/topology.c b/arch/arm64/kernel/topology.c
index 694f6deedbab..23e9e13bd2aa 100644
--- a/arch/arm64/kernel/topology.c
+++ b/arch/arm64/kernel/topology.c
@@ -19,10 +19,226 @@
#include <linux/nodemask.h>
#include <linux/of.h>
#include <linux/sched.h>
+#include <linux/slab.h>
+#include <linux/string.h>
+#include <linux/cpufreq.h>
+#include <asm/cpu.h>
#include <asm/cputype.h>
#include <asm/topology.h>
+static DEFINE_PER_CPU(unsigned long, cpu_scale) = SCHED_CAPACITY_SCALE;
+static DEFINE_MUTEX(cpu_scale_mutex);
+
+unsigned long arch_scale_cpu_capacity(struct sched_domain *sd, int cpu)
+{
+ return per_cpu(cpu_scale, cpu);
+}
+
+static void set_capacity_scale(unsigned int cpu, unsigned long capacity)
+{
+ per_cpu(cpu_scale, cpu) = capacity;
+}
+
+#ifdef CONFIG_PROC_SYSCTL
+static ssize_t cpu_capacity_show(struct device *dev,
+ struct device_attribute *attr,
+ char *buf)
+{
+ struct cpu *cpu = container_of(dev, struct cpu, dev);
+
+ return sprintf(buf, "%lu\n",
+ arch_scale_cpu_capacity(NULL, cpu->dev.id));
+}
+
+static ssize_t cpu_capacity_store(struct device *dev,
+ struct device_attribute *attr,
+ const char *buf,
+ size_t count)
+{
+ struct cpu *cpu = container_of(dev, struct cpu, dev);
+ int this_cpu = cpu->dev.id, i;
+ unsigned long new_capacity;
+ ssize_t ret;
+
+ if (count) {
+ ret = kstrtoul(buf, 0, &new_capacity);
+ if (ret)
+ return ret;
+ if (new_capacity > SCHED_CAPACITY_SCALE)
+ return -EINVAL;
+
+ mutex_lock(&cpu_scale_mutex);
+ for_each_cpu(i, &cpu_topology[this_cpu].core_sibling)
+ set_capacity_scale(i, new_capacity);
+ mutex_unlock(&cpu_scale_mutex);
+ }
+
+ return count;
+}
+
+static DEVICE_ATTR_RW(cpu_capacity);
+
+static int register_cpu_capacity_sysctl(void)
+{
+ int i;
+ struct device *cpu;
+
+ for_each_possible_cpu(i) {
+ cpu = get_cpu_device(i);
+ if (!cpu) {
+ pr_err("%s: too early to get CPU%d device!\n",
+ __func__, i);
+ continue;
+ }
+ device_create_file(cpu, &dev_attr_cpu_capacity);
+ }
+
+ return 0;
+}
+subsys_initcall(register_cpu_capacity_sysctl);
+#endif
+
+static u32 capacity_scale;
+static u32 *raw_capacity;
+static bool cap_parsing_failed;
+
+static void __init parse_cpu_capacity(struct device_node *cpu_node, int cpu)
+{
+ int ret;
+ u32 cpu_capacity;
+
+ if (cap_parsing_failed)
+ return;
+
+ ret = of_property_read_u32(cpu_node,
+ "capacity-dmips-mhz",
+ &cpu_capacity);
+ if (!ret) {
+ if (!raw_capacity) {
+ raw_capacity = kcalloc(num_possible_cpus(),
+ sizeof(*raw_capacity),
+ GFP_KERNEL);
+ if (!raw_capacity) {
+ pr_err("cpu_capacity: failed to allocate memory for raw capacities\n");
+ cap_parsing_failed = true;
+ return;
+ }
+ }
+ capacity_scale = max(cpu_capacity, capacity_scale);
+ raw_capacity[cpu] = cpu_capacity;
+ pr_debug("cpu_capacity: %s cpu_capacity=%u (raw)\n",
+ cpu_node->full_name, raw_capacity[cpu]);
+ } else {
+ if (raw_capacity) {
+ pr_err("cpu_capacity: missing %s raw capacity\n",
+ cpu_node->full_name);
+ pr_err("cpu_capacity: partial information: fallback to 1024 for all CPUs\n");
+ }
+ cap_parsing_failed = true;
+ kfree(raw_capacity);
+ }
+}
+
+static void normalize_cpu_capacity(void)
+{
+ u64 capacity;
+ int cpu;
+
+ if (!raw_capacity || cap_parsing_failed)
+ return;
+
+ pr_debug("cpu_capacity: capacity_scale=%u\n", capacity_scale);
+ mutex_lock(&cpu_scale_mutex);
+ for_each_possible_cpu(cpu) {
+ pr_debug("cpu_capacity: cpu=%d raw_capacity=%u\n",
+ cpu, raw_capacity[cpu]);
+ capacity = (raw_capacity[cpu] << SCHED_CAPACITY_SHIFT)
+ / capacity_scale;
+ set_capacity_scale(cpu, capacity);
+ pr_debug("cpu_capacity: CPU%d cpu_capacity=%lu\n",
+ cpu, arch_scale_cpu_capacity(NULL, cpu));
+ }
+ mutex_unlock(&cpu_scale_mutex);
+}
+
+#ifdef CONFIG_CPU_FREQ
+static cpumask_var_t cpus_to_visit;
+static bool cap_parsing_done;
+static void parsing_done_workfn(struct work_struct *work);
+static DECLARE_WORK(parsing_done_work, parsing_done_workfn);
+
+static int
+init_cpu_capacity_callback(struct notifier_block *nb,
+ unsigned long val,
+ void *data)
+{
+ struct cpufreq_policy *policy = data;
+ int cpu;
+
+ if (cap_parsing_failed || cap_parsing_done)
+ return 0;
+
+ switch (val) {
+ case CPUFREQ_NOTIFY:
+ pr_debug("cpu_capacity: init cpu capacity for CPUs [%*pbl] (to_visit=%*pbl)\n",
+ cpumask_pr_args(policy->related_cpus),
+ cpumask_pr_args(cpus_to_visit));
+ cpumask_andnot(cpus_to_visit,
+ cpus_to_visit,
+ policy->related_cpus);
+ for_each_cpu(cpu, policy->related_cpus) {
+ raw_capacity[cpu] = arch_scale_cpu_capacity(NULL, cpu) *
+ policy->cpuinfo.max_freq / 1000UL;
+ capacity_scale = max(raw_capacity[cpu], capacity_scale);
+ }
+ if (cpumask_empty(cpus_to_visit)) {
+ normalize_cpu_capacity();
+ kfree(raw_capacity);
+ pr_debug("cpu_capacity: parsing done\n");
+ cap_parsing_done = true;
+ schedule_work(&parsing_done_work);
+ }
+ }
+ return 0;
+}
+
+static struct notifier_block init_cpu_capacity_notifier = {
+ .notifier_call = init_cpu_capacity_callback,
+};
+
+static int __init register_cpufreq_notifier(void)
+{
+ if (cap_parsing_failed)
+ return -EINVAL;
+
+ if (!alloc_cpumask_var(&cpus_to_visit, GFP_KERNEL)) {
+ pr_err("cpu_capacity: failed to allocate memory for cpus_to_visit\n");
+ return -ENOMEM;
+ }
+ cpumask_copy(cpus_to_visit, cpu_possible_mask);
+
+ return cpufreq_register_notifier(&init_cpu_capacity_notifier,
+ CPUFREQ_POLICY_NOTIFIER);
+}
+core_initcall(register_cpufreq_notifier);
+
+static void parsing_done_workfn(struct work_struct *work)
+{
+ cpufreq_unregister_notifier(&init_cpu_capacity_notifier,
+ CPUFREQ_POLICY_NOTIFIER);
+}
+
+#else
+static int __init free_raw_capacity(void)
+{
+ kfree(raw_capacity);
+
+ return 0;
+}
+core_initcall(free_raw_capacity);
+#endif
+
static int __init get_cpu_for_node(struct device_node *node)
{
struct device_node *cpu_node;
@@ -34,6 +250,7 @@ static int __init get_cpu_for_node(struct device_node *node)
for_each_possible_cpu(cpu) {
if (of_get_cpu_node(cpu, NULL) == cpu_node) {
+ parse_cpu_capacity(cpu_node, cpu);
of_node_put(cpu_node);
return cpu;
}
@@ -178,13 +395,17 @@ static int __init parse_dt_topology(void)
* cluster with restricted subnodes.
*/
map = of_get_child_by_name(cn, "cpu-map");
- if (!map)
+ if (!map) {
+ cap_parsing_failed = true;
goto out;
+ }
ret = parse_cluster(map, 0);
if (ret != 0)
goto out_map;
+ normalize_cpu_capacity();
+
/*
* Check that all cores are in the topology; the SMP code will
* only mark cores described in the DT as possible.
diff --git a/arch/arm64/kernel/traps.c b/arch/arm64/kernel/traps.c
index c9986b3e0a96..5b830be79c01 100644
--- a/arch/arm64/kernel/traps.c
+++ b/arch/arm64/kernel/traps.c
@@ -38,6 +38,7 @@
#include <asm/esr.h>
#include <asm/insn.h>
#include <asm/traps.h>
+#include <asm/stack_pointer.h>
#include <asm/stacktrace.h>
#include <asm/exception.h>
#include <asm/system_misc.h>
@@ -147,6 +148,9 @@ static void dump_backtrace(struct pt_regs *regs, struct task_struct *tsk)
if (!tsk)
tsk = current;
+ if (!try_get_task_stack(tsk))
+ return;
+
/*
* Switching between stacks is valid when tracing current and in
* non-preemptible context.
@@ -212,6 +216,8 @@ static void dump_backtrace(struct pt_regs *regs, struct task_struct *tsk)
stack + sizeof(struct pt_regs));
}
}
+
+ put_task_stack(tsk);
}
void show_stack(struct task_struct *tsk, unsigned long *sp)
@@ -227,10 +233,9 @@ void show_stack(struct task_struct *tsk, unsigned long *sp)
#endif
#define S_SMP " SMP"
-static int __die(const char *str, int err, struct thread_info *thread,
- struct pt_regs *regs)
+static int __die(const char *str, int err, struct pt_regs *regs)
{
- struct task_struct *tsk = thread->task;
+ struct task_struct *tsk = current;
static int die_counter;
int ret;
@@ -245,7 +250,8 @@ static int __die(const char *str, int err, struct thread_info *thread,
print_modules();
__show_regs(regs);
pr_emerg("Process %.*s (pid: %d, stack limit = 0x%p)\n",
- TASK_COMM_LEN, tsk->comm, task_pid_nr(tsk), thread + 1);
+ TASK_COMM_LEN, tsk->comm, task_pid_nr(tsk),
+ end_of_stack(tsk));
if (!user_mode(regs)) {
dump_mem(KERN_EMERG, "Stack: ", regs->sp,
@@ -264,7 +270,6 @@ static DEFINE_RAW_SPINLOCK(die_lock);
*/
void die(const char *str, struct pt_regs *regs, int err)
{
- struct thread_info *thread = current_thread_info();
int ret;
oops_enter();
@@ -272,9 +277,9 @@ void die(const char *str, struct pt_regs *regs, int err)
raw_spin_lock_irq(&die_lock);
console_verbose();
bust_spinlocks(1);
- ret = __die(str, err, thread, regs);
+ ret = __die(str, err, regs);
- if (regs && kexec_should_crash(thread->task))
+ if (regs && kexec_should_crash(current))
crash_kexec(regs);
bust_spinlocks(0);
@@ -435,9 +440,10 @@ int cpu_enable_cache_maint_trap(void *__unused)
}
#define __user_cache_maint(insn, address, res) \
- if (untagged_addr(address) >= user_addr_max()) \
+ if (untagged_addr(address) >= user_addr_max()) { \
res = -EFAULT; \
- else \
+ } else { \
+ uaccess_ttbr0_enable(); \
asm volatile ( \
"1: " insn ", %1\n" \
" mov %w0, #0\n" \
@@ -449,7 +455,9 @@ int cpu_enable_cache_maint_trap(void *__unused)
" .popsection\n" \
_ASM_EXTABLE(1b, 3b) \
: "=r" (res) \
- : "r" (address), "i" (-EFAULT) )
+ : "r" (address), "i" (-EFAULT)); \
+ uaccess_ttbr0_disable(); \
+ }
static void user_cache_maint_handler(unsigned int esr, struct pt_regs *regs)
{
diff --git a/arch/arm64/kernel/vmlinux.lds.S b/arch/arm64/kernel/vmlinux.lds.S
index 1105aab1e6d6..b8deffa9e1bf 100644
--- a/arch/arm64/kernel/vmlinux.lds.S
+++ b/arch/arm64/kernel/vmlinux.lds.S
@@ -216,6 +216,11 @@ SECTIONS
swapper_pg_dir = .;
. += SWAPPER_DIR_SIZE;
+#ifdef CONFIG_ARM64_SW_TTBR0_PAN
+ reserved_ttbr0 = .;
+ . += RESERVED_TTBR0_SIZE;
+#endif
+
_end = .;
STABS_DEBUG
diff --git a/arch/arm64/kvm/Kconfig b/arch/arm64/kvm/Kconfig
index 6eaf12c1d627..52cb7ad9b2fd 100644
--- a/arch/arm64/kvm/Kconfig
+++ b/arch/arm64/kvm/Kconfig
@@ -16,9 +16,6 @@ menuconfig VIRTUALIZATION
if VIRTUALIZATION
-config KVM_ARM_VGIC_V3_ITS
- bool
-
config KVM
bool "Kernel-based Virtual Machine (KVM) support"
depends on OF
@@ -34,7 +31,6 @@ config KVM
select KVM_VFIO
select HAVE_KVM_EVENTFD
select HAVE_KVM_IRQFD
- select KVM_ARM_VGIC_V3_ITS
select KVM_ARM_PMU if HW_PERF_EVENTS
select HAVE_KVM_MSI
select HAVE_KVM_IRQCHIP
diff --git a/arch/arm64/kvm/handle_exit.c b/arch/arm64/kvm/handle_exit.c
index a204adf29f0a..1bfe30dfbfe7 100644
--- a/arch/arm64/kvm/handle_exit.c
+++ b/arch/arm64/kvm/handle_exit.c
@@ -57,6 +57,16 @@ static int handle_smc(struct kvm_vcpu *vcpu, struct kvm_run *run)
return 1;
}
+/*
+ * Guest access to FP/ASIMD registers are routed to this handler only
+ * when the system doesn't support FP/ASIMD.
+ */
+static int handle_no_fpsimd(struct kvm_vcpu *vcpu, struct kvm_run *run)
+{
+ kvm_inject_undefined(vcpu);
+ return 1;
+}
+
/**
* kvm_handle_wfx - handle a wait-for-interrupts or wait-for-event
* instruction executed by a guest
@@ -144,6 +154,7 @@ static exit_handle_fn arm_exit_handlers[] = {
[ESR_ELx_EC_BREAKPT_LOW]= kvm_handle_guest_debug,
[ESR_ELx_EC_BKPT32] = kvm_handle_guest_debug,
[ESR_ELx_EC_BRK64] = kvm_handle_guest_debug,
+ [ESR_ELx_EC_FP_ASIMD] = handle_no_fpsimd,
};
static exit_handle_fn kvm_get_exit_handler(struct kvm_vcpu *vcpu)
diff --git a/arch/arm64/kvm/hyp/hyp-entry.S b/arch/arm64/kvm/hyp/hyp-entry.S
index 4e92399f7105..5e9052f087f2 100644
--- a/arch/arm64/kvm/hyp/hyp-entry.S
+++ b/arch/arm64/kvm/hyp/hyp-entry.S
@@ -106,9 +106,16 @@ el1_trap:
* x0: ESR_EC
*/
- /* Guest accessed VFP/SIMD registers, save host, restore Guest */
+ /*
+ * We trap the first access to the FP/SIMD to save the host context
+ * and restore the guest context lazily.
+ * If FP/SIMD is not implemented, handle the trap and inject an
+ * undefined instruction exception to the guest.
+ */
+alternative_if_not ARM64_HAS_NO_FPSIMD
cmp x0, #ESR_ELx_EC_FP_ASIMD
b.eq __fpsimd_guest_restore
+alternative_else_nop_endif
mrs x1, tpidr_el2
mov x0, #ARM_EXCEPTION_TRAP
diff --git a/arch/arm64/kvm/hyp/switch.c b/arch/arm64/kvm/hyp/switch.c
index 83037cd62d01..75e83dd40d43 100644
--- a/arch/arm64/kvm/hyp/switch.c
+++ b/arch/arm64/kvm/hyp/switch.c
@@ -21,6 +21,7 @@
#include <asm/kvm_asm.h>
#include <asm/kvm_emulate.h>
#include <asm/kvm_hyp.h>
+#include <asm/fpsimd.h>
static bool __hyp_text __fpsimd_enabled_nvhe(void)
{
@@ -76,16 +77,24 @@ static void __hyp_text __activate_traps(struct kvm_vcpu *vcpu)
* traps are only taken to EL2 if the operation would not otherwise
* trap to EL1. Therefore, always make sure that for 32-bit guests,
* we set FPEXC.EN to prevent traps to EL1, when setting the TFP bit.
+ * If FP/ASIMD is not implemented, FPEXC is UNDEFINED and any access to
+ * it will cause an exception.
*/
val = vcpu->arch.hcr_el2;
- if (!(val & HCR_RW)) {
+ if (!(val & HCR_RW) && system_supports_fpsimd()) {
write_sysreg(1 << 30, fpexc32_el2);
isb();
}
write_sysreg(val, hcr_el2);
/* Trap on AArch32 cp15 c15 accesses (EL1 or EL0) */
write_sysreg(1 << 15, hstr_el2);
- /* Make sure we trap PMU access from EL0 to EL2 */
+ /*
+ * Make sure we trap PMU access from EL0 to EL2. Also sanitize
+ * PMSELR_EL0 to make sure it never contains the cycle
+ * counter, which could make a PMXEVCNTR_EL0 access UNDEF at
+ * EL1 instead of being trapped to EL2.
+ */
+ write_sysreg(0, pmselr_el0);
write_sysreg(ARMV8_PMU_USERENR_MASK, pmuserenr_el0);
write_sysreg(vcpu->arch.mdcr_el2, mdcr_el2);
__activate_traps_arch()();
diff --git a/arch/arm64/kvm/reset.c b/arch/arm64/kvm/reset.c
index 5bc460884639..e95d4f68bf54 100644
--- a/arch/arm64/kvm/reset.c
+++ b/arch/arm64/kvm/reset.c
@@ -86,12 +86,6 @@ int kvm_arch_dev_ioctl_check_extension(struct kvm *kvm, long ext)
case KVM_CAP_VCPU_ATTRIBUTES:
r = 1;
break;
- case KVM_CAP_MSI_DEVID:
- if (!kvm)
- r = -EINVAL;
- else
- r = kvm->arch.vgic.msis_require_devid;
- break;
default:
r = 0;
}
diff --git a/arch/arm64/lib/clear_user.S b/arch/arm64/lib/clear_user.S
index 5d1cad3ce6d6..d7150e30438a 100644
--- a/arch/arm64/lib/clear_user.S
+++ b/arch/arm64/lib/clear_user.S
@@ -17,10 +17,7 @@
*/
#include <linux/linkage.h>
-#include <asm/alternative.h>
-#include <asm/assembler.h>
-#include <asm/cpufeature.h>
-#include <asm/sysreg.h>
+#include <asm/uaccess.h>
.text
@@ -33,8 +30,7 @@
* Alignment fixed up by hardware.
*/
ENTRY(__clear_user)
-ALTERNATIVE("nop", __stringify(SET_PSTATE_PAN(0)), ARM64_ALT_PAN_NOT_UAO, \
- CONFIG_ARM64_PAN)
+ uaccess_enable_not_uao x2, x3
mov x2, x1 // save the size for fixup return
subs x1, x1, #8
b.mi 2f
@@ -54,8 +50,7 @@ uao_user_alternative 9f, strh, sttrh, wzr, x0, 2
b.mi 5f
uao_user_alternative 9f, strb, sttrb, wzr, x0, 0
5: mov x0, #0
-ALTERNATIVE("nop", __stringify(SET_PSTATE_PAN(1)), ARM64_ALT_PAN_NOT_UAO, \
- CONFIG_ARM64_PAN)
+ uaccess_disable_not_uao x2
ret
ENDPROC(__clear_user)
diff --git a/arch/arm64/lib/copy_from_user.S b/arch/arm64/lib/copy_from_user.S
index 4fd67ea03bb0..cfe13396085b 100644
--- a/arch/arm64/lib/copy_from_user.S
+++ b/arch/arm64/lib/copy_from_user.S
@@ -16,11 +16,8 @@
#include <linux/linkage.h>
-#include <asm/alternative.h>
-#include <asm/assembler.h>
#include <asm/cache.h>
-#include <asm/cpufeature.h>
-#include <asm/sysreg.h>
+#include <asm/uaccess.h>
/*
* Copy from user space to a kernel buffer (alignment handled by the hardware)
@@ -67,12 +64,10 @@
end .req x5
ENTRY(__arch_copy_from_user)
-ALTERNATIVE("nop", __stringify(SET_PSTATE_PAN(0)), ARM64_ALT_PAN_NOT_UAO, \
- CONFIG_ARM64_PAN)
+ uaccess_enable_not_uao x3, x4
add end, x0, x2
#include "copy_template.S"
-ALTERNATIVE("nop", __stringify(SET_PSTATE_PAN(1)), ARM64_ALT_PAN_NOT_UAO, \
- CONFIG_ARM64_PAN)
+ uaccess_disable_not_uao x3
mov x0, #0 // Nothing to copy
ret
ENDPROC(__arch_copy_from_user)
diff --git a/arch/arm64/lib/copy_in_user.S b/arch/arm64/lib/copy_in_user.S
index f7292dd08c84..718b1c4e2f85 100644
--- a/arch/arm64/lib/copy_in_user.S
+++ b/arch/arm64/lib/copy_in_user.S
@@ -18,11 +18,8 @@
#include <linux/linkage.h>
-#include <asm/alternative.h>
-#include <asm/assembler.h>
#include <asm/cache.h>
-#include <asm/cpufeature.h>
-#include <asm/sysreg.h>
+#include <asm/uaccess.h>
/*
* Copy from user space to user space (alignment handled by the hardware)
@@ -68,12 +65,10 @@
end .req x5
ENTRY(__copy_in_user)
-ALTERNATIVE("nop", __stringify(SET_PSTATE_PAN(0)), ARM64_ALT_PAN_NOT_UAO, \
- CONFIG_ARM64_PAN)
+ uaccess_enable_not_uao x3, x4
add end, x0, x2
#include "copy_template.S"
-ALTERNATIVE("nop", __stringify(SET_PSTATE_PAN(1)), ARM64_ALT_PAN_NOT_UAO, \
- CONFIG_ARM64_PAN)
+ uaccess_disable_not_uao x3
mov x0, #0
ret
ENDPROC(__copy_in_user)
diff --git a/arch/arm64/lib/copy_to_user.S b/arch/arm64/lib/copy_to_user.S
index 7a7efe255034..e99e31c9acac 100644
--- a/arch/arm64/lib/copy_to_user.S
+++ b/arch/arm64/lib/copy_to_user.S
@@ -16,11 +16,8 @@
#include <linux/linkage.h>
-#include <asm/alternative.h>
-#include <asm/assembler.h>
#include <asm/cache.h>
-#include <asm/cpufeature.h>
-#include <asm/sysreg.h>
+#include <asm/uaccess.h>
/*
* Copy to user space from a kernel buffer (alignment handled by the hardware)
@@ -66,12 +63,10 @@
end .req x5
ENTRY(__arch_copy_to_user)
-ALTERNATIVE("nop", __stringify(SET_PSTATE_PAN(0)), ARM64_ALT_PAN_NOT_UAO, \
- CONFIG_ARM64_PAN)
+ uaccess_enable_not_uao x3, x4
add end, x0, x2
#include "copy_template.S"
-ALTERNATIVE("nop", __stringify(SET_PSTATE_PAN(1)), ARM64_ALT_PAN_NOT_UAO, \
- CONFIG_ARM64_PAN)
+ uaccess_disable_not_uao x3
mov x0, #0
ret
ENDPROC(__arch_copy_to_user)
diff --git a/arch/arm64/mm/Makefile b/arch/arm64/mm/Makefile
index 54bb209cae8e..e703fb9defad 100644
--- a/arch/arm64/mm/Makefile
+++ b/arch/arm64/mm/Makefile
@@ -3,7 +3,8 @@ obj-y := dma-mapping.o extable.o fault.o init.o \
ioremap.o mmap.o pgd.o mmu.o \
context.o proc.o pageattr.o
obj-$(CONFIG_HUGETLB_PAGE) += hugetlbpage.o
-obj-$(CONFIG_ARM64_PTDUMP) += dump.o
+obj-$(CONFIG_ARM64_PTDUMP_CORE) += dump.o
+obj-$(CONFIG_ARM64_PTDUMP_DEBUGFS) += ptdump_debugfs.o
obj-$(CONFIG_NUMA) += numa.o
obj-$(CONFIG_KASAN) += kasan_init.o
diff --git a/arch/arm64/mm/cache.S b/arch/arm64/mm/cache.S
index 58b5a906ff78..da9576932322 100644
--- a/arch/arm64/mm/cache.S
+++ b/arch/arm64/mm/cache.S
@@ -23,6 +23,7 @@
#include <asm/assembler.h>
#include <asm/cpufeature.h>
#include <asm/alternative.h>
+#include <asm/uaccess.h>
/*
* flush_icache_range(start,end)
@@ -48,6 +49,7 @@ ENTRY(flush_icache_range)
* - end - virtual end address of region
*/
ENTRY(__flush_cache_user_range)
+ uaccess_ttbr0_enable x2, x3
dcache_line_size x2, x3
sub x3, x2, #1
bic x4, x0, x3
@@ -69,10 +71,12 @@ USER(9f, ic ivau, x4 ) // invalidate I line PoU
dsb ish
isb
mov x0, #0
+1:
+ uaccess_ttbr0_disable x1
ret
9:
mov x0, #-EFAULT
- ret
+ b 1b
ENDPROC(flush_icache_range)
ENDPROC(__flush_cache_user_range)
diff --git a/arch/arm64/mm/context.c b/arch/arm64/mm/context.c
index efcf1f7ef1e4..4c63cb154859 100644
--- a/arch/arm64/mm/context.c
+++ b/arch/arm64/mm/context.c
@@ -221,7 +221,12 @@ void check_and_switch_context(struct mm_struct *mm, unsigned int cpu)
raw_spin_unlock_irqrestore(&cpu_asid_lock, flags);
switch_mm_fastpath:
- cpu_switch_mm(mm->pgd, mm);
+ /*
+ * Defer TTBR0_EL1 setting for user threads to uaccess_enable() when
+ * emulating PAN.
+ */
+ if (!system_uses_ttbr0_pan())
+ cpu_switch_mm(mm->pgd, mm);
}
static int asids_init(void)
diff --git a/arch/arm64/mm/dma-mapping.c b/arch/arm64/mm/dma-mapping.c
index 3f74d0d98de6..aa6c8f834d9e 100644
--- a/arch/arm64/mm/dma-mapping.c
+++ b/arch/arm64/mm/dma-mapping.c
@@ -938,11 +938,6 @@ static void __iommu_setup_dma_ops(struct device *dev, u64 dma_base, u64 size,
void arch_teardown_dma_ops(struct device *dev)
{
- struct iommu_domain *domain = iommu_get_domain_for_dev(dev);
-
- if (WARN_ON(domain))
- iommu_detach_device(domain, dev);
-
dev->archdata.dma_ops = NULL;
}
diff --git a/arch/arm64/mm/dump.c b/arch/arm64/mm/dump.c
index 9c3e75df2180..ca74a2aace42 100644
--- a/arch/arm64/mm/dump.c
+++ b/arch/arm64/mm/dump.c
@@ -50,6 +50,18 @@ static const struct addr_marker address_markers[] = {
{ -1, NULL },
};
+#define pt_dump_seq_printf(m, fmt, args...) \
+({ \
+ if (m) \
+ seq_printf(m, fmt, ##args); \
+})
+
+#define pt_dump_seq_puts(m, fmt) \
+({ \
+ if (m) \
+ seq_printf(m, fmt); \
+})
+
/*
* The page dumper groups page table entries of the same type into a single
* description. It uses pg_state to track the range information while
@@ -62,6 +74,9 @@ struct pg_state {
unsigned long start_address;
unsigned level;
u64 current_prot;
+ bool check_wx;
+ unsigned long wx_pages;
+ unsigned long uxn_pages;
};
struct prot_bits {
@@ -186,10 +201,39 @@ static void dump_prot(struct pg_state *st, const struct prot_bits *bits,
s = bits->clear;
if (s)
- seq_printf(st->seq, " %s", s);
+ pt_dump_seq_printf(st->seq, " %s", s);
}
}
+static void note_prot_uxn(struct pg_state *st, unsigned long addr)
+{
+ if (!st->check_wx)
+ return;
+
+ if ((st->current_prot & PTE_UXN) == PTE_UXN)
+ return;
+
+ WARN_ONCE(1, "arm64/mm: Found non-UXN mapping at address %p/%pS\n",
+ (void *)st->start_address, (void *)st->start_address);
+
+ st->uxn_pages += (addr - st->start_address) / PAGE_SIZE;
+}
+
+static void note_prot_wx(struct pg_state *st, unsigned long addr)
+{
+ if (!st->check_wx)
+ return;
+ if ((st->current_prot & PTE_RDONLY) == PTE_RDONLY)
+ return;
+ if ((st->current_prot & PTE_PXN) == PTE_PXN)
+ return;
+
+ WARN_ONCE(1, "arm64/mm: Found insecure W+X mapping at address %p/%pS\n",
+ (void *)st->start_address, (void *)st->start_address);
+
+ st->wx_pages += (addr - st->start_address) / PAGE_SIZE;
+}
+
static void note_page(struct pg_state *st, unsigned long addr, unsigned level,
u64 val)
{
@@ -200,14 +244,16 @@ static void note_page(struct pg_state *st, unsigned long addr, unsigned level,
st->level = level;
st->current_prot = prot;
st->start_address = addr;
- seq_printf(st->seq, "---[ %s ]---\n", st->marker->name);
+ pt_dump_seq_printf(st->seq, "---[ %s ]---\n", st->marker->name);
} else if (prot != st->current_prot || level != st->level ||
addr >= st->marker[1].start_address) {
const char *unit = units;
unsigned long delta;
if (st->current_prot) {
- seq_printf(st->seq, "0x%016lx-0x%016lx ",
+ note_prot_uxn(st, addr);
+ note_prot_wx(st, addr);
+ pt_dump_seq_printf(st->seq, "0x%016lx-0x%016lx ",
st->start_address, addr);
delta = (addr - st->start_address) >> 10;
@@ -215,17 +261,17 @@ static void note_page(struct pg_state *st, unsigned long addr, unsigned level,
delta >>= 10;
unit++;
}
- seq_printf(st->seq, "%9lu%c %s", delta, *unit,
+ pt_dump_seq_printf(st->seq, "%9lu%c %s", delta, *unit,
pg_level[st->level].name);
if (pg_level[st->level].bits)
dump_prot(st, pg_level[st->level].bits,
pg_level[st->level].num);
- seq_puts(st->seq, "\n");
+ pt_dump_seq_puts(st->seq, "\n");
}
if (addr >= st->marker[1].start_address) {
st->marker++;
- seq_printf(st->seq, "---[ %s ]---\n", st->marker->name);
+ pt_dump_seq_printf(st->seq, "---[ %s ]---\n", st->marker->name);
}
st->start_address = addr;
@@ -235,7 +281,7 @@ static void note_page(struct pg_state *st, unsigned long addr, unsigned level,
if (addr >= st->marker[1].start_address) {
st->marker++;
- seq_printf(st->seq, "---[ %s ]---\n", st->marker->name);
+ pt_dump_seq_printf(st->seq, "---[ %s ]---\n", st->marker->name);
}
}
@@ -304,9 +350,8 @@ static void walk_pgd(struct pg_state *st, struct mm_struct *mm,
}
}
-static int ptdump_show(struct seq_file *m, void *v)
+void ptdump_walk_pgd(struct seq_file *m, struct ptdump_info *info)
{
- struct ptdump_info *info = m->private;
struct pg_state st = {
.seq = m,
.marker = info->markers,
@@ -315,33 +360,16 @@ static int ptdump_show(struct seq_file *m, void *v)
walk_pgd(&st, info->mm, info->base_addr);
note_page(&st, 0, 0, 0);
- return 0;
}
-static int ptdump_open(struct inode *inode, struct file *file)
+static void ptdump_initialize(void)
{
- return single_open(file, ptdump_show, inode->i_private);
-}
-
-static const struct file_operations ptdump_fops = {
- .open = ptdump_open,
- .read = seq_read,
- .llseek = seq_lseek,
- .release = single_release,
-};
-
-int ptdump_register(struct ptdump_info *info, const char *name)
-{
- struct dentry *pe;
unsigned i, j;
for (i = 0; i < ARRAY_SIZE(pg_level); i++)
if (pg_level[i].bits)
for (j = 0; j < pg_level[i].num; j++)
pg_level[i].mask |= pg_level[i].bits[j].mask;
-
- pe = debugfs_create_file(name, 0400, NULL, info, &ptdump_fops);
- return pe ? 0 : -ENOMEM;
}
static struct ptdump_info kernel_ptdump_info = {
@@ -350,8 +378,30 @@ static struct ptdump_info kernel_ptdump_info = {
.base_addr = VA_START,
};
+void ptdump_check_wx(void)
+{
+ struct pg_state st = {
+ .seq = NULL,
+ .marker = (struct addr_marker[]) {
+ { 0, NULL},
+ { -1, NULL},
+ },
+ .check_wx = true,
+ };
+
+ walk_pgd(&st, &init_mm, 0);
+ note_page(&st, 0, 0, 0);
+ if (st.wx_pages || st.uxn_pages)
+ pr_warn("Checked W+X mappings: FAILED, %lu W+X pages found, %lu non-UXN pages found\n",
+ st.wx_pages, st.uxn_pages);
+ else
+ pr_info("Checked W+X mappings: passed, no W+X pages found\n");
+}
+
static int ptdump_init(void)
{
- return ptdump_register(&kernel_ptdump_info, "kernel_page_tables");
+ ptdump_initialize();
+ return ptdump_debugfs_register(&kernel_ptdump_info,
+ "kernel_page_tables");
}
device_initcall(ptdump_init);
diff --git a/arch/arm64/mm/fault.c b/arch/arm64/mm/fault.c
index 0f8788374815..a78a5c401806 100644
--- a/arch/arm64/mm/fault.c
+++ b/arch/arm64/mm/fault.c
@@ -269,13 +269,19 @@ out:
return fault;
}
-static inline bool is_permission_fault(unsigned int esr)
+static inline bool is_permission_fault(unsigned int esr, struct pt_regs *regs)
{
unsigned int ec = ESR_ELx_EC(esr);
unsigned int fsc_type = esr & ESR_ELx_FSC_TYPE;
- return (ec == ESR_ELx_EC_DABT_CUR && fsc_type == ESR_ELx_FSC_PERM) ||
- (ec == ESR_ELx_EC_IABT_CUR && fsc_type == ESR_ELx_FSC_PERM);
+ if (ec != ESR_ELx_EC_DABT_CUR && ec != ESR_ELx_EC_IABT_CUR)
+ return false;
+
+ if (system_uses_ttbr0_pan())
+ return fsc_type == ESR_ELx_FSC_FAULT &&
+ (regs->pstate & PSR_PAN_BIT);
+ else
+ return fsc_type == ESR_ELx_FSC_PERM;
}
static bool is_el0_instruction_abort(unsigned int esr)
@@ -315,7 +321,7 @@ static int __kprobes do_page_fault(unsigned long addr, unsigned int esr,
mm_flags |= FAULT_FLAG_WRITE;
}
- if (is_permission_fault(esr) && (addr < USER_DS)) {
+ if (addr < USER_DS && is_permission_fault(esr, regs)) {
/* regs->orig_addr_limit may be 0 if we entered from EL0 */
if (regs->orig_addr_limit == KERNEL_DS)
die("Accessing user space memory with fs=KERNEL_DS", regs, esr);
@@ -507,10 +513,10 @@ static const struct fault_info {
{ do_bad, SIGBUS, 0, "unknown 17" },
{ do_bad, SIGBUS, 0, "unknown 18" },
{ do_bad, SIGBUS, 0, "unknown 19" },
- { do_bad, SIGBUS, 0, "synchronous abort (translation table walk)" },
- { do_bad, SIGBUS, 0, "synchronous abort (translation table walk)" },
- { do_bad, SIGBUS, 0, "synchronous abort (translation table walk)" },
- { do_bad, SIGBUS, 0, "synchronous abort (translation table walk)" },
+ { do_bad, SIGBUS, 0, "synchronous external abort (translation table walk)" },
+ { do_bad, SIGBUS, 0, "synchronous external abort (translation table walk)" },
+ { do_bad, SIGBUS, 0, "synchronous external abort (translation table walk)" },
+ { do_bad, SIGBUS, 0, "synchronous external abort (translation table walk)" },
{ do_bad, SIGBUS, 0, "synchronous parity error" },
{ do_bad, SIGBUS, 0, "unknown 25" },
{ do_bad, SIGBUS, 0, "unknown 26" },
diff --git a/arch/arm64/mm/flush.c b/arch/arm64/mm/flush.c
index 8377329d8c97..554a2558c12e 100644
--- a/arch/arm64/mm/flush.c
+++ b/arch/arm64/mm/flush.c
@@ -25,14 +25,7 @@
#include <asm/cachetype.h>
#include <asm/tlbflush.h>
-void flush_cache_range(struct vm_area_struct *vma, unsigned long start,
- unsigned long end)
-{
- if (vma->vm_flags & VM_EXEC)
- __flush_icache_all();
-}
-
-static void sync_icache_aliases(void *kaddr, unsigned long len)
+void sync_icache_aliases(void *kaddr, unsigned long len)
{
unsigned long addr = (unsigned long)kaddr;
diff --git a/arch/arm64/mm/hugetlbpage.c b/arch/arm64/mm/hugetlbpage.c
index 2e49bd252fe7..964b7549af5c 100644
--- a/arch/arm64/mm/hugetlbpage.c
+++ b/arch/arm64/mm/hugetlbpage.c
@@ -51,20 +51,8 @@ static int find_num_contig(struct mm_struct *mm, unsigned long addr,
*pgsize = PAGE_SIZE;
if (!pte_cont(pte))
return 1;
- if (!pgd_present(*pgd)) {
- VM_BUG_ON(!pgd_present(*pgd));
- return 1;
- }
pud = pud_offset(pgd, addr);
- if (!pud_present(*pud)) {
- VM_BUG_ON(!pud_present(*pud));
- return 1;
- }
pmd = pmd_offset(pud, addr);
- if (!pmd_present(*pmd)) {
- VM_BUG_ON(!pmd_present(*pmd));
- return 1;
- }
if ((pte_t *)pmd == ptep) {
*pgsize = PMD_SIZE;
return CONT_PMDS;
@@ -212,7 +200,7 @@ pte_t huge_ptep_get_and_clear(struct mm_struct *mm,
ncontig = find_num_contig(mm, addr, cpte, *cpte, &pgsize);
/* save the 1st pte to return */
pte = ptep_get_and_clear(mm, addr, cpte);
- for (i = 1; i < ncontig; ++i) {
+ for (i = 1, addr += pgsize; i < ncontig; ++i, addr += pgsize) {
/*
* If HW_AFDBM is enabled, then the HW could
* turn on the dirty bit for any of the page
@@ -250,7 +238,7 @@ int huge_ptep_set_access_flags(struct vm_area_struct *vma,
pfn = pte_pfn(*cpte);
ncontig = find_num_contig(vma->vm_mm, addr, cpte,
*cpte, &pgsize);
- for (i = 0; i < ncontig; ++i, ++cpte) {
+ for (i = 0; i < ncontig; ++i, ++cpte, addr += pgsize) {
changed = ptep_set_access_flags(vma, addr, cpte,
pfn_pte(pfn,
hugeprot),
@@ -273,7 +261,7 @@ void huge_ptep_set_wrprotect(struct mm_struct *mm,
cpte = huge_pte_offset(mm, addr);
ncontig = find_num_contig(mm, addr, cpte, *cpte, &pgsize);
- for (i = 0; i < ncontig; ++i, ++cpte)
+ for (i = 0; i < ncontig; ++i, ++cpte, addr += pgsize)
ptep_set_wrprotect(mm, addr, cpte);
} else {
ptep_set_wrprotect(mm, addr, ptep);
@@ -291,7 +279,7 @@ void huge_ptep_clear_flush(struct vm_area_struct *vma,
cpte = huge_pte_offset(vma->vm_mm, addr);
ncontig = find_num_contig(vma->vm_mm, addr, cpte,
*cpte, &pgsize);
- for (i = 0; i < ncontig; ++i, ++cpte)
+ for (i = 0; i < ncontig; ++i, ++cpte, addr += pgsize)
ptep_clear_flush(vma, addr, cpte);
} else {
ptep_clear_flush(vma, addr, ptep);
@@ -323,7 +311,7 @@ __setup("hugepagesz=", setup_hugepagesz);
static __init int add_default_hugepagesz(void)
{
if (size_to_hstate(CONT_PTES * PAGE_SIZE) == NULL)
- hugetlb_add_hstate(CONT_PMD_SHIFT);
+ hugetlb_add_hstate(CONT_PTE_SHIFT);
return 0;
}
arch_initcall(add_default_hugepagesz);
diff --git a/arch/arm64/mm/mmu.c b/arch/arm64/mm/mmu.c
index 05615a3fdc6f..17243e43184e 100644
--- a/arch/arm64/mm/mmu.c
+++ b/arch/arm64/mm/mmu.c
@@ -28,8 +28,6 @@
#include <linux/memblock.h>
#include <linux/fs.h>
#include <linux/io.h>
-#include <linux/slab.h>
-#include <linux/stop_machine.h>
#include <asm/barrier.h>
#include <asm/cputype.h>
@@ -42,6 +40,7 @@
#include <asm/tlb.h>
#include <asm/memblock.h>
#include <asm/mmu_context.h>
+#include <asm/ptdump.h>
u64 idmap_t0sz = TCR_T0SZ(VA_BITS);
@@ -95,11 +94,24 @@ static phys_addr_t __init early_pgtable_alloc(void)
return phys;
}
+static bool pgattr_change_is_safe(u64 old, u64 new)
+{
+ /*
+ * The following mapping attributes may be updated in live
+ * kernel mappings without the need for break-before-make.
+ */
+ static const pteval_t mask = PTE_PXN | PTE_RDONLY | PTE_WRITE;
+
+ return old == 0 || new == 0 || ((old ^ new) & ~mask) == 0;
+}
+
static void alloc_init_pte(pmd_t *pmd, unsigned long addr,
unsigned long end, unsigned long pfn,
pgprot_t prot,
- phys_addr_t (*pgtable_alloc)(void))
+ phys_addr_t (*pgtable_alloc)(void),
+ bool page_mappings_only)
{
+ pgprot_t __prot = prot;
pte_t *pte;
BUG_ON(pmd_sect(*pmd));
@@ -115,8 +127,28 @@ static void alloc_init_pte(pmd_t *pmd, unsigned long addr,
pte = pte_set_fixmap_offset(pmd, addr);
do {
- set_pte(pte, pfn_pte(pfn, prot));
+ pte_t old_pte = *pte;
+
+ /*
+ * Set the contiguous bit for the subsequent group of PTEs if
+ * its size and alignment are appropriate.
+ */
+ if (((addr | PFN_PHYS(pfn)) & ~CONT_PTE_MASK) == 0) {
+ if (end - addr >= CONT_PTE_SIZE && !page_mappings_only)
+ __prot = __pgprot(pgprot_val(prot) | PTE_CONT);
+ else
+ __prot = prot;
+ }
+
+ set_pte(pte, pfn_pte(pfn, __prot));
pfn++;
+
+ /*
+ * After the PTE entry has been populated once, we
+ * only allow updates to the permission attributes.
+ */
+ BUG_ON(!pgattr_change_is_safe(pte_val(old_pte), pte_val(*pte)));
+
} while (pte++, addr += PAGE_SIZE, addr != end);
pte_clear_fixmap();
@@ -125,8 +157,9 @@ static void alloc_init_pte(pmd_t *pmd, unsigned long addr,
static void alloc_init_pmd(pud_t *pud, unsigned long addr, unsigned long end,
phys_addr_t phys, pgprot_t prot,
phys_addr_t (*pgtable_alloc)(void),
- bool allow_block_mappings)
+ bool page_mappings_only)
{
+ pgprot_t __prot = prot;
pmd_t *pmd;
unsigned long next;
@@ -146,27 +179,39 @@ static void alloc_init_pmd(pud_t *pud, unsigned long addr, unsigned long end,
pmd = pmd_set_fixmap_offset(pud, addr);
do {
+ pmd_t old_pmd = *pmd;
+
next = pmd_addr_end(addr, end);
+
/* try section mapping first */
if (((addr | next | phys) & ~SECTION_MASK) == 0 &&
- allow_block_mappings) {
- pmd_t old_pmd =*pmd;
- pmd_set_huge(pmd, phys, prot);
+ !page_mappings_only) {
/*
- * Check for previous table entries created during
- * boot (__create_page_tables) and flush them.
+ * Set the contiguous bit for the subsequent group of
+ * PMDs if its size and alignment are appropriate.
*/
- if (!pmd_none(old_pmd)) {
- flush_tlb_all();
- if (pmd_table(old_pmd)) {
- phys_addr_t table = pmd_page_paddr(old_pmd);
- if (!WARN_ON_ONCE(slab_is_available()))
- memblock_free(table, PAGE_SIZE);
- }
+ if (((addr | phys) & ~CONT_PMD_MASK) == 0) {
+ if (end - addr >= CONT_PMD_SIZE)
+ __prot = __pgprot(pgprot_val(prot) |
+ PTE_CONT);
+ else
+ __prot = prot;
}
+ pmd_set_huge(pmd, phys, __prot);
+
+ /*
+ * After the PMD entry has been populated once, we
+ * only allow updates to the permission attributes.
+ */
+ BUG_ON(!pgattr_change_is_safe(pmd_val(old_pmd),
+ pmd_val(*pmd)));
} else {
alloc_init_pte(pmd, addr, next, __phys_to_pfn(phys),
- prot, pgtable_alloc);
+ prot, pgtable_alloc,
+ page_mappings_only);
+
+ BUG_ON(pmd_val(old_pmd) != 0 &&
+ pmd_val(old_pmd) != pmd_val(*pmd));
}
phys += next - addr;
} while (pmd++, addr = next, addr != end);
@@ -189,7 +234,7 @@ static inline bool use_1G_block(unsigned long addr, unsigned long next,
static void alloc_init_pud(pgd_t *pgd, unsigned long addr, unsigned long end,
phys_addr_t phys, pgprot_t prot,
phys_addr_t (*pgtable_alloc)(void),
- bool allow_block_mappings)
+ bool page_mappings_only)
{
pud_t *pud;
unsigned long next;
@@ -204,33 +249,28 @@ static void alloc_init_pud(pgd_t *pgd, unsigned long addr, unsigned long end,
pud = pud_set_fixmap_offset(pgd, addr);
do {
+ pud_t old_pud = *pud;
+
next = pud_addr_end(addr, end);
/*
* For 4K granule only, attempt to put down a 1GB block
*/
- if (use_1G_block(addr, next, phys) && allow_block_mappings) {
- pud_t old_pud = *pud;
+ if (use_1G_block(addr, next, phys) && !page_mappings_only) {
pud_set_huge(pud, phys, prot);
/*
- * If we have an old value for a pud, it will
- * be pointing to a pmd table that we no longer
- * need (from swapper_pg_dir).
- *
- * Look up the old pmd table and free it.
+ * After the PUD entry has been populated once, we
+ * only allow updates to the permission attributes.
*/
- if (!pud_none(old_pud)) {
- flush_tlb_all();
- if (pud_table(old_pud)) {
- phys_addr_t table = pud_page_paddr(old_pud);
- if (!WARN_ON_ONCE(slab_is_available()))
- memblock_free(table, PAGE_SIZE);
- }
- }
+ BUG_ON(!pgattr_change_is_safe(pud_val(old_pud),
+ pud_val(*pud)));
} else {
alloc_init_pmd(pud, addr, next, phys, prot,
- pgtable_alloc, allow_block_mappings);
+ pgtable_alloc, page_mappings_only);
+
+ BUG_ON(pud_val(old_pud) != 0 &&
+ pud_val(old_pud) != pud_val(*pud));
}
phys += next - addr;
} while (pud++, addr = next, addr != end);
@@ -242,7 +282,7 @@ static void __create_pgd_mapping(pgd_t *pgdir, phys_addr_t phys,
unsigned long virt, phys_addr_t size,
pgprot_t prot,
phys_addr_t (*pgtable_alloc)(void),
- bool allow_block_mappings)
+ bool page_mappings_only)
{
unsigned long addr, length, end, next;
pgd_t *pgd = pgd_offset_raw(pgdir, virt);
@@ -262,7 +302,7 @@ static void __create_pgd_mapping(pgd_t *pgdir, phys_addr_t phys,
do {
next = pgd_addr_end(addr, end);
alloc_init_pud(pgd, addr, next, phys, prot, pgtable_alloc,
- allow_block_mappings);
+ page_mappings_only);
phys += next - addr;
} while (pgd++, addr = next, addr != end);
}
@@ -291,17 +331,17 @@ static void __init create_mapping_noalloc(phys_addr_t phys, unsigned long virt,
&phys, virt);
return;
}
- __create_pgd_mapping(init_mm.pgd, phys, virt, size, prot, NULL, true);
+ __create_pgd_mapping(init_mm.pgd, phys, virt, size, prot, NULL, false);
}
void __init create_pgd_mapping(struct mm_struct *mm, phys_addr_t phys,
unsigned long virt, phys_addr_t size,
- pgprot_t prot, bool allow_block_mappings)
+ pgprot_t prot, bool page_mappings_only)
{
BUG_ON(mm == &init_mm);
__create_pgd_mapping(mm->pgd, phys, virt, size, prot,
- pgd_pgtable_alloc, allow_block_mappings);
+ pgd_pgtable_alloc, page_mappings_only);
}
static void create_mapping_late(phys_addr_t phys, unsigned long virt,
@@ -314,7 +354,7 @@ static void create_mapping_late(phys_addr_t phys, unsigned long virt,
}
__create_pgd_mapping(init_mm.pgd, phys, virt, size, prot,
- NULL, !debug_pagealloc_enabled());
+ NULL, debug_pagealloc_enabled());
}
static void __init __map_memblock(pgd_t *pgd, phys_addr_t start, phys_addr_t end)
@@ -332,7 +372,7 @@ static void __init __map_memblock(pgd_t *pgd, phys_addr_t start, phys_addr_t end
__create_pgd_mapping(pgd, start, __phys_to_virt(start),
end - start, PAGE_KERNEL,
early_pgtable_alloc,
- !debug_pagealloc_enabled());
+ debug_pagealloc_enabled());
return;
}
@@ -345,13 +385,13 @@ static void __init __map_memblock(pgd_t *pgd, phys_addr_t start, phys_addr_t end
__phys_to_virt(start),
kernel_start - start, PAGE_KERNEL,
early_pgtable_alloc,
- !debug_pagealloc_enabled());
+ debug_pagealloc_enabled());
if (kernel_end < end)
__create_pgd_mapping(pgd, kernel_end,
__phys_to_virt(kernel_end),
end - kernel_end, PAGE_KERNEL,
early_pgtable_alloc,
- !debug_pagealloc_enabled());
+ debug_pagealloc_enabled());
/*
* Map the linear alias of the [_text, __init_begin) interval as
@@ -361,7 +401,7 @@ static void __init __map_memblock(pgd_t *pgd, phys_addr_t start, phys_addr_t end
*/
__create_pgd_mapping(pgd, kernel_start, __phys_to_virt(kernel_start),
kernel_end - kernel_start, PAGE_KERNEL_RO,
- early_pgtable_alloc, !debug_pagealloc_enabled());
+ early_pgtable_alloc, debug_pagealloc_enabled());
}
static void __init map_mem(pgd_t *pgd)
@@ -396,6 +436,11 @@ void mark_rodata_ro(void)
section_size = (unsigned long)__init_begin - (unsigned long)__start_rodata;
create_mapping_late(__pa(__start_rodata), (unsigned long)__start_rodata,
section_size, PAGE_KERNEL_RO);
+
+ /* flush the TLBs after updating live kernel mappings */
+ flush_tlb_all();
+
+ debug_checkwx();
}
static void __init map_kernel_segment(pgd_t *pgd, void *va_start, void *va_end,
@@ -408,7 +453,7 @@ static void __init map_kernel_segment(pgd_t *pgd, void *va_start, void *va_end,
BUG_ON(!PAGE_ALIGNED(size));
__create_pgd_mapping(pgd, pa_start, (unsigned long)va_start, size, prot,
- early_pgtable_alloc, !debug_pagealloc_enabled());
+ early_pgtable_alloc, debug_pagealloc_enabled());
vma->addr = va_start;
vma->phys_addr = pa_start;
diff --git a/arch/arm64/mm/proc.S b/arch/arm64/mm/proc.S
index 352c73b6a59e..32682be978e0 100644
--- a/arch/arm64/mm/proc.S
+++ b/arch/arm64/mm/proc.S
@@ -70,11 +70,14 @@ ENTRY(cpu_do_suspend)
mrs x8, mdscr_el1
mrs x9, oslsr_el1
mrs x10, sctlr_el1
+ mrs x11, tpidr_el1
+ mrs x12, sp_el0
stp x2, x3, [x0]
stp x4, xzr, [x0, #16]
stp x5, x6, [x0, #32]
stp x7, x8, [x0, #48]
stp x9, x10, [x0, #64]
+ stp x11, x12, [x0, #80]
ret
ENDPROC(cpu_do_suspend)
@@ -90,6 +93,7 @@ ENTRY(cpu_do_resume)
ldp x6, x8, [x0, #32]
ldp x9, x10, [x0, #48]
ldp x11, x12, [x0, #64]
+ ldp x13, x14, [x0, #80]
msr tpidr_el0, x2
msr tpidrro_el0, x3
msr contextidr_el1, x4
@@ -112,6 +116,8 @@ ENTRY(cpu_do_resume)
msr mdscr_el1, x10
msr sctlr_el1, x12
+ msr tpidr_el1, x13
+ msr sp_el0, x14
/*
* Restore oslsr_el1 by writing oslar_el1
*/
@@ -136,11 +142,7 @@ ENTRY(cpu_do_switch_mm)
bfi x0, x1, #48, #16 // set the ASID
msr ttbr0_el1, x0 // set TTBR0
isb
-alternative_if ARM64_WORKAROUND_CAVIUM_27456
- ic iallu
- dsb nsh
- isb
-alternative_else_nop_endif
+ post_ttbr0_update_workaround
ret
ENDPROC(cpu_do_switch_mm)
diff --git a/arch/arm64/mm/ptdump_debugfs.c b/arch/arm64/mm/ptdump_debugfs.c
new file mode 100644
index 000000000000..eee4d864350c
--- /dev/null
+++ b/arch/arm64/mm/ptdump_debugfs.c
@@ -0,0 +1,31 @@
+#include <linux/debugfs.h>
+#include <linux/seq_file.h>
+
+#include <asm/ptdump.h>
+
+static int ptdump_show(struct seq_file *m, void *v)
+{
+ struct ptdump_info *info = m->private;
+ ptdump_walk_pgd(m, info);
+ return 0;
+}
+
+static int ptdump_open(struct inode *inode, struct file *file)
+{
+ return single_open(file, ptdump_show, inode->i_private);
+}
+
+static const struct file_operations ptdump_fops = {
+ .open = ptdump_open,
+ .read = seq_read,
+ .llseek = seq_lseek,
+ .release = single_release,
+};
+
+int ptdump_debugfs_register(struct ptdump_info *info, const char *name)
+{
+ struct dentry *pe;
+ pe = debugfs_create_file(name, 0400, NULL, info, &ptdump_fops);
+ return pe ? 0 : -ENOMEM;
+
+}
diff --git a/arch/arm64/xen/hypercall.S b/arch/arm64/xen/hypercall.S
index 329c8027b0a9..b41aff25426d 100644
--- a/arch/arm64/xen/hypercall.S
+++ b/arch/arm64/xen/hypercall.S
@@ -49,6 +49,7 @@
#include <linux/linkage.h>
#include <asm/assembler.h>
+#include <asm/uaccess.h>
#include <xen/interface/xen.h>
@@ -91,6 +92,20 @@ ENTRY(privcmd_call)
mov x2, x3
mov x3, x4
mov x4, x5
+ /*
+ * Privcmd calls are issued by the userspace. The kernel needs to
+ * enable access to TTBR0_EL1 as the hypervisor would issue stage 1
+ * translations to user memory via AT instructions. Since AT
+ * instructions are not affected by the PAN bit (ARMv8.1), we only
+ * need the explicit uaccess_enable/disable if the TTBR0 PAN emulation
+ * is enabled (it implies that hardware UAO and PAN disabled).
+ */
+ uaccess_ttbr0_enable x6, x7
hvc XEN_IMM
+
+ /*
+ * Disable userspace access from kernel once the hyp call completed.
+ */
+ uaccess_ttbr0_disable x6
ret
ENDPROC(privcmd_call);
diff --git a/arch/blackfin/mach-bf561/coreb.c b/arch/blackfin/mach-bf561/coreb.c
index 8a2543c654b3..cf27554e76bf 100644
--- a/arch/blackfin/mach-bf561/coreb.c
+++ b/arch/blackfin/mach-bf561/coreb.c
@@ -1,5 +1,7 @@
/* Load firmware into Core B on a BF561
*
+ * Author: Bas Vermeulen <bvermeul@blackstar.xs4all.nl>
+ *
* Copyright 2004-2009 Analog Devices Inc.
* Licensed under the GPL-2 or later.
*/
@@ -14,9 +16,9 @@
#include <linux/device.h>
#include <linux/fs.h>
+#include <linux/init.h>
#include <linux/kernel.h>
#include <linux/miscdevice.h>
-#include <linux/module.h>
#define CMD_COREB_START _IO('b', 0)
#define CMD_COREB_STOP _IO('b', 1)
@@ -59,8 +61,4 @@ static struct miscdevice coreb_dev = {
.name = "coreb",
.fops = &coreb_fops,
};
-module_misc_device(coreb_dev);
-
-MODULE_AUTHOR("Bas Vermeulen <bvermeul@blackstar.xs4all.nl>");
-MODULE_DESCRIPTION("BF561 Core B Support");
-MODULE_LICENSE("GPL");
+builtin_misc_device(coreb_dev);
diff --git a/arch/mips/kernel/asm-offsets.c b/arch/mips/kernel/asm-offsets.c
index fae2f9447792..6080582a26d1 100644
--- a/arch/mips/kernel/asm-offsets.c
+++ b/arch/mips/kernel/asm-offsets.c
@@ -341,7 +341,7 @@ void output_pm_defines(void)
void output_kvm_defines(void)
{
- COMMENT(" KVM/MIPS Specfic offsets. ");
+ COMMENT(" KVM/MIPS Specific offsets. ");
OFFSET(VCPU_FPR0, kvm_vcpu_arch, fpu.fpr[0]);
OFFSET(VCPU_FPR1, kvm_vcpu_arch, fpu.fpr[1]);
diff --git a/arch/powerpc/include/asm/book3s/64/mmu-hash.h b/arch/powerpc/include/asm/book3s/64/mmu-hash.h
index e407af2b7333..2e6a823fa502 100644
--- a/arch/powerpc/include/asm/book3s/64/mmu-hash.h
+++ b/arch/powerpc/include/asm/book3s/64/mmu-hash.h
@@ -70,7 +70,9 @@
#define HPTE_V_SSIZE_SHIFT 62
#define HPTE_V_AVPN_SHIFT 7
+#define HPTE_V_COMMON_BITS ASM_CONST(0x000fffffffffffff)
#define HPTE_V_AVPN ASM_CONST(0x3fffffffffffff80)
+#define HPTE_V_AVPN_3_0 ASM_CONST(0x000fffffffffff80)
#define HPTE_V_AVPN_VAL(x) (((x) & HPTE_V_AVPN) >> HPTE_V_AVPN_SHIFT)
#define HPTE_V_COMPARE(x,y) (!(((x) ^ (y)) & 0xffffffffffffff80UL))
#define HPTE_V_BOLTED ASM_CONST(0x0000000000000010)
@@ -80,14 +82,16 @@
#define HPTE_V_VALID ASM_CONST(0x0000000000000001)
/*
- * ISA 3.0 have a different HPTE format.
+ * ISA 3.0 has a different HPTE format.
*/
#define HPTE_R_3_0_SSIZE_SHIFT 58
+#define HPTE_R_3_0_SSIZE_MASK (3ull << HPTE_R_3_0_SSIZE_SHIFT)
#define HPTE_R_PP0 ASM_CONST(0x8000000000000000)
#define HPTE_R_TS ASM_CONST(0x4000000000000000)
#define HPTE_R_KEY_HI ASM_CONST(0x3000000000000000)
#define HPTE_R_RPN_SHIFT 12
#define HPTE_R_RPN ASM_CONST(0x0ffffffffffff000)
+#define HPTE_R_RPN_3_0 ASM_CONST(0x01fffffffffff000)
#define HPTE_R_PP ASM_CONST(0x0000000000000003)
#define HPTE_R_PPP ASM_CONST(0x8000000000000003)
#define HPTE_R_N ASM_CONST(0x0000000000000004)
@@ -316,12 +320,43 @@ static inline unsigned long hpte_encode_avpn(unsigned long vpn, int psize,
*/
v = (vpn >> (23 - VPN_SHIFT)) & ~(mmu_psize_defs[psize].avpnm);
v <<= HPTE_V_AVPN_SHIFT;
- if (!cpu_has_feature(CPU_FTR_ARCH_300))
- v |= ((unsigned long) ssize) << HPTE_V_SSIZE_SHIFT;
+ v |= ((unsigned long) ssize) << HPTE_V_SSIZE_SHIFT;
return v;
}
/*
+ * ISA v3.0 defines a new HPTE format, which differs from the old
+ * format in having smaller AVPN and ARPN fields, and the B field
+ * in the second dword instead of the first.
+ */
+static inline unsigned long hpte_old_to_new_v(unsigned long v)
+{
+ /* trim AVPN, drop B */
+ return v & HPTE_V_COMMON_BITS;
+}
+
+static inline unsigned long hpte_old_to_new_r(unsigned long v, unsigned long r)
+{
+ /* move B field from 1st to 2nd dword, trim ARPN */
+ return (r & ~HPTE_R_3_0_SSIZE_MASK) |
+ (((v) >> HPTE_V_SSIZE_SHIFT) << HPTE_R_3_0_SSIZE_SHIFT);
+}
+
+static inline unsigned long hpte_new_to_old_v(unsigned long v, unsigned long r)
+{
+ /* insert B field */
+ return (v & HPTE_V_COMMON_BITS) |
+ ((r & HPTE_R_3_0_SSIZE_MASK) <<
+ (HPTE_V_SSIZE_SHIFT - HPTE_R_3_0_SSIZE_SHIFT));
+}
+
+static inline unsigned long hpte_new_to_old_r(unsigned long r)
+{
+ /* clear out B field */
+ return r & ~HPTE_R_3_0_SSIZE_MASK;
+}
+
+/*
* This function sets the AVPN and L fields of the HPTE appropriately
* using the base page size and actual page size.
*/
@@ -341,12 +376,8 @@ static inline unsigned long hpte_encode_v(unsigned long vpn, int base_psize,
* aligned for the requested page size
*/
static inline unsigned long hpte_encode_r(unsigned long pa, int base_psize,
- int actual_psize, int ssize)
+ int actual_psize)
{
-
- if (cpu_has_feature(CPU_FTR_ARCH_300))
- pa |= ((unsigned long) ssize) << HPTE_R_3_0_SSIZE_SHIFT;
-
/* A 4K page needs no special encoding */
if (actual_psize == MMU_PAGE_4K)
return pa & HPTE_R_RPN;
diff --git a/arch/powerpc/include/asm/kvm_asm.h b/arch/powerpc/include/asm/kvm_asm.h
index 05cabed3d1bd..09a802bb702f 100644
--- a/arch/powerpc/include/asm/kvm_asm.h
+++ b/arch/powerpc/include/asm/kvm_asm.h
@@ -99,6 +99,7 @@
#define BOOK3S_INTERRUPT_H_EMUL_ASSIST 0xe40
#define BOOK3S_INTERRUPT_HMI 0xe60
#define BOOK3S_INTERRUPT_H_DOORBELL 0xe80
+#define BOOK3S_INTERRUPT_H_VIRT 0xea0
#define BOOK3S_INTERRUPT_PERFMON 0xf00
#define BOOK3S_INTERRUPT_ALTIVEC 0xf20
#define BOOK3S_INTERRUPT_VSX 0xf40
diff --git a/arch/powerpc/include/asm/kvm_host.h b/arch/powerpc/include/asm/kvm_host.h
index 28350a294b1e..e59b172666cd 100644
--- a/arch/powerpc/include/asm/kvm_host.h
+++ b/arch/powerpc/include/asm/kvm_host.h
@@ -48,7 +48,7 @@
#ifdef CONFIG_KVM_MMIO
#define KVM_COALESCED_MMIO_PAGE_OFFSET 1
#endif
-#define KVM_HALT_POLL_NS_DEFAULT 500000
+#define KVM_HALT_POLL_NS_DEFAULT 10000 /* 10 us */
/* These values are internal and can be increased later */
#define KVM_NR_IRQCHIPS 1
@@ -244,8 +244,10 @@ struct kvm_arch_memory_slot {
struct kvm_arch {
unsigned int lpid;
#ifdef CONFIG_KVM_BOOK3S_HV_POSSIBLE
+ unsigned int tlb_sets;
unsigned long hpt_virt;
struct revmap_entry *revmap;
+ atomic64_t mmio_update;
unsigned int host_lpid;
unsigned long host_lpcr;
unsigned long sdr1;
@@ -408,6 +410,24 @@ struct kvmppc_passthru_irqmap {
#define KVMPPC_IRQ_MPIC 1
#define KVMPPC_IRQ_XICS 2
+#define MMIO_HPTE_CACHE_SIZE 4
+
+struct mmio_hpte_cache_entry {
+ unsigned long hpte_v;
+ unsigned long hpte_r;
+ unsigned long rpte;
+ unsigned long pte_index;
+ unsigned long eaddr;
+ unsigned long slb_v;
+ long mmio_update;
+ unsigned int slb_base_pshift;
+};
+
+struct mmio_hpte_cache {
+ struct mmio_hpte_cache_entry entry[MMIO_HPTE_CACHE_SIZE];
+ unsigned int index;
+};
+
struct openpic;
struct kvm_vcpu_arch {
@@ -498,6 +518,8 @@ struct kvm_vcpu_arch {
ulong tcscr;
ulong acop;
ulong wort;
+ ulong tid;
+ ulong psscr;
ulong shadow_srr1;
#endif
u32 vrsave; /* also USPRG0 */
@@ -546,6 +568,7 @@ struct kvm_vcpu_arch {
u64 tfiar;
u32 cr_tm;
+ u64 xer_tm;
u64 lr_tm;
u64 ctr_tm;
u64 amr_tm;
@@ -655,9 +678,11 @@ struct kvm_vcpu_arch {
#ifdef CONFIG_KVM_BOOK3S_HV_POSSIBLE
struct kvm_vcpu_arch_shared shregs;
+ struct mmio_hpte_cache mmio_cache;
unsigned long pgfault_addr;
long pgfault_index;
unsigned long pgfault_hpte[2];
+ struct mmio_hpte_cache_entry *pgfault_cache;
struct task_struct *run_task;
struct kvm_run *kvm_run;
diff --git a/arch/powerpc/include/asm/kvm_ppc.h b/arch/powerpc/include/asm/kvm_ppc.h
index f6e49640dbe1..2da67bf1f2ec 100644
--- a/arch/powerpc/include/asm/kvm_ppc.h
+++ b/arch/powerpc/include/asm/kvm_ppc.h
@@ -483,9 +483,10 @@ extern void kvmppc_xics_set_mapped(struct kvm *kvm, unsigned long guest_irq,
unsigned long host_irq);
extern void kvmppc_xics_clr_mapped(struct kvm *kvm, unsigned long guest_irq,
unsigned long host_irq);
-extern long kvmppc_deliver_irq_passthru(struct kvm_vcpu *vcpu, u32 xirr,
- struct kvmppc_irq_map *irq_map,
- struct kvmppc_passthru_irqmap *pimap);
+extern long kvmppc_deliver_irq_passthru(struct kvm_vcpu *vcpu, __be32 xirr,
+ struct kvmppc_irq_map *irq_map,
+ struct kvmppc_passthru_irqmap *pimap,
+ bool *again);
extern int h_ipi_redirect;
#else
static inline struct kvmppc_passthru_irqmap *kvmppc_get_passthru_irqmap(
@@ -510,6 +511,48 @@ static inline int kvmppc_xics_hcall(struct kvm_vcpu *vcpu, u32 cmd)
#endif
/*
+ * Prototypes for functions called only from assembler code.
+ * Having prototypes reduces sparse errors.
+ */
+long kvmppc_rm_h_put_tce(struct kvm_vcpu *vcpu, unsigned long liobn,
+ unsigned long ioba, unsigned long tce);
+long kvmppc_rm_h_put_tce_indirect(struct kvm_vcpu *vcpu,
+ unsigned long liobn, unsigned long ioba,
+ unsigned long tce_list, unsigned long npages);
+long kvmppc_rm_h_stuff_tce(struct kvm_vcpu *vcpu,
+ unsigned long liobn, unsigned long ioba,
+ unsigned long tce_value, unsigned long npages);
+long int kvmppc_rm_h_confer(struct kvm_vcpu *vcpu, int target,
+ unsigned int yield_count);
+long kvmppc_h_random(struct kvm_vcpu *vcpu);
+void kvmhv_commence_exit(int trap);
+long kvmppc_realmode_machine_check(struct kvm_vcpu *vcpu);
+void kvmppc_subcore_enter_guest(void);
+void kvmppc_subcore_exit_guest(void);
+long kvmppc_realmode_hmi_handler(void);
+long kvmppc_h_enter(struct kvm_vcpu *vcpu, unsigned long flags,
+ long pte_index, unsigned long pteh, unsigned long ptel);
+long kvmppc_h_remove(struct kvm_vcpu *vcpu, unsigned long flags,
+ unsigned long pte_index, unsigned long avpn);
+long kvmppc_h_bulk_remove(struct kvm_vcpu *vcpu);
+long kvmppc_h_protect(struct kvm_vcpu *vcpu, unsigned long flags,
+ unsigned long pte_index, unsigned long avpn,
+ unsigned long va);
+long kvmppc_h_read(struct kvm_vcpu *vcpu, unsigned long flags,
+ unsigned long pte_index);
+long kvmppc_h_clear_ref(struct kvm_vcpu *vcpu, unsigned long flags,
+ unsigned long pte_index);
+long kvmppc_h_clear_mod(struct kvm_vcpu *vcpu, unsigned long flags,
+ unsigned long pte_index);
+long kvmppc_hpte_hv_fault(struct kvm_vcpu *vcpu, unsigned long addr,
+ unsigned long slb_v, unsigned int status, bool data);
+unsigned long kvmppc_rm_h_xirr(struct kvm_vcpu *vcpu);
+int kvmppc_rm_h_ipi(struct kvm_vcpu *vcpu, unsigned long server,
+ unsigned long mfrr);
+int kvmppc_rm_h_cppr(struct kvm_vcpu *vcpu, unsigned long cppr);
+int kvmppc_rm_h_eoi(struct kvm_vcpu *vcpu, unsigned long xirr);
+
+/*
* Host-side operations we want to set up while running in real
* mode in the guest operating on the xics.
* Currently only VCPU wakeup is supported.
diff --git a/arch/powerpc/include/asm/mmu.h b/arch/powerpc/include/asm/mmu.h
index e311c25751a4..8d1499334257 100644
--- a/arch/powerpc/include/asm/mmu.h
+++ b/arch/powerpc/include/asm/mmu.h
@@ -214,6 +214,11 @@ extern u64 ppc64_rma_size;
/* Cleanup function used by kexec */
extern void mmu_cleanup_all(void);
extern void radix__mmu_cleanup_all(void);
+
+/* Functions for creating and updating partition table on POWER9 */
+extern void mmu_partition_table_init(void);
+extern void mmu_partition_table_set_entry(unsigned int lpid, unsigned long dw0,
+ unsigned long dw1);
#endif /* CONFIG_PPC64 */
struct mm_struct;
diff --git a/arch/powerpc/include/asm/opal.h b/arch/powerpc/include/asm/opal.h
index e958b7096f19..5c7db0f1a708 100644
--- a/arch/powerpc/include/asm/opal.h
+++ b/arch/powerpc/include/asm/opal.h
@@ -220,9 +220,12 @@ int64_t opal_pci_set_power_state(uint64_t async_token, uint64_t id,
int64_t opal_pci_poll2(uint64_t id, uint64_t data);
int64_t opal_int_get_xirr(uint32_t *out_xirr, bool just_poll);
+int64_t opal_rm_int_get_xirr(__be32 *out_xirr, bool just_poll);
int64_t opal_int_set_cppr(uint8_t cppr);
int64_t opal_int_eoi(uint32_t xirr);
+int64_t opal_rm_int_eoi(uint32_t xirr);
int64_t opal_int_set_mfrr(uint32_t cpu, uint8_t mfrr);
+int64_t opal_rm_int_set_mfrr(uint32_t cpu, uint8_t mfrr);
int64_t opal_pci_tce_kill(uint64_t phb_id, uint32_t kill_type,
uint32_t pe_num, uint32_t tce_size,
uint64_t dma_addr, uint32_t npages);
diff --git a/arch/powerpc/include/asm/reg.h b/arch/powerpc/include/asm/reg.h
index 9e1499f98def..04aa1ee8cdb6 100644
--- a/arch/powerpc/include/asm/reg.h
+++ b/arch/powerpc/include/asm/reg.h
@@ -153,6 +153,8 @@
#define PSSCR_EC 0x00100000 /* Exit Criterion */
#define PSSCR_ESL 0x00200000 /* Enable State Loss */
#define PSSCR_SD 0x00400000 /* Status Disable */
+#define PSSCR_PLS 0xf000000000000000 /* Power-saving Level Status */
+#define PSSCR_GUEST_VIS 0xf0000000000003ff /* Guest-visible PSSCR fields */
/* Floating Point Status and Control Register (FPSCR) Fields */
#define FPSCR_FX 0x80000000 /* FPU exception summary */
@@ -236,6 +238,7 @@
#define SPRN_TEXASRU 0x83 /* '' '' '' Upper 32 */
#define TEXASR_FS __MASK(63-36) /* TEXASR Failure Summary */
#define SPRN_TFHAR 0x80 /* Transaction Failure Handler Addr */
+#define SPRN_TIDR 144 /* Thread ID register */
#define SPRN_CTRLF 0x088
#define SPRN_CTRLT 0x098
#define CTRL_CT 0xc0000000 /* current thread */
@@ -294,6 +297,7 @@
#define SPRN_HSRR1 0x13B /* Hypervisor Save/Restore 1 */
#define SPRN_LMRR 0x32D /* Load Monitor Region Register */
#define SPRN_LMSER 0x32E /* Load Monitor Section Enable Register */
+#define SPRN_ASDR 0x330 /* Access segment descriptor register */
#define SPRN_IC 0x350 /* Virtual Instruction Count */
#define SPRN_VTB 0x351 /* Virtual Time Base */
#define SPRN_LDBAR 0x352 /* LD Base Address Register */
@@ -305,6 +309,7 @@
/* HFSCR and FSCR bit numbers are the same */
#define FSCR_LM_LG 11 /* Enable Load Monitor Registers */
+#define FSCR_MSGP_LG 10 /* Enable MSGP */
#define FSCR_TAR_LG 8 /* Enable Target Address Register */
#define FSCR_EBB_LG 7 /* Enable Event Based Branching */
#define FSCR_TM_LG 5 /* Enable Transactional Memory */
@@ -320,6 +325,7 @@
#define FSCR_DSCR __MASK(FSCR_DSCR_LG)
#define SPRN_HFSCR 0xbe /* HV=1 Facility Status & Control Register */
#define HFSCR_LM __MASK(FSCR_LM_LG)
+#define HFSCR_MSGP __MASK(FSCR_MSGP_LG)
#define HFSCR_TAR __MASK(FSCR_TAR_LG)
#define HFSCR_EBB __MASK(FSCR_EBB_LG)
#define HFSCR_TM __MASK(FSCR_TM_LG)
@@ -358,6 +364,7 @@
#define LPCR_PECE_HVEE ASM_CONST(0x0000400000000000) /* P9 Wakeup on HV interrupts */
#define LPCR_MER ASM_CONST(0x0000000000000800) /* Mediated External Exception */
#define LPCR_MER_SH 11
+#define LPCR_GTSE ASM_CONST(0x0000000000000400) /* Guest Translation Shootdown Enable */
#define LPCR_TC ASM_CONST(0x0000000000000200) /* Translation control */
#define LPCR_LPES 0x0000000c
#define LPCR_LPES0 ASM_CONST(0x0000000000000008) /* LPAR Env selector 0 */
@@ -378,6 +385,12 @@
#define PCR_VEC_DIS (1ul << (63-0)) /* Vec. disable (bit NA since POWER8) */
#define PCR_VSX_DIS (1ul << (63-1)) /* VSX disable (bit NA since POWER8) */
#define PCR_TM_DIS (1ul << (63-2)) /* Trans. memory disable (POWER8) */
+/*
+ * These bits are used in the function kvmppc_set_arch_compat() to specify and
+ * determine both the compatibility level which we want to emulate and the
+ * compatibility level which the host is capable of emulating.
+ */
+#define PCR_ARCH_207 0x8 /* Architecture 2.07 */
#define PCR_ARCH_206 0x4 /* Architecture 2.06 */
#define PCR_ARCH_205 0x2 /* Architecture 2.05 */
#define SPRN_HEIR 0x153 /* Hypervisor Emulated Instruction Register */
@@ -1219,6 +1232,7 @@
#define PVR_ARCH_206 0x0f000003
#define PVR_ARCH_206p 0x0f100003
#define PVR_ARCH_207 0x0f000004
+#define PVR_ARCH_300 0x0f000005
/* Macros for setting and retrieving special purpose registers */
#ifndef __ASSEMBLY__
diff --git a/arch/powerpc/include/uapi/asm/kvm.h b/arch/powerpc/include/uapi/asm/kvm.h
index c93cf35ce379..3603b6f51b11 100644
--- a/arch/powerpc/include/uapi/asm/kvm.h
+++ b/arch/powerpc/include/uapi/asm/kvm.h
@@ -573,6 +573,10 @@ struct kvm_get_htab_header {
#define KVM_REG_PPC_SPRG9 (KVM_REG_PPC | KVM_REG_SIZE_U64 | 0xba)
#define KVM_REG_PPC_DBSR (KVM_REG_PPC | KVM_REG_SIZE_U32 | 0xbb)
+/* POWER9 registers */
+#define KVM_REG_PPC_TIDR (KVM_REG_PPC | KVM_REG_SIZE_U64 | 0xbc)
+#define KVM_REG_PPC_PSSCR (KVM_REG_PPC | KVM_REG_SIZE_U64 | 0xbd)
+
/* Transactional Memory checkpointed state:
* This is all GPRs, all VSX regs and a subset of SPRs
*/
@@ -596,6 +600,7 @@ struct kvm_get_htab_header {
#define KVM_REG_PPC_TM_VSCR (KVM_REG_PPC_TM | KVM_REG_SIZE_U32 | 0x67)
#define KVM_REG_PPC_TM_DSCR (KVM_REG_PPC_TM | KVM_REG_SIZE_U64 | 0x68)
#define KVM_REG_PPC_TM_TAR (KVM_REG_PPC_TM | KVM_REG_SIZE_U64 | 0x69)
+#define KVM_REG_PPC_TM_XER (KVM_REG_PPC_TM | KVM_REG_SIZE_U64 | 0x6a)
/* PPC64 eXternal Interrupt Controller Specification */
#define KVM_DEV_XICS_GRP_SOURCES 1 /* 64-bit source attributes */
diff --git a/arch/powerpc/kernel/asm-offsets.c b/arch/powerpc/kernel/asm-offsets.c
index caec7bf3b99a..195a9fc8f81c 100644
--- a/arch/powerpc/kernel/asm-offsets.c
+++ b/arch/powerpc/kernel/asm-offsets.c
@@ -487,6 +487,7 @@ int main(void)
/* book3s */
#ifdef CONFIG_KVM_BOOK3S_HV_POSSIBLE
+ DEFINE(KVM_TLB_SETS, offsetof(struct kvm, arch.tlb_sets));
DEFINE(KVM_SDR1, offsetof(struct kvm, arch.sdr1));
DEFINE(KVM_HOST_LPID, offsetof(struct kvm, arch.host_lpid));
DEFINE(KVM_HOST_LPCR, offsetof(struct kvm, arch.host_lpcr));
@@ -548,6 +549,8 @@ int main(void)
DEFINE(VCPU_TCSCR, offsetof(struct kvm_vcpu, arch.tcscr));
DEFINE(VCPU_ACOP, offsetof(struct kvm_vcpu, arch.acop));
DEFINE(VCPU_WORT, offsetof(struct kvm_vcpu, arch.wort));
+ DEFINE(VCPU_TID, offsetof(struct kvm_vcpu, arch.tid));
+ DEFINE(VCPU_PSSCR, offsetof(struct kvm_vcpu, arch.psscr));
DEFINE(VCORE_ENTRY_EXIT, offsetof(struct kvmppc_vcore, entry_exit_map));
DEFINE(VCORE_IN_GUEST, offsetof(struct kvmppc_vcore, in_guest));
DEFINE(VCORE_NAPPING_THREADS, offsetof(struct kvmppc_vcore, napping_threads));
@@ -569,6 +572,7 @@ int main(void)
DEFINE(VCPU_VRS_TM, offsetof(struct kvm_vcpu, arch.vr_tm.vr));
DEFINE(VCPU_VRSAVE_TM, offsetof(struct kvm_vcpu, arch.vrsave_tm));
DEFINE(VCPU_CR_TM, offsetof(struct kvm_vcpu, arch.cr_tm));
+ DEFINE(VCPU_XER_TM, offsetof(struct kvm_vcpu, arch.xer_tm));
DEFINE(VCPU_LR_TM, offsetof(struct kvm_vcpu, arch.lr_tm));
DEFINE(VCPU_CTR_TM, offsetof(struct kvm_vcpu, arch.ctr_tm));
DEFINE(VCPU_AMR_TM, offsetof(struct kvm_vcpu, arch.amr_tm));
diff --git a/arch/powerpc/kernel/cpu_setup_power.S b/arch/powerpc/kernel/cpu_setup_power.S
index 37c027ca83b2..f3e1f5d29dce 100644
--- a/arch/powerpc/kernel/cpu_setup_power.S
+++ b/arch/powerpc/kernel/cpu_setup_power.S
@@ -174,7 +174,7 @@ __init_FSCR:
__init_HFSCR:
mfspr r3,SPRN_HFSCR
ori r3,r3,HFSCR_TAR|HFSCR_TM|HFSCR_BHRB|HFSCR_PM|\
- HFSCR_DSCR|HFSCR_VECVSX|HFSCR_FP|HFSCR_EBB
+ HFSCR_DSCR|HFSCR_VECVSX|HFSCR_FP|HFSCR_EBB|HFSCR_MSGP
mtspr SPRN_HFSCR,r3
blr
diff --git a/arch/powerpc/kvm/book3s_64_mmu_hv.c b/arch/powerpc/kvm/book3s_64_mmu_hv.c
index 05f09ae82587..b795dd1ac2ef 100644
--- a/arch/powerpc/kvm/book3s_64_mmu_hv.c
+++ b/arch/powerpc/kvm/book3s_64_mmu_hv.c
@@ -88,6 +88,8 @@ long kvmppc_alloc_hpt(struct kvm *kvm, u32 *htab_orderp)
/* 128 (2**7) bytes in each HPTEG */
kvm->arch.hpt_mask = (1ul << (order - 7)) - 1;
+ atomic64_set(&kvm->arch.mmio_update, 0);
+
/* Allocate reverse map array */
rev = vmalloc(sizeof(struct revmap_entry) * kvm->arch.hpt_npte);
if (!rev) {
@@ -255,7 +257,7 @@ static void kvmppc_mmu_book3s_64_hv_reset_msr(struct kvm_vcpu *vcpu)
kvmppc_set_msr(vcpu, msr);
}
-long kvmppc_virtmode_do_h_enter(struct kvm *kvm, unsigned long flags,
+static long kvmppc_virtmode_do_h_enter(struct kvm *kvm, unsigned long flags,
long pte_index, unsigned long pteh,
unsigned long ptel, unsigned long *pte_idx_ret)
{
@@ -312,7 +314,7 @@ static int kvmppc_mmu_book3s_64_hv_xlate(struct kvm_vcpu *vcpu, gva_t eaddr,
struct kvmppc_slb *slbe;
unsigned long slb_v;
unsigned long pp, key;
- unsigned long v, gr;
+ unsigned long v, orig_v, gr;
__be64 *hptep;
int index;
int virtmode = vcpu->arch.shregs.msr & (data ? MSR_DR : MSR_IR);
@@ -337,10 +339,12 @@ static int kvmppc_mmu_book3s_64_hv_xlate(struct kvm_vcpu *vcpu, gva_t eaddr,
return -ENOENT;
}
hptep = (__be64 *)(kvm->arch.hpt_virt + (index << 4));
- v = be64_to_cpu(hptep[0]) & ~HPTE_V_HVLOCK;
+ v = orig_v = be64_to_cpu(hptep[0]) & ~HPTE_V_HVLOCK;
+ if (cpu_has_feature(CPU_FTR_ARCH_300))
+ v = hpte_new_to_old_v(v, be64_to_cpu(hptep[1]));
gr = kvm->arch.revmap[index].guest_rpte;
- unlock_hpte(hptep, v);
+ unlock_hpte(hptep, orig_v);
preempt_enable();
gpte->eaddr = eaddr;
@@ -438,6 +442,7 @@ int kvmppc_book3s_hv_page_fault(struct kvm_run *run, struct kvm_vcpu *vcpu,
{
struct kvm *kvm = vcpu->kvm;
unsigned long hpte[3], r;
+ unsigned long hnow_v, hnow_r;
__be64 *hptep;
unsigned long mmu_seq, psize, pte_size;
unsigned long gpa_base, gfn_base;
@@ -451,6 +456,7 @@ int kvmppc_book3s_hv_page_fault(struct kvm_run *run, struct kvm_vcpu *vcpu,
unsigned int writing, write_ok;
struct vm_area_struct *vma;
unsigned long rcbits;
+ long mmio_update;
/*
* Real-mode code has already searched the HPT and found the
@@ -460,6 +466,19 @@ int kvmppc_book3s_hv_page_fault(struct kvm_run *run, struct kvm_vcpu *vcpu,
*/
if (ea != vcpu->arch.pgfault_addr)
return RESUME_GUEST;
+
+ if (vcpu->arch.pgfault_cache) {
+ mmio_update = atomic64_read(&kvm->arch.mmio_update);
+ if (mmio_update == vcpu->arch.pgfault_cache->mmio_update) {
+ r = vcpu->arch.pgfault_cache->rpte;
+ psize = hpte_page_size(vcpu->arch.pgfault_hpte[0], r);
+ gpa_base = r & HPTE_R_RPN & ~(psize - 1);
+ gfn_base = gpa_base >> PAGE_SHIFT;
+ gpa = gpa_base | (ea & (psize - 1));
+ return kvmppc_hv_emulate_mmio(run, vcpu, gpa, ea,
+ dsisr & DSISR_ISSTORE);
+ }
+ }
index = vcpu->arch.pgfault_index;
hptep = (__be64 *)(kvm->arch.hpt_virt + (index << 4));
rev = &kvm->arch.revmap[index];
@@ -472,6 +491,10 @@ int kvmppc_book3s_hv_page_fault(struct kvm_run *run, struct kvm_vcpu *vcpu,
unlock_hpte(hptep, hpte[0]);
preempt_enable();
+ if (cpu_has_feature(CPU_FTR_ARCH_300)) {
+ hpte[0] = hpte_new_to_old_v(hpte[0], hpte[1]);
+ hpte[1] = hpte_new_to_old_r(hpte[1]);
+ }
if (hpte[0] != vcpu->arch.pgfault_hpte[0] ||
hpte[1] != vcpu->arch.pgfault_hpte[1])
return RESUME_GUEST;
@@ -575,16 +598,22 @@ int kvmppc_book3s_hv_page_fault(struct kvm_run *run, struct kvm_vcpu *vcpu,
*/
if (psize < PAGE_SIZE)
psize = PAGE_SIZE;
- r = (r & ~(HPTE_R_PP0 - psize)) | ((pfn << PAGE_SHIFT) & ~(psize - 1));
+ r = (r & HPTE_R_KEY_HI) | (r & ~(HPTE_R_PP0 - psize)) |
+ ((pfn << PAGE_SHIFT) & ~(psize - 1));
if (hpte_is_writable(r) && !write_ok)
r = hpte_make_readonly(r);
ret = RESUME_GUEST;
preempt_disable();
while (!try_lock_hpte(hptep, HPTE_V_HVLOCK))
cpu_relax();
- if ((be64_to_cpu(hptep[0]) & ~HPTE_V_HVLOCK) != hpte[0] ||
- be64_to_cpu(hptep[1]) != hpte[1] ||
- rev->guest_rpte != hpte[2])
+ hnow_v = be64_to_cpu(hptep[0]);
+ hnow_r = be64_to_cpu(hptep[1]);
+ if (cpu_has_feature(CPU_FTR_ARCH_300)) {
+ hnow_v = hpte_new_to_old_v(hnow_v, hnow_r);
+ hnow_r = hpte_new_to_old_r(hnow_r);
+ }
+ if ((hnow_v & ~HPTE_V_HVLOCK) != hpte[0] || hnow_r != hpte[1] ||
+ rev->guest_rpte != hpte[2])
/* HPTE has been changed under us; let the guest retry */
goto out_unlock;
hpte[0] = (hpte[0] & ~HPTE_V_ABSENT) | HPTE_V_VALID;
@@ -615,6 +644,10 @@ int kvmppc_book3s_hv_page_fault(struct kvm_run *run, struct kvm_vcpu *vcpu,
kvmppc_add_revmap_chain(kvm, rev, rmap, index, 0);
}
+ if (cpu_has_feature(CPU_FTR_ARCH_300)) {
+ r = hpte_old_to_new_r(hpte[0], r);
+ hpte[0] = hpte_old_to_new_v(hpte[0]);
+ }
hptep[1] = cpu_to_be64(r);
eieio();
__unlock_hpte(hptep, hpte[0]);
@@ -758,6 +791,7 @@ static int kvm_unmap_rmapp(struct kvm *kvm, unsigned long *rmapp,
hpte_rpn(ptel, psize) == gfn) {
hptep[0] |= cpu_to_be64(HPTE_V_ABSENT);
kvmppc_invalidate_hpte(kvm, hptep, i);
+ hptep[1] &= ~cpu_to_be64(HPTE_R_KEY_HI | HPTE_R_KEY_LO);
/* Harvest R and C */
rcbits = be64_to_cpu(hptep[1]) & (HPTE_R_R | HPTE_R_C);
*rmapp |= rcbits << KVMPPC_RMAP_RC_SHIFT;
@@ -1165,7 +1199,7 @@ static long record_hpte(unsigned long flags, __be64 *hptp,
unsigned long *hpte, struct revmap_entry *revp,
int want_valid, int first_pass)
{
- unsigned long v, r;
+ unsigned long v, r, hr;
unsigned long rcbits_unset;
int ok = 1;
int valid, dirty;
@@ -1192,6 +1226,11 @@ static long record_hpte(unsigned long flags, __be64 *hptp,
while (!try_lock_hpte(hptp, HPTE_V_HVLOCK))
cpu_relax();
v = be64_to_cpu(hptp[0]);
+ hr = be64_to_cpu(hptp[1]);
+ if (cpu_has_feature(CPU_FTR_ARCH_300)) {
+ v = hpte_new_to_old_v(v, hr);
+ hr = hpte_new_to_old_r(hr);
+ }
/* re-evaluate valid and dirty from synchronized HPTE value */
valid = !!(v & HPTE_V_VALID);
@@ -1199,8 +1238,8 @@ static long record_hpte(unsigned long flags, __be64 *hptp,
/* Harvest R and C into guest view if necessary */
rcbits_unset = ~revp->guest_rpte & (HPTE_R_R | HPTE_R_C);
- if (valid && (rcbits_unset & be64_to_cpu(hptp[1]))) {
- revp->guest_rpte |= (be64_to_cpu(hptp[1]) &
+ if (valid && (rcbits_unset & hr)) {
+ revp->guest_rpte |= (hr &
(HPTE_R_R | HPTE_R_C)) | HPTE_GR_MODIFIED;
dirty = 1;
}
@@ -1608,7 +1647,7 @@ static ssize_t debugfs_htab_read(struct file *file, char __user *buf,
return ret;
}
-ssize_t debugfs_htab_write(struct file *file, const char __user *buf,
+static ssize_t debugfs_htab_write(struct file *file, const char __user *buf,
size_t len, loff_t *ppos)
{
return -EACCES;
diff --git a/arch/powerpc/kvm/book3s_64_vio_hv.c b/arch/powerpc/kvm/book3s_64_vio_hv.c
index d461c440889a..e4c4ea973e57 100644
--- a/arch/powerpc/kvm/book3s_64_vio_hv.c
+++ b/arch/powerpc/kvm/book3s_64_vio_hv.c
@@ -39,7 +39,6 @@
#include <asm/udbg.h>
#include <asm/iommu.h>
#include <asm/tce.h>
-#include <asm/iommu.h>
#define TCES_PER_PAGE (PAGE_SIZE / sizeof(u64))
diff --git a/arch/powerpc/kvm/book3s_hv.c b/arch/powerpc/kvm/book3s_hv.c
index 39ef1f4a7b02..8dcbe37a4dac 100644
--- a/arch/powerpc/kvm/book3s_hv.c
+++ b/arch/powerpc/kvm/book3s_hv.c
@@ -54,6 +54,9 @@
#include <asm/dbell.h>
#include <asm/hmi.h>
#include <asm/pnv-pci.h>
+#include <asm/mmu.h>
+#include <asm/opal.h>
+#include <asm/xics.h>
#include <linux/gfp.h>
#include <linux/vmalloc.h>
#include <linux/highmem.h>
@@ -62,6 +65,7 @@
#include <linux/irqbypass.h>
#include <linux/module.h>
#include <linux/compiler.h>
+#include <linux/of.h>
#include "book3s.h"
@@ -104,23 +108,6 @@ module_param_cb(h_ipi_redirect, &module_param_ops, &h_ipi_redirect,
MODULE_PARM_DESC(h_ipi_redirect, "Redirect H_IPI wakeup to a free host core");
#endif
-/* Maximum halt poll interval defaults to KVM_HALT_POLL_NS_DEFAULT */
-static unsigned int halt_poll_max_ns = KVM_HALT_POLL_NS_DEFAULT;
-module_param(halt_poll_max_ns, uint, S_IRUGO | S_IWUSR);
-MODULE_PARM_DESC(halt_poll_max_ns, "Maximum halt poll time in ns");
-
-/* Factor by which the vcore halt poll interval is grown, default is to double
- */
-static unsigned int halt_poll_ns_grow = 2;
-module_param(halt_poll_ns_grow, int, S_IRUGO);
-MODULE_PARM_DESC(halt_poll_ns_grow, "Factor halt poll time is grown by");
-
-/* Factor by which the vcore halt poll interval is shrunk, default is to reset
- */
-static unsigned int halt_poll_ns_shrink;
-module_param(halt_poll_ns_shrink, int, S_IRUGO);
-MODULE_PARM_DESC(halt_poll_ns_shrink, "Factor halt poll time is shrunk by");
-
static void kvmppc_end_cede(struct kvm_vcpu *vcpu);
static int kvmppc_hv_setup_htab_rma(struct kvm_vcpu *vcpu);
@@ -146,12 +133,21 @@ static inline struct kvm_vcpu *next_runnable_thread(struct kvmppc_vcore *vc,
static bool kvmppc_ipi_thread(int cpu)
{
+ unsigned long msg = PPC_DBELL_TYPE(PPC_DBELL_SERVER);
+
+ /* On POWER9 we can use msgsnd to IPI any cpu */
+ if (cpu_has_feature(CPU_FTR_ARCH_300)) {
+ msg |= get_hard_smp_processor_id(cpu);
+ smp_mb();
+ __asm__ __volatile__ (PPC_MSGSND(%0) : : "r" (msg));
+ return true;
+ }
+
/* On POWER8 for IPIs to threads in the same core, use msgsnd */
if (cpu_has_feature(CPU_FTR_ARCH_207S)) {
preempt_disable();
if (cpu_first_thread_sibling(cpu) ==
cpu_first_thread_sibling(smp_processor_id())) {
- unsigned long msg = PPC_DBELL_TYPE(PPC_DBELL_SERVER);
msg |= cpu_thread_in_core(cpu);
smp_mb();
__asm__ __volatile__ (PPC_MSGSND(%0) : : "r" (msg));
@@ -162,8 +158,12 @@ static bool kvmppc_ipi_thread(int cpu)
}
#if defined(CONFIG_PPC_ICP_NATIVE) && defined(CONFIG_SMP)
- if (cpu >= 0 && cpu < nr_cpu_ids && paca[cpu].kvm_hstate.xics_phys) {
- xics_wake_cpu(cpu);
+ if (cpu >= 0 && cpu < nr_cpu_ids) {
+ if (paca[cpu].kvm_hstate.xics_phys) {
+ xics_wake_cpu(cpu);
+ return true;
+ }
+ opal_int_set_mfrr(get_hard_smp_processor_id(cpu), IPI_PRIORITY);
return true;
}
#endif
@@ -299,41 +299,54 @@ static void kvmppc_set_pvr_hv(struct kvm_vcpu *vcpu, u32 pvr)
vcpu->arch.pvr = pvr;
}
+/* Dummy value used in computing PCR value below */
+#define PCR_ARCH_300 (PCR_ARCH_207 << 1)
+
static int kvmppc_set_arch_compat(struct kvm_vcpu *vcpu, u32 arch_compat)
{
- unsigned long pcr = 0;
+ unsigned long host_pcr_bit = 0, guest_pcr_bit = 0;
struct kvmppc_vcore *vc = vcpu->arch.vcore;
+ /* We can (emulate) our own architecture version and anything older */
+ if (cpu_has_feature(CPU_FTR_ARCH_300))
+ host_pcr_bit = PCR_ARCH_300;
+ else if (cpu_has_feature(CPU_FTR_ARCH_207S))
+ host_pcr_bit = PCR_ARCH_207;
+ else if (cpu_has_feature(CPU_FTR_ARCH_206))
+ host_pcr_bit = PCR_ARCH_206;
+ else
+ host_pcr_bit = PCR_ARCH_205;
+
+ /* Determine lowest PCR bit needed to run guest in given PVR level */
+ guest_pcr_bit = host_pcr_bit;
if (arch_compat) {
switch (arch_compat) {
case PVR_ARCH_205:
- /*
- * If an arch bit is set in PCR, all the defined
- * higher-order arch bits also have to be set.
- */
- pcr = PCR_ARCH_206 | PCR_ARCH_205;
+ guest_pcr_bit = PCR_ARCH_205;
break;
case PVR_ARCH_206:
case PVR_ARCH_206p:
- pcr = PCR_ARCH_206;
+ guest_pcr_bit = PCR_ARCH_206;
break;
case PVR_ARCH_207:
+ guest_pcr_bit = PCR_ARCH_207;
+ break;
+ case PVR_ARCH_300:
+ guest_pcr_bit = PCR_ARCH_300;
break;
default:
return -EINVAL;
}
-
- if (!cpu_has_feature(CPU_FTR_ARCH_207S)) {
- /* POWER7 can't emulate POWER8 */
- if (!(pcr & PCR_ARCH_206))
- return -EINVAL;
- pcr &= ~PCR_ARCH_206;
- }
}
+ /* Check requested PCR bits don't exceed our capabilities */
+ if (guest_pcr_bit > host_pcr_bit)
+ return -EINVAL;
+
spin_lock(&vc->lock);
vc->arch_compat = arch_compat;
- vc->pcr = pcr;
+ /* Set all PCR bits for which guest_pcr_bit <= bit < host_pcr_bit */
+ vc->pcr = host_pcr_bit - guest_pcr_bit;
spin_unlock(&vc->lock);
return 0;
@@ -945,6 +958,7 @@ static int kvmppc_handle_exit_hv(struct kvm_run *run, struct kvm_vcpu *vcpu,
break;
case BOOK3S_INTERRUPT_EXTERNAL:
case BOOK3S_INTERRUPT_H_DOORBELL:
+ case BOOK3S_INTERRUPT_H_VIRT:
vcpu->stat.ext_intr_exits++;
r = RESUME_GUEST;
break;
@@ -1229,6 +1243,12 @@ static int kvmppc_get_one_reg_hv(struct kvm_vcpu *vcpu, u64 id,
case KVM_REG_PPC_WORT:
*val = get_reg_val(id, vcpu->arch.wort);
break;
+ case KVM_REG_PPC_TIDR:
+ *val = get_reg_val(id, vcpu->arch.tid);
+ break;
+ case KVM_REG_PPC_PSSCR:
+ *val = get_reg_val(id, vcpu->arch.psscr);
+ break;
case KVM_REG_PPC_VPA_ADDR:
spin_lock(&vcpu->arch.vpa_update_lock);
*val = get_reg_val(id, vcpu->arch.vpa.next_gpa);
@@ -1288,6 +1308,9 @@ static int kvmppc_get_one_reg_hv(struct kvm_vcpu *vcpu, u64 id,
case KVM_REG_PPC_TM_CR:
*val = get_reg_val(id, vcpu->arch.cr_tm);
break;
+ case KVM_REG_PPC_TM_XER:
+ *val = get_reg_val(id, vcpu->arch.xer_tm);
+ break;
case KVM_REG_PPC_TM_LR:
*val = get_reg_val(id, vcpu->arch.lr_tm);
break;
@@ -1427,6 +1450,12 @@ static int kvmppc_set_one_reg_hv(struct kvm_vcpu *vcpu, u64 id,
case KVM_REG_PPC_WORT:
vcpu->arch.wort = set_reg_val(id, *val);
break;
+ case KVM_REG_PPC_TIDR:
+ vcpu->arch.tid = set_reg_val(id, *val);
+ break;
+ case KVM_REG_PPC_PSSCR:
+ vcpu->arch.psscr = set_reg_val(id, *val) & PSSCR_GUEST_VIS;
+ break;
case KVM_REG_PPC_VPA_ADDR:
addr = set_reg_val(id, *val);
r = -EINVAL;
@@ -1498,6 +1527,9 @@ static int kvmppc_set_one_reg_hv(struct kvm_vcpu *vcpu, u64 id,
case KVM_REG_PPC_TM_CR:
vcpu->arch.cr_tm = set_reg_val(id, *val);
break;
+ case KVM_REG_PPC_TM_XER:
+ vcpu->arch.xer_tm = set_reg_val(id, *val);
+ break;
case KVM_REG_PPC_TM_LR:
vcpu->arch.lr_tm = set_reg_val(id, *val);
break;
@@ -1540,6 +1572,20 @@ static int kvmppc_set_one_reg_hv(struct kvm_vcpu *vcpu, u64 id,
return r;
}
+/*
+ * On POWER9, threads are independent and can be in different partitions.
+ * Therefore we consider each thread to be a subcore.
+ * There is a restriction that all threads have to be in the same
+ * MMU mode (radix or HPT), unfortunately, but since we only support
+ * HPT guests on a HPT host so far, that isn't an impediment yet.
+ */
+static int threads_per_vcore(void)
+{
+ if (cpu_has_feature(CPU_FTR_ARCH_300))
+ return 1;
+ return threads_per_subcore;
+}
+
static struct kvmppc_vcore *kvmppc_vcore_create(struct kvm *kvm, int core)
{
struct kvmppc_vcore *vcore;
@@ -1554,7 +1600,7 @@ static struct kvmppc_vcore *kvmppc_vcore_create(struct kvm *kvm, int core)
init_swait_queue_head(&vcore->wq);
vcore->preempt_tb = TB_NIL;
vcore->lpcr = kvm->arch.lpcr;
- vcore->first_vcpuid = core * threads_per_subcore;
+ vcore->first_vcpuid = core * threads_per_vcore();
vcore->kvm = kvm;
INIT_LIST_HEAD(&vcore->preempt_list);
@@ -1717,7 +1763,7 @@ static struct kvm_vcpu *kvmppc_core_vcpu_create_hv(struct kvm *kvm,
int core;
struct kvmppc_vcore *vcore;
- core = id / threads_per_subcore;
+ core = id / threads_per_vcore();
if (core >= KVM_MAX_VCORES)
goto out;
@@ -1935,7 +1981,10 @@ static void kvmppc_wait_for_nap(void)
{
int cpu = smp_processor_id();
int i, loops;
+ int n_threads = threads_per_vcore();
+ if (n_threads <= 1)
+ return;
for (loops = 0; loops < 1000000; ++loops) {
/*
* Check if all threads are finished.
@@ -1943,17 +1992,17 @@ static void kvmppc_wait_for_nap(void)
* and the thread clears it when finished, so we look
* for any threads that still have a non-NULL vcore ptr.
*/
- for (i = 1; i < threads_per_subcore; ++i)
+ for (i = 1; i < n_threads; ++i)
if (paca[cpu + i].kvm_hstate.kvm_vcore)
break;
- if (i == threads_per_subcore) {
+ if (i == n_threads) {
HMT_medium();
return;
}
HMT_low();
}
HMT_medium();
- for (i = 1; i < threads_per_subcore; ++i)
+ for (i = 1; i < n_threads; ++i)
if (paca[cpu + i].kvm_hstate.kvm_vcore)
pr_err("KVM: CPU %d seems to be stuck\n", cpu + i);
}
@@ -2019,7 +2068,7 @@ static void kvmppc_vcore_preempt(struct kvmppc_vcore *vc)
vc->vcore_state = VCORE_PREEMPT;
vc->pcpu = smp_processor_id();
- if (vc->num_threads < threads_per_subcore) {
+ if (vc->num_threads < threads_per_vcore()) {
spin_lock(&lp->lock);
list_add_tail(&vc->preempt_list, &lp->list);
spin_unlock(&lp->lock);
@@ -2123,8 +2172,7 @@ static bool can_dynamic_split(struct kvmppc_vcore *vc, struct core_info *cip)
cip->subcore_threads[sub] = vc->num_threads;
cip->subcore_vm[sub] = vc->kvm;
init_master_vcore(vc);
- list_del(&vc->preempt_list);
- list_add_tail(&vc->preempt_list, &cip->vcs[sub]);
+ list_move_tail(&vc->preempt_list, &cip->vcs[sub]);
return true;
}
@@ -2309,6 +2357,7 @@ static noinline void kvmppc_run_core(struct kvmppc_vcore *vc)
unsigned long cmd_bit, stat_bit;
int pcpu, thr;
int target_threads;
+ int controlled_threads;
/*
* Remove from the list any threads that have a signal pending
@@ -2327,11 +2376,18 @@ static noinline void kvmppc_run_core(struct kvmppc_vcore *vc)
vc->preempt_tb = TB_NIL;
/*
+ * Number of threads that we will be controlling: the same as
+ * the number of threads per subcore, except on POWER9,
+ * where it's 1 because the threads are (mostly) independent.
+ */
+ controlled_threads = threads_per_vcore();
+
+ /*
* Make sure we are running on primary threads, and that secondary
* threads are offline. Also check if the number of threads in this
* guest are greater than the current system threads per guest.
*/
- if ((threads_per_core > 1) &&
+ if ((controlled_threads > 1) &&
((vc->num_threads > threads_per_subcore) || !on_primary_thread())) {
for_each_runnable_thread(i, vcpu, vc) {
vcpu->arch.ret = -EBUSY;
@@ -2347,7 +2403,7 @@ static noinline void kvmppc_run_core(struct kvmppc_vcore *vc)
*/
init_core_info(&core_info, vc);
pcpu = smp_processor_id();
- target_threads = threads_per_subcore;
+ target_threads = controlled_threads;
if (target_smt_mode && target_smt_mode < target_threads)
target_threads = target_smt_mode;
if (vc->num_threads < target_threads)
@@ -2383,7 +2439,7 @@ static noinline void kvmppc_run_core(struct kvmppc_vcore *vc)
smp_wmb();
}
pcpu = smp_processor_id();
- for (thr = 0; thr < threads_per_subcore; ++thr)
+ for (thr = 0; thr < controlled_threads; ++thr)
paca[pcpu + thr].kvm_hstate.kvm_split_mode = sip;
/* Initiate micro-threading (split-core) if required */
@@ -2493,7 +2549,7 @@ static noinline void kvmppc_run_core(struct kvmppc_vcore *vc)
}
/* Let secondaries go back to the offline loop */
- for (i = 0; i < threads_per_subcore; ++i) {
+ for (i = 0; i < controlled_threads; ++i) {
kvmppc_release_hwthread(pcpu + i);
if (sip && sip->napped[i])
kvmppc_ipi_thread(pcpu + i);
@@ -2545,9 +2601,6 @@ static void grow_halt_poll_ns(struct kvmppc_vcore *vc)
vc->halt_poll_ns = 10000;
else
vc->halt_poll_ns *= halt_poll_ns_grow;
-
- if (vc->halt_poll_ns > halt_poll_max_ns)
- vc->halt_poll_ns = halt_poll_max_ns;
}
static void shrink_halt_poll_ns(struct kvmppc_vcore *vc)
@@ -2558,7 +2611,8 @@ static void shrink_halt_poll_ns(struct kvmppc_vcore *vc)
vc->halt_poll_ns /= halt_poll_ns_shrink;
}
-/* Check to see if any of the runnable vcpus on the vcore have pending
+/*
+ * Check to see if any of the runnable vcpus on the vcore have pending
* exceptions or are no longer ceded
*/
static int kvmppc_vcore_check_block(struct kvmppc_vcore *vc)
@@ -2657,16 +2711,18 @@ out:
}
/* Adjust poll time */
- if (halt_poll_max_ns) {
+ if (halt_poll_ns) {
if (block_ns <= vc->halt_poll_ns)
;
/* We slept and blocked for longer than the max halt time */
- else if (vc->halt_poll_ns && block_ns > halt_poll_max_ns)
+ else if (vc->halt_poll_ns && block_ns > halt_poll_ns)
shrink_halt_poll_ns(vc);
/* We slept and our poll time is too small */
- else if (vc->halt_poll_ns < halt_poll_max_ns &&
- block_ns < halt_poll_max_ns)
+ else if (vc->halt_poll_ns < halt_poll_ns &&
+ block_ns < halt_poll_ns)
grow_halt_poll_ns(vc);
+ if (vc->halt_poll_ns > halt_poll_ns)
+ vc->halt_poll_ns = halt_poll_ns;
} else
vc->halt_poll_ns = 0;
@@ -2973,6 +3029,15 @@ static void kvmppc_core_commit_memory_region_hv(struct kvm *kvm,
struct kvm_memslots *slots;
struct kvm_memory_slot *memslot;
+ /*
+ * If we are making a new memslot, it might make
+ * some address that was previously cached as emulated
+ * MMIO be no longer emulated MMIO, so invalidate
+ * all the caches of emulated MMIO translations.
+ */
+ if (npages)
+ atomic64_inc(&kvm->arch.mmio_update);
+
if (npages && old->npages) {
/*
* If modifying a memslot, reset all the rmap dirty bits.
@@ -3017,6 +3082,22 @@ static void kvmppc_mmu_destroy_hv(struct kvm_vcpu *vcpu)
return;
}
+static void kvmppc_setup_partition_table(struct kvm *kvm)
+{
+ unsigned long dw0, dw1;
+
+ /* PS field - page size for VRMA */
+ dw0 = ((kvm->arch.vrma_slb_v & SLB_VSID_L) >> 1) |
+ ((kvm->arch.vrma_slb_v & SLB_VSID_LP) << 1);
+ /* HTABSIZE and HTABORG fields */
+ dw0 |= kvm->arch.sdr1;
+
+ /* Second dword has GR=0; other fields are unused since UPRT=0 */
+ dw1 = 0;
+
+ mmu_partition_table_set_entry(kvm->arch.lpid, dw0, dw1);
+}
+
static int kvmppc_hv_setup_htab_rma(struct kvm_vcpu *vcpu)
{
int err = 0;
@@ -3068,17 +3149,20 @@ static int kvmppc_hv_setup_htab_rma(struct kvm_vcpu *vcpu)
psize == 0x1000000))
goto out_srcu;
- /* Update VRMASD field in the LPCR */
senc = slb_pgsize_encoding(psize);
kvm->arch.vrma_slb_v = senc | SLB_VSID_B_1T |
(VRMA_VSID << SLB_VSID_SHIFT_1T);
- /* the -4 is to account for senc values starting at 0x10 */
- lpcr = senc << (LPCR_VRMASD_SH - 4);
-
/* Create HPTEs in the hash page table for the VRMA */
kvmppc_map_vrma(vcpu, memslot, porder);
- kvmppc_update_lpcr(kvm, lpcr, LPCR_VRMASD);
+ /* Update VRMASD field in the LPCR */
+ if (!cpu_has_feature(CPU_FTR_ARCH_300)) {
+ /* the -4 is to account for senc values starting at 0x10 */
+ lpcr = senc << (LPCR_VRMASD_SH - 4);
+ kvmppc_update_lpcr(kvm, lpcr, LPCR_VRMASD);
+ } else {
+ kvmppc_setup_partition_table(kvm);
+ }
/* Order updates to kvm->arch.lpcr etc. vs. hpte_setup_done */
smp_wmb();
@@ -3193,14 +3277,18 @@ static int kvmppc_core_init_vm_hv(struct kvm *kvm)
* Since we don't flush the TLB when tearing down a VM,
* and this lpid might have previously been used,
* make sure we flush on each core before running the new VM.
+ * On POWER9, the tlbie in mmu_partition_table_set_entry()
+ * does this flush for us.
*/
- cpumask_setall(&kvm->arch.need_tlb_flush);
+ if (!cpu_has_feature(CPU_FTR_ARCH_300))
+ cpumask_setall(&kvm->arch.need_tlb_flush);
/* Start out with the default set of hcalls enabled */
memcpy(kvm->arch.enabled_hcalls, default_enabled_hcalls,
sizeof(kvm->arch.enabled_hcalls));
- kvm->arch.host_sdr1 = mfspr(SPRN_SDR1);
+ if (!cpu_has_feature(CPU_FTR_ARCH_300))
+ kvm->arch.host_sdr1 = mfspr(SPRN_SDR1);
/* Init LPCR for virtual RMA mode */
kvm->arch.host_lpid = mfspr(SPRN_LPID);
@@ -3213,9 +3301,29 @@ static int kvmppc_core_init_vm_hv(struct kvm *kvm)
/* On POWER8 turn on online bit to enable PURR/SPURR */
if (cpu_has_feature(CPU_FTR_ARCH_207S))
lpcr |= LPCR_ONL;
+ /*
+ * On POWER9, VPM0 bit is reserved (VPM0=1 behaviour is assumed)
+ * Set HVICE bit to enable hypervisor virtualization interrupts.
+ */
+ if (cpu_has_feature(CPU_FTR_ARCH_300)) {
+ lpcr &= ~LPCR_VPM0;
+ lpcr |= LPCR_HVICE;
+ }
+
kvm->arch.lpcr = lpcr;
/*
+ * Work out how many sets the TLB has, for the use of
+ * the TLB invalidation loop in book3s_hv_rmhandlers.S.
+ */
+ if (cpu_has_feature(CPU_FTR_ARCH_300))
+ kvm->arch.tlb_sets = POWER9_TLB_SETS_HASH; /* 256 */
+ else if (cpu_has_feature(CPU_FTR_ARCH_207S))
+ kvm->arch.tlb_sets = POWER8_TLB_SETS; /* 512 */
+ else
+ kvm->arch.tlb_sets = POWER7_TLB_SETS; /* 128 */
+
+ /*
* Track that we now have a HV mode VM active. This blocks secondary
* CPU threads from coming online.
*/
@@ -3279,9 +3387,9 @@ static int kvmppc_core_check_processor_compat_hv(void)
!cpu_has_feature(CPU_FTR_ARCH_206))
return -EIO;
/*
- * Disable KVM for Power9, untill the required bits merged.
+ * Disable KVM for Power9 in radix mode.
*/
- if (cpu_has_feature(CPU_FTR_ARCH_300))
+ if (cpu_has_feature(CPU_FTR_ARCH_300) && radix_enabled())
return -EIO;
return 0;
@@ -3635,6 +3743,23 @@ static int kvmppc_book3s_init_hv(void)
if (r)
return r;
+ /*
+ * We need a way of accessing the XICS interrupt controller,
+ * either directly, via paca[cpu].kvm_hstate.xics_phys, or
+ * indirectly, via OPAL.
+ */
+#ifdef CONFIG_SMP
+ if (!get_paca()->kvm_hstate.xics_phys) {
+ struct device_node *np;
+
+ np = of_find_compatible_node(NULL, NULL, "ibm,opal-intc");
+ if (!np) {
+ pr_err("KVM-HV: Cannot determine method for accessing XICS\n");
+ return -ENODEV;
+ }
+ }
+#endif
+
kvm_ops_hv.owner = THIS_MODULE;
kvmppc_hv_ops = &kvm_ops_hv;
@@ -3657,3 +3782,4 @@ module_exit(kvmppc_book3s_exit_hv);
MODULE_LICENSE("GPL");
MODULE_ALIAS_MISCDEV(KVM_MINOR);
MODULE_ALIAS("devname:kvm");
+
diff --git a/arch/powerpc/kvm/book3s_hv_builtin.c b/arch/powerpc/kvm/book3s_hv_builtin.c
index 0c84d6bc8356..5bb24be0b346 100644
--- a/arch/powerpc/kvm/book3s_hv_builtin.c
+++ b/arch/powerpc/kvm/book3s_hv_builtin.c
@@ -26,6 +26,8 @@
#include <asm/dbell.h>
#include <asm/cputhreads.h>
#include <asm/io.h>
+#include <asm/opal.h>
+#include <asm/smp.h>
#define KVM_CMA_CHUNK_ORDER 18
@@ -205,12 +207,18 @@ static inline void rm_writeb(unsigned long paddr, u8 val)
void kvmhv_rm_send_ipi(int cpu)
{
unsigned long xics_phys;
+ unsigned long msg = PPC_DBELL_TYPE(PPC_DBELL_SERVER);
- /* On POWER8 for IPIs to threads in the same core, use msgsnd */
+ /* On POWER9 we can use msgsnd for any destination cpu. */
+ if (cpu_has_feature(CPU_FTR_ARCH_300)) {
+ msg |= get_hard_smp_processor_id(cpu);
+ __asm__ __volatile__ (PPC_MSGSND(%0) : : "r" (msg));
+ return;
+ }
+ /* On POWER8 for IPIs to threads in the same core, use msgsnd. */
if (cpu_has_feature(CPU_FTR_ARCH_207S) &&
cpu_first_thread_sibling(cpu) ==
cpu_first_thread_sibling(raw_smp_processor_id())) {
- unsigned long msg = PPC_DBELL_TYPE(PPC_DBELL_SERVER);
msg |= cpu_thread_in_core(cpu);
__asm__ __volatile__ (PPC_MSGSND(%0) : : "r" (msg));
return;
@@ -218,7 +226,11 @@ void kvmhv_rm_send_ipi(int cpu)
/* Else poke the target with an IPI */
xics_phys = paca[cpu].kvm_hstate.xics_phys;
- rm_writeb(xics_phys + XICS_MFRR, IPI_PRIORITY);
+ if (xics_phys)
+ rm_writeb(xics_phys + XICS_MFRR, IPI_PRIORITY);
+ else
+ opal_rm_int_set_mfrr(get_hard_smp_processor_id(cpu),
+ IPI_PRIORITY);
}
/*
@@ -329,7 +341,7 @@ static struct kvmppc_irq_map *get_irqmap(struct kvmppc_passthru_irqmap *pimap,
* saved a copy of the XIRR in the PACA, it will be picked up by
* the host ICP driver.
*/
-static int kvmppc_check_passthru(u32 xisr, __be32 xirr)
+static int kvmppc_check_passthru(u32 xisr, __be32 xirr, bool *again)
{
struct kvmppc_passthru_irqmap *pimap;
struct kvmppc_irq_map *irq_map;
@@ -348,11 +360,11 @@ static int kvmppc_check_passthru(u32 xisr, __be32 xirr)
/* We're handling this interrupt, generic code doesn't need to */
local_paca->kvm_hstate.saved_xirr = 0;
- return kvmppc_deliver_irq_passthru(vcpu, xirr, irq_map, pimap);
+ return kvmppc_deliver_irq_passthru(vcpu, xirr, irq_map, pimap, again);
}
#else
-static inline int kvmppc_check_passthru(u32 xisr, __be32 xirr)
+static inline int kvmppc_check_passthru(u32 xisr, __be32 xirr, bool *again)
{
return 1;
}
@@ -367,14 +379,31 @@ static inline int kvmppc_check_passthru(u32 xisr, __be32 xirr)
* -1 if there was a guest wakeup IPI (which has now been cleared)
* -2 if there is PCI passthrough external interrupt that was handled
*/
+static long kvmppc_read_one_intr(bool *again);
long kvmppc_read_intr(void)
{
+ long ret = 0;
+ long rc;
+ bool again;
+
+ do {
+ again = false;
+ rc = kvmppc_read_one_intr(&again);
+ if (rc && (ret == 0 || rc > ret))
+ ret = rc;
+ } while (again);
+ return ret;
+}
+
+static long kvmppc_read_one_intr(bool *again)
+{
unsigned long xics_phys;
u32 h_xirr;
__be32 xirr;
u32 xisr;
u8 host_ipi;
+ int64_t rc;
/* see if a host IPI is pending */
host_ipi = local_paca->kvm_hstate.host_ipi;
@@ -383,8 +412,14 @@ long kvmppc_read_intr(void)
/* Now read the interrupt from the ICP */
xics_phys = local_paca->kvm_hstate.xics_phys;
- if (unlikely(!xics_phys))
- return 1;
+ if (!xics_phys) {
+ /* Use OPAL to read the XIRR */
+ rc = opal_rm_int_get_xirr(&xirr, false);
+ if (rc < 0)
+ return 1;
+ } else {
+ xirr = _lwzcix(xics_phys + XICS_XIRR);
+ }
/*
* Save XIRR for later. Since we get control in reverse endian
@@ -392,7 +427,6 @@ long kvmppc_read_intr(void)
* host endian. Note that xirr is the value read from the
* XIRR register, while h_xirr is the host endian version.
*/
- xirr = _lwzcix(xics_phys + XICS_XIRR);
h_xirr = be32_to_cpu(xirr);
local_paca->kvm_hstate.saved_xirr = h_xirr;
xisr = h_xirr & 0xffffff;
@@ -411,8 +445,16 @@ long kvmppc_read_intr(void)
* If it is an IPI, clear the MFRR and EOI it.
*/
if (xisr == XICS_IPI) {
- _stbcix(xics_phys + XICS_MFRR, 0xff);
- _stwcix(xics_phys + XICS_XIRR, xirr);
+ if (xics_phys) {
+ _stbcix(xics_phys + XICS_MFRR, 0xff);
+ _stwcix(xics_phys + XICS_XIRR, xirr);
+ } else {
+ opal_rm_int_set_mfrr(hard_smp_processor_id(), 0xff);
+ rc = opal_rm_int_eoi(h_xirr);
+ /* If rc > 0, there is another interrupt pending */
+ *again = rc > 0;
+ }
+
/*
* Need to ensure side effects of above stores
* complete before proceeding.
@@ -429,7 +471,11 @@ long kvmppc_read_intr(void)
/* We raced with the host,
* we need to resend that IPI, bummer
*/
- _stbcix(xics_phys + XICS_MFRR, IPI_PRIORITY);
+ if (xics_phys)
+ _stbcix(xics_phys + XICS_MFRR, IPI_PRIORITY);
+ else
+ opal_rm_int_set_mfrr(hard_smp_processor_id(),
+ IPI_PRIORITY);
/* Let side effects complete */
smp_mb();
return 1;
@@ -440,5 +486,5 @@ long kvmppc_read_intr(void)
return -1;
}
- return kvmppc_check_passthru(xisr, xirr);
+ return kvmppc_check_passthru(xisr, xirr, again);
}
diff --git a/arch/powerpc/kvm/book3s_hv_ras.c b/arch/powerpc/kvm/book3s_hv_ras.c
index 0fa70a9618d7..7ef0993214f3 100644
--- a/arch/powerpc/kvm/book3s_hv_ras.c
+++ b/arch/powerpc/kvm/book3s_hv_ras.c
@@ -16,6 +16,7 @@
#include <asm/machdep.h>
#include <asm/cputhreads.h>
#include <asm/hmi.h>
+#include <asm/kvm_ppc.h>
/* SRR1 bits for machine check on POWER7 */
#define SRR1_MC_LDSTERR (1ul << (63-42))
diff --git a/arch/powerpc/kvm/book3s_hv_rm_mmu.c b/arch/powerpc/kvm/book3s_hv_rm_mmu.c
index 99b4e9d5dd23..9ef3c4be952f 100644
--- a/arch/powerpc/kvm/book3s_hv_rm_mmu.c
+++ b/arch/powerpc/kvm/book3s_hv_rm_mmu.c
@@ -264,8 +264,10 @@ long kvmppc_do_h_enter(struct kvm *kvm, unsigned long flags,
if (pa)
pteh |= HPTE_V_VALID;
- else
+ else {
pteh |= HPTE_V_ABSENT;
+ ptel &= ~(HPTE_R_KEY_HI | HPTE_R_KEY_LO);
+ }
/*If we had host pte mapping then Check WIMG */
if (ptep && !hpte_cache_flags_ok(ptel, is_ci)) {
@@ -351,6 +353,7 @@ long kvmppc_do_h_enter(struct kvm *kvm, unsigned long flags,
/* inval in progress, write a non-present HPTE */
pteh |= HPTE_V_ABSENT;
pteh &= ~HPTE_V_VALID;
+ ptel &= ~(HPTE_R_KEY_HI | HPTE_R_KEY_LO);
unlock_rmap(rmap);
} else {
kvmppc_add_revmap_chain(kvm, rev, rmap, pte_index,
@@ -361,6 +364,11 @@ long kvmppc_do_h_enter(struct kvm *kvm, unsigned long flags,
}
}
+ /* Convert to new format on P9 */
+ if (cpu_has_feature(CPU_FTR_ARCH_300)) {
+ ptel = hpte_old_to_new_r(pteh, ptel);
+ pteh = hpte_old_to_new_v(pteh);
+ }
hpte[1] = cpu_to_be64(ptel);
/* Write the first HPTE dword, unlocking the HPTE and making it valid */
@@ -386,6 +394,13 @@ long kvmppc_h_enter(struct kvm_vcpu *vcpu, unsigned long flags,
#define LOCK_TOKEN (*(u32 *)(&get_paca()->paca_index))
#endif
+static inline int is_mmio_hpte(unsigned long v, unsigned long r)
+{
+ return ((v & HPTE_V_ABSENT) &&
+ (r & (HPTE_R_KEY_HI | HPTE_R_KEY_LO)) ==
+ (HPTE_R_KEY_HI | HPTE_R_KEY_LO));
+}
+
static inline int try_lock_tlbie(unsigned int *lock)
{
unsigned int tmp, old;
@@ -409,13 +424,18 @@ static void do_tlbies(struct kvm *kvm, unsigned long *rbvalues,
{
long i;
+ /*
+ * We use the POWER9 5-operand versions of tlbie and tlbiel here.
+ * Since we are using RIC=0 PRS=0 R=0, and P7/P8 tlbiel ignores
+ * the RS field, this is backwards-compatible with P7 and P8.
+ */
if (global) {
while (!try_lock_tlbie(&kvm->arch.tlbie_lock))
cpu_relax();
if (need_sync)
asm volatile("ptesync" : : : "memory");
for (i = 0; i < npages; ++i)
- asm volatile(PPC_TLBIE(%1,%0) : :
+ asm volatile(PPC_TLBIE_5(%0,%1,0,0,0) : :
"r" (rbvalues[i]), "r" (kvm->arch.lpid));
asm volatile("eieio; tlbsync; ptesync" : : : "memory");
kvm->arch.tlbie_lock = 0;
@@ -423,7 +443,8 @@ static void do_tlbies(struct kvm *kvm, unsigned long *rbvalues,
if (need_sync)
asm volatile("ptesync" : : : "memory");
for (i = 0; i < npages; ++i)
- asm volatile("tlbiel %0" : : "r" (rbvalues[i]));
+ asm volatile(PPC_TLBIEL(%0,%1,0,0,0) : :
+ "r" (rbvalues[i]), "r" (0));
asm volatile("ptesync" : : : "memory");
}
}
@@ -435,18 +456,23 @@ long kvmppc_do_h_remove(struct kvm *kvm, unsigned long flags,
__be64 *hpte;
unsigned long v, r, rb;
struct revmap_entry *rev;
- u64 pte;
+ u64 pte, orig_pte, pte_r;
if (pte_index >= kvm->arch.hpt_npte)
return H_PARAMETER;
hpte = (__be64 *)(kvm->arch.hpt_virt + (pte_index << 4));
while (!try_lock_hpte(hpte, HPTE_V_HVLOCK))
cpu_relax();
- pte = be64_to_cpu(hpte[0]);
+ pte = orig_pte = be64_to_cpu(hpte[0]);
+ pte_r = be64_to_cpu(hpte[1]);
+ if (cpu_has_feature(CPU_FTR_ARCH_300)) {
+ pte = hpte_new_to_old_v(pte, pte_r);
+ pte_r = hpte_new_to_old_r(pte_r);
+ }
if ((pte & (HPTE_V_ABSENT | HPTE_V_VALID)) == 0 ||
((flags & H_AVPN) && (pte & ~0x7fUL) != avpn) ||
((flags & H_ANDCOND) && (pte & avpn) != 0)) {
- __unlock_hpte(hpte, pte);
+ __unlock_hpte(hpte, orig_pte);
return H_NOT_FOUND;
}
@@ -454,7 +480,7 @@ long kvmppc_do_h_remove(struct kvm *kvm, unsigned long flags,
v = pte & ~HPTE_V_HVLOCK;
if (v & HPTE_V_VALID) {
hpte[0] &= ~cpu_to_be64(HPTE_V_VALID);
- rb = compute_tlbie_rb(v, be64_to_cpu(hpte[1]), pte_index);
+ rb = compute_tlbie_rb(v, pte_r, pte_index);
do_tlbies(kvm, &rb, 1, global_invalidates(kvm, flags), true);
/*
* The reference (R) and change (C) bits in a HPT
@@ -472,6 +498,9 @@ long kvmppc_do_h_remove(struct kvm *kvm, unsigned long flags,
note_hpte_modification(kvm, rev);
unlock_hpte(hpte, 0);
+ if (is_mmio_hpte(v, pte_r))
+ atomic64_inc(&kvm->arch.mmio_update);
+
if (v & HPTE_V_ABSENT)
v = (v & ~HPTE_V_ABSENT) | HPTE_V_VALID;
hpret[0] = v;
@@ -498,7 +527,7 @@ long kvmppc_h_bulk_remove(struct kvm_vcpu *vcpu)
int global;
long int ret = H_SUCCESS;
struct revmap_entry *rev, *revs[4];
- u64 hp0;
+ u64 hp0, hp1;
global = global_invalidates(kvm, 0);
for (i = 0; i < 4 && ret == H_SUCCESS; ) {
@@ -531,6 +560,11 @@ long kvmppc_h_bulk_remove(struct kvm_vcpu *vcpu)
}
found = 0;
hp0 = be64_to_cpu(hp[0]);
+ hp1 = be64_to_cpu(hp[1]);
+ if (cpu_has_feature(CPU_FTR_ARCH_300)) {
+ hp0 = hpte_new_to_old_v(hp0, hp1);
+ hp1 = hpte_new_to_old_r(hp1);
+ }
if (hp0 & (HPTE_V_ABSENT | HPTE_V_VALID)) {
switch (flags & 3) {
case 0: /* absolute */
@@ -561,13 +595,14 @@ long kvmppc_h_bulk_remove(struct kvm_vcpu *vcpu)
rcbits = rev->guest_rpte & (HPTE_R_R|HPTE_R_C);
args[j] |= rcbits << (56 - 5);
hp[0] = 0;
+ if (is_mmio_hpte(hp0, hp1))
+ atomic64_inc(&kvm->arch.mmio_update);
continue;
}
/* leave it locked */
hp[0] &= ~cpu_to_be64(HPTE_V_VALID);
- tlbrb[n] = compute_tlbie_rb(be64_to_cpu(hp[0]),
- be64_to_cpu(hp[1]), pte_index);
+ tlbrb[n] = compute_tlbie_rb(hp0, hp1, pte_index);
indexes[n] = j;
hptes[n] = hp;
revs[n] = rev;
@@ -605,7 +640,7 @@ long kvmppc_h_protect(struct kvm_vcpu *vcpu, unsigned long flags,
__be64 *hpte;
struct revmap_entry *rev;
unsigned long v, r, rb, mask, bits;
- u64 pte;
+ u64 pte_v, pte_r;
if (pte_index >= kvm->arch.hpt_npte)
return H_PARAMETER;
@@ -613,14 +648,16 @@ long kvmppc_h_protect(struct kvm_vcpu *vcpu, unsigned long flags,
hpte = (__be64 *)(kvm->arch.hpt_virt + (pte_index << 4));
while (!try_lock_hpte(hpte, HPTE_V_HVLOCK))
cpu_relax();
- pte = be64_to_cpu(hpte[0]);
- if ((pte & (HPTE_V_ABSENT | HPTE_V_VALID)) == 0 ||
- ((flags & H_AVPN) && (pte & ~0x7fUL) != avpn)) {
- __unlock_hpte(hpte, pte);
+ v = pte_v = be64_to_cpu(hpte[0]);
+ if (cpu_has_feature(CPU_FTR_ARCH_300))
+ v = hpte_new_to_old_v(v, be64_to_cpu(hpte[1]));
+ if ((v & (HPTE_V_ABSENT | HPTE_V_VALID)) == 0 ||
+ ((flags & H_AVPN) && (v & ~0x7fUL) != avpn)) {
+ __unlock_hpte(hpte, pte_v);
return H_NOT_FOUND;
}
- v = pte;
+ pte_r = be64_to_cpu(hpte[1]);
bits = (flags << 55) & HPTE_R_PP0;
bits |= (flags << 48) & HPTE_R_KEY_HI;
bits |= flags & (HPTE_R_PP | HPTE_R_N | HPTE_R_KEY_LO);
@@ -642,22 +679,26 @@ long kvmppc_h_protect(struct kvm_vcpu *vcpu, unsigned long flags,
* readonly to writable. If it should be writable, we'll
* take a trap and let the page fault code sort it out.
*/
- pte = be64_to_cpu(hpte[1]);
- r = (pte & ~mask) | bits;
- if (hpte_is_writable(r) && !hpte_is_writable(pte))
+ r = (pte_r & ~mask) | bits;
+ if (hpte_is_writable(r) && !hpte_is_writable(pte_r))
r = hpte_make_readonly(r);
/* If the PTE is changing, invalidate it first */
- if (r != pte) {
+ if (r != pte_r) {
rb = compute_tlbie_rb(v, r, pte_index);
- hpte[0] = cpu_to_be64((v & ~HPTE_V_VALID) |
+ hpte[0] = cpu_to_be64((pte_v & ~HPTE_V_VALID) |
HPTE_V_ABSENT);
do_tlbies(kvm, &rb, 1, global_invalidates(kvm, flags),
true);
+ /* Don't lose R/C bit updates done by hardware */
+ r |= be64_to_cpu(hpte[1]) & (HPTE_R_R | HPTE_R_C);
hpte[1] = cpu_to_be64(r);
}
}
- unlock_hpte(hpte, v & ~HPTE_V_HVLOCK);
+ unlock_hpte(hpte, pte_v & ~HPTE_V_HVLOCK);
asm volatile("ptesync" : : : "memory");
+ if (is_mmio_hpte(v, pte_r))
+ atomic64_inc(&kvm->arch.mmio_update);
+
return H_SUCCESS;
}
@@ -681,6 +722,10 @@ long kvmppc_h_read(struct kvm_vcpu *vcpu, unsigned long flags,
hpte = (__be64 *)(kvm->arch.hpt_virt + (pte_index << 4));
v = be64_to_cpu(hpte[0]) & ~HPTE_V_HVLOCK;
r = be64_to_cpu(hpte[1]);
+ if (cpu_has_feature(CPU_FTR_ARCH_300)) {
+ v = hpte_new_to_old_v(v, r);
+ r = hpte_new_to_old_r(r);
+ }
if (v & HPTE_V_ABSENT) {
v &= ~HPTE_V_ABSENT;
v |= HPTE_V_VALID;
@@ -798,10 +843,16 @@ void kvmppc_invalidate_hpte(struct kvm *kvm, __be64 *hptep,
unsigned long pte_index)
{
unsigned long rb;
+ u64 hp0, hp1;
hptep[0] &= ~cpu_to_be64(HPTE_V_VALID);
- rb = compute_tlbie_rb(be64_to_cpu(hptep[0]), be64_to_cpu(hptep[1]),
- pte_index);
+ hp0 = be64_to_cpu(hptep[0]);
+ hp1 = be64_to_cpu(hptep[1]);
+ if (cpu_has_feature(CPU_FTR_ARCH_300)) {
+ hp0 = hpte_new_to_old_v(hp0, hp1);
+ hp1 = hpte_new_to_old_r(hp1);
+ }
+ rb = compute_tlbie_rb(hp0, hp1, pte_index);
do_tlbies(kvm, &rb, 1, 1, true);
}
EXPORT_SYMBOL_GPL(kvmppc_invalidate_hpte);
@@ -811,9 +862,15 @@ void kvmppc_clear_ref_hpte(struct kvm *kvm, __be64 *hptep,
{
unsigned long rb;
unsigned char rbyte;
+ u64 hp0, hp1;
- rb = compute_tlbie_rb(be64_to_cpu(hptep[0]), be64_to_cpu(hptep[1]),
- pte_index);
+ hp0 = be64_to_cpu(hptep[0]);
+ hp1 = be64_to_cpu(hptep[1]);
+ if (cpu_has_feature(CPU_FTR_ARCH_300)) {
+ hp0 = hpte_new_to_old_v(hp0, hp1);
+ hp1 = hpte_new_to_old_r(hp1);
+ }
+ rb = compute_tlbie_rb(hp0, hp1, pte_index);
rbyte = (be64_to_cpu(hptep[1]) & ~HPTE_R_R) >> 8;
/* modify only the second-last byte, which contains the ref bit */
*((char *)hptep + 14) = rbyte;
@@ -828,6 +885,37 @@ static int slb_base_page_shift[4] = {
20, /* 1M, unsupported */
};
+static struct mmio_hpte_cache_entry *mmio_cache_search(struct kvm_vcpu *vcpu,
+ unsigned long eaddr, unsigned long slb_v, long mmio_update)
+{
+ struct mmio_hpte_cache_entry *entry = NULL;
+ unsigned int pshift;
+ unsigned int i;
+
+ for (i = 0; i < MMIO_HPTE_CACHE_SIZE; i++) {
+ entry = &vcpu->arch.mmio_cache.entry[i];
+ if (entry->mmio_update == mmio_update) {
+ pshift = entry->slb_base_pshift;
+ if ((entry->eaddr >> pshift) == (eaddr >> pshift) &&
+ entry->slb_v == slb_v)
+ return entry;
+ }
+ }
+ return NULL;
+}
+
+static struct mmio_hpte_cache_entry *
+ next_mmio_cache_entry(struct kvm_vcpu *vcpu)
+{
+ unsigned int index = vcpu->arch.mmio_cache.index;
+
+ vcpu->arch.mmio_cache.index++;
+ if (vcpu->arch.mmio_cache.index == MMIO_HPTE_CACHE_SIZE)
+ vcpu->arch.mmio_cache.index = 0;
+
+ return &vcpu->arch.mmio_cache.entry[index];
+}
+
/* When called from virtmode, this func should be protected by
* preempt_disable(), otherwise, the holding of HPTE_V_HVLOCK
* can trigger deadlock issue.
@@ -842,7 +930,7 @@ long kvmppc_hv_find_lock_hpte(struct kvm *kvm, gva_t eaddr, unsigned long slb_v,
unsigned long avpn;
__be64 *hpte;
unsigned long mask, val;
- unsigned long v, r;
+ unsigned long v, r, orig_v;
/* Get page shift, work out hash and AVPN etc. */
mask = SLB_VSID_B | HPTE_V_AVPN | HPTE_V_SECONDARY;
@@ -877,6 +965,8 @@ long kvmppc_hv_find_lock_hpte(struct kvm *kvm, gva_t eaddr, unsigned long slb_v,
for (i = 0; i < 16; i += 2) {
/* Read the PTE racily */
v = be64_to_cpu(hpte[i]) & ~HPTE_V_HVLOCK;
+ if (cpu_has_feature(CPU_FTR_ARCH_300))
+ v = hpte_new_to_old_v(v, be64_to_cpu(hpte[i+1]));
/* Check valid/absent, hash, segment size and AVPN */
if (!(v & valid) || (v & mask) != val)
@@ -885,8 +975,12 @@ long kvmppc_hv_find_lock_hpte(struct kvm *kvm, gva_t eaddr, unsigned long slb_v,
/* Lock the PTE and read it under the lock */
while (!try_lock_hpte(&hpte[i], HPTE_V_HVLOCK))
cpu_relax();
- v = be64_to_cpu(hpte[i]) & ~HPTE_V_HVLOCK;
+ v = orig_v = be64_to_cpu(hpte[i]) & ~HPTE_V_HVLOCK;
r = be64_to_cpu(hpte[i+1]);
+ if (cpu_has_feature(CPU_FTR_ARCH_300)) {
+ v = hpte_new_to_old_v(v, r);
+ r = hpte_new_to_old_r(r);
+ }
/*
* Check the HPTE again, including base page size
@@ -896,7 +990,7 @@ long kvmppc_hv_find_lock_hpte(struct kvm *kvm, gva_t eaddr, unsigned long slb_v,
/* Return with the HPTE still locked */
return (hash << 3) + (i >> 1);
- __unlock_hpte(&hpte[i], v);
+ __unlock_hpte(&hpte[i], orig_v);
}
if (val & HPTE_V_SECONDARY)
@@ -924,30 +1018,45 @@ long kvmppc_hpte_hv_fault(struct kvm_vcpu *vcpu, unsigned long addr,
{
struct kvm *kvm = vcpu->kvm;
long int index;
- unsigned long v, r, gr;
+ unsigned long v, r, gr, orig_v;
__be64 *hpte;
unsigned long valid;
struct revmap_entry *rev;
unsigned long pp, key;
+ struct mmio_hpte_cache_entry *cache_entry = NULL;
+ long mmio_update = 0;
/* For protection fault, expect to find a valid HPTE */
valid = HPTE_V_VALID;
- if (status & DSISR_NOHPTE)
+ if (status & DSISR_NOHPTE) {
valid |= HPTE_V_ABSENT;
-
- index = kvmppc_hv_find_lock_hpte(kvm, addr, slb_v, valid);
- if (index < 0) {
- if (status & DSISR_NOHPTE)
- return status; /* there really was no HPTE */
- return 0; /* for prot fault, HPTE disappeared */
+ mmio_update = atomic64_read(&kvm->arch.mmio_update);
+ cache_entry = mmio_cache_search(vcpu, addr, slb_v, mmio_update);
}
- hpte = (__be64 *)(kvm->arch.hpt_virt + (index << 4));
- v = be64_to_cpu(hpte[0]) & ~HPTE_V_HVLOCK;
- r = be64_to_cpu(hpte[1]);
- rev = real_vmalloc_addr(&kvm->arch.revmap[index]);
- gr = rev->guest_rpte;
+ if (cache_entry) {
+ index = cache_entry->pte_index;
+ v = cache_entry->hpte_v;
+ r = cache_entry->hpte_r;
+ gr = cache_entry->rpte;
+ } else {
+ index = kvmppc_hv_find_lock_hpte(kvm, addr, slb_v, valid);
+ if (index < 0) {
+ if (status & DSISR_NOHPTE)
+ return status; /* there really was no HPTE */
+ return 0; /* for prot fault, HPTE disappeared */
+ }
+ hpte = (__be64 *)(kvm->arch.hpt_virt + (index << 4));
+ v = orig_v = be64_to_cpu(hpte[0]) & ~HPTE_V_HVLOCK;
+ r = be64_to_cpu(hpte[1]);
+ if (cpu_has_feature(CPU_FTR_ARCH_300)) {
+ v = hpte_new_to_old_v(v, r);
+ r = hpte_new_to_old_r(r);
+ }
+ rev = real_vmalloc_addr(&kvm->arch.revmap[index]);
+ gr = rev->guest_rpte;
- unlock_hpte(hpte, v);
+ unlock_hpte(hpte, orig_v);
+ }
/* For not found, if the HPTE is valid by now, retry the instruction */
if ((status & DSISR_NOHPTE) && (v & HPTE_V_VALID))
@@ -985,12 +1094,32 @@ long kvmppc_hpte_hv_fault(struct kvm_vcpu *vcpu, unsigned long addr,
vcpu->arch.pgfault_index = index;
vcpu->arch.pgfault_hpte[0] = v;
vcpu->arch.pgfault_hpte[1] = r;
+ vcpu->arch.pgfault_cache = cache_entry;
/* Check the storage key to see if it is possibly emulated MMIO */
- if (data && (vcpu->arch.shregs.msr & MSR_IR) &&
- (r & (HPTE_R_KEY_HI | HPTE_R_KEY_LO)) ==
- (HPTE_R_KEY_HI | HPTE_R_KEY_LO))
- return -2; /* MMIO emulation - load instr word */
+ if ((r & (HPTE_R_KEY_HI | HPTE_R_KEY_LO)) ==
+ (HPTE_R_KEY_HI | HPTE_R_KEY_LO)) {
+ if (!cache_entry) {
+ unsigned int pshift = 12;
+ unsigned int pshift_index;
+
+ if (slb_v & SLB_VSID_L) {
+ pshift_index = ((slb_v & SLB_VSID_LP) >> 4);
+ pshift = slb_base_page_shift[pshift_index];
+ }
+ cache_entry = next_mmio_cache_entry(vcpu);
+ cache_entry->eaddr = addr;
+ cache_entry->slb_base_pshift = pshift;
+ cache_entry->pte_index = index;
+ cache_entry->hpte_v = v;
+ cache_entry->hpte_r = r;
+ cache_entry->rpte = gr;
+ cache_entry->slb_v = slb_v;
+ cache_entry->mmio_update = mmio_update;
+ }
+ if (data && (vcpu->arch.shregs.msr & MSR_IR))
+ return -2; /* MMIO emulation - load instr word */
+ }
return -1; /* send fault up to host kernel mode */
}
diff --git a/arch/powerpc/kvm/book3s_hv_rm_xics.c b/arch/powerpc/kvm/book3s_hv_rm_xics.c
index a0ea63ac2b52..06edc4366639 100644
--- a/arch/powerpc/kvm/book3s_hv_rm_xics.c
+++ b/arch/powerpc/kvm/book3s_hv_rm_xics.c
@@ -70,7 +70,11 @@ static inline void icp_send_hcore_msg(int hcore, struct kvm_vcpu *vcpu)
hcpu = hcore << threads_shift;
kvmppc_host_rm_ops_hv->rm_core[hcore].rm_data = vcpu;
smp_muxed_ipi_set_message(hcpu, PPC_MSG_RM_HOST_ACTION);
- icp_native_cause_ipi_rm(hcpu);
+ if (paca[hcpu].kvm_hstate.xics_phys)
+ icp_native_cause_ipi_rm(hcpu);
+ else
+ opal_rm_int_set_mfrr(get_hard_smp_processor_id(hcpu),
+ IPI_PRIORITY);
}
#else
static inline void icp_send_hcore_msg(int hcore, struct kvm_vcpu *vcpu) { }
@@ -737,7 +741,7 @@ int kvmppc_rm_h_eoi(struct kvm_vcpu *vcpu, unsigned long xirr)
unsigned long eoi_rc;
-static void icp_eoi(struct irq_chip *c, u32 hwirq, u32 xirr)
+static void icp_eoi(struct irq_chip *c, u32 hwirq, __be32 xirr, bool *again)
{
unsigned long xics_phys;
int64_t rc;
@@ -751,7 +755,12 @@ static void icp_eoi(struct irq_chip *c, u32 hwirq, u32 xirr)
/* EOI it */
xics_phys = local_paca->kvm_hstate.xics_phys;
- _stwcix(xics_phys + XICS_XIRR, xirr);
+ if (xics_phys) {
+ _stwcix(xics_phys + XICS_XIRR, xirr);
+ } else {
+ rc = opal_rm_int_eoi(be32_to_cpu(xirr));
+ *again = rc > 0;
+ }
}
static int xics_opal_rm_set_server(unsigned int hw_irq, int server_cpu)
@@ -809,9 +818,10 @@ static void kvmppc_rm_handle_irq_desc(struct irq_desc *desc)
}
long kvmppc_deliver_irq_passthru(struct kvm_vcpu *vcpu,
- u32 xirr,
+ __be32 xirr,
struct kvmppc_irq_map *irq_map,
- struct kvmppc_passthru_irqmap *pimap)
+ struct kvmppc_passthru_irqmap *pimap,
+ bool *again)
{
struct kvmppc_xics *xics;
struct kvmppc_icp *icp;
@@ -825,7 +835,8 @@ long kvmppc_deliver_irq_passthru(struct kvm_vcpu *vcpu,
icp_rm_deliver_irq(xics, icp, irq);
/* EOI the interrupt */
- icp_eoi(irq_desc_get_chip(irq_map->desc), irq_map->r_hwirq, xirr);
+ icp_eoi(irq_desc_get_chip(irq_map->desc), irq_map->r_hwirq, xirr,
+ again);
if (check_too_hard(xics, icp) == H_TOO_HARD)
return 2;
diff --git a/arch/powerpc/kvm/book3s_hv_rmhandlers.S b/arch/powerpc/kvm/book3s_hv_rmhandlers.S
index c3c1d1bcfc67..9338a818e05c 100644
--- a/arch/powerpc/kvm/book3s_hv_rmhandlers.S
+++ b/arch/powerpc/kvm/book3s_hv_rmhandlers.S
@@ -501,17 +501,9 @@ END_FTR_SECTION_IFSET(CPU_FTR_ARCH_207S)
cmpwi r0, 0
beq 57f
li r3, (LPCR_PECEDH | LPCR_PECE0) >> 4
- mfspr r4, SPRN_LPCR
- rlwimi r4, r3, 4, (LPCR_PECEDP | LPCR_PECEDH | LPCR_PECE0 | LPCR_PECE1)
- mtspr SPRN_LPCR, r4
- isync
- std r0, HSTATE_SCRATCH0(r13)
- ptesync
- ld r0, HSTATE_SCRATCH0(r13)
-1: cmpd r0, r0
- bne 1b
- nap
- b .
+ mfspr r5, SPRN_LPCR
+ rlwimi r5, r3, 4, (LPCR_PECEDP | LPCR_PECEDH | LPCR_PECE0 | LPCR_PECE1)
+ b kvm_nap_sequence
57: li r0, 0
stbx r0, r3, r4
@@ -523,6 +515,10 @@ END_FTR_SECTION_IFSET(CPU_FTR_ARCH_207S)
* *
*****************************************************************************/
+/* Stack frame offsets */
+#define STACK_SLOT_TID (112-16)
+#define STACK_SLOT_PSSCR (112-24)
+
.global kvmppc_hv_entry
kvmppc_hv_entry:
@@ -581,12 +577,14 @@ kvmppc_hv_entry:
ld r9,VCORE_KVM(r5) /* pointer to struct kvm */
cmpwi r6,0
bne 10f
- ld r6,KVM_SDR1(r9)
lwz r7,KVM_LPID(r9)
+BEGIN_FTR_SECTION
+ ld r6,KVM_SDR1(r9)
li r0,LPID_RSVD /* switch to reserved LPID */
mtspr SPRN_LPID,r0
ptesync
mtspr SPRN_SDR1,r6 /* switch to partition page table */
+END_FTR_SECTION_IFCLR(CPU_FTR_ARCH_300)
mtspr SPRN_LPID,r7
isync
@@ -607,12 +605,8 @@ kvmppc_hv_entry:
stdcx. r7,0,r6
bne 23b
/* Flush the TLB of any entries for this LPID */
- /* use arch 2.07S as a proxy for POWER8 */
-BEGIN_FTR_SECTION
- li r6,512 /* POWER8 has 512 sets */
-FTR_SECTION_ELSE
- li r6,128 /* POWER7 has 128 sets */
-ALT_FTR_SECTION_END_IFSET(CPU_FTR_ARCH_207S)
+ lwz r6,KVM_TLB_SETS(r9)
+ li r0,0 /* RS for P9 version of tlbiel */
mtctr r6
li r7,0x800 /* IS field = 0b10 */
ptesync
@@ -698,6 +692,14 @@ kvmppc_got_guest:
mtspr SPRN_PURR,r7
mtspr SPRN_SPURR,r8
+ /* Save host values of some registers */
+BEGIN_FTR_SECTION
+ mfspr r5, SPRN_TIDR
+ mfspr r6, SPRN_PSSCR
+ std r5, STACK_SLOT_TID(r1)
+ std r6, STACK_SLOT_PSSCR(r1)
+END_FTR_SECTION_IFSET(CPU_FTR_ARCH_300)
+
BEGIN_FTR_SECTION
/* Set partition DABR */
/* Do this before re-enabling PMU to avoid P7 DABR corruption bug */
@@ -750,14 +752,16 @@ END_FTR_SECTION_IFSET(CPU_FTR_PMAO_BUG)
BEGIN_FTR_SECTION
ld r5, VCPU_MMCR + 24(r4)
ld r6, VCPU_SIER(r4)
+ mtspr SPRN_MMCR2, r5
+ mtspr SPRN_SIER, r6
+BEGIN_FTR_SECTION_NESTED(96)
lwz r7, VCPU_PMC + 24(r4)
lwz r8, VCPU_PMC + 28(r4)
ld r9, VCPU_MMCR + 32(r4)
- mtspr SPRN_MMCR2, r5
- mtspr SPRN_SIER, r6
mtspr SPRN_SPMC1, r7
mtspr SPRN_SPMC2, r8
mtspr SPRN_MMCRS, r9
+END_FTR_SECTION_NESTED(CPU_FTR_ARCH_300, 0, 96)
END_FTR_SECTION_IFSET(CPU_FTR_ARCH_207S)
mtspr SPRN_MMCR0, r3
isync
@@ -813,20 +817,30 @@ END_FTR_SECTION_IFCLR(CPU_FTR_ARCH_207S)
mtspr SPRN_EBBHR, r8
ld r5, VCPU_EBBRR(r4)
ld r6, VCPU_BESCR(r4)
- ld r7, VCPU_CSIGR(r4)
- ld r8, VCPU_TACR(r4)
+ lwz r7, VCPU_GUEST_PID(r4)
+ ld r8, VCPU_WORT(r4)
mtspr SPRN_EBBRR, r5
mtspr SPRN_BESCR, r6
- mtspr SPRN_CSIGR, r7
- mtspr SPRN_TACR, r8
+ mtspr SPRN_PID, r7
+ mtspr SPRN_WORT, r8
+BEGIN_FTR_SECTION
+ /* POWER8-only registers */
ld r5, VCPU_TCSCR(r4)
ld r6, VCPU_ACOP(r4)
- lwz r7, VCPU_GUEST_PID(r4)
- ld r8, VCPU_WORT(r4)
+ ld r7, VCPU_CSIGR(r4)
+ ld r8, VCPU_TACR(r4)
mtspr SPRN_TCSCR, r5
mtspr SPRN_ACOP, r6
- mtspr SPRN_PID, r7
- mtspr SPRN_WORT, r8
+ mtspr SPRN_CSIGR, r7
+ mtspr SPRN_TACR, r8
+FTR_SECTION_ELSE
+ /* POWER9-only registers */
+ ld r5, VCPU_TID(r4)
+ ld r6, VCPU_PSSCR(r4)
+ oris r6, r6, PSSCR_EC@h /* This makes stop trap to HV */
+ mtspr SPRN_TIDR, r5
+ mtspr SPRN_PSSCR, r6
+ALT_FTR_SECTION_END_IFCLR(CPU_FTR_ARCH_300)
8:
/*
@@ -1341,20 +1355,29 @@ END_FTR_SECTION_IFCLR(CPU_FTR_ARCH_207S)
std r8, VCPU_EBBHR(r9)
mfspr r5, SPRN_EBBRR
mfspr r6, SPRN_BESCR
- mfspr r7, SPRN_CSIGR
- mfspr r8, SPRN_TACR
+ mfspr r7, SPRN_PID
+ mfspr r8, SPRN_WORT
std r5, VCPU_EBBRR(r9)
std r6, VCPU_BESCR(r9)
- std r7, VCPU_CSIGR(r9)
- std r8, VCPU_TACR(r9)
+ stw r7, VCPU_GUEST_PID(r9)
+ std r8, VCPU_WORT(r9)
+BEGIN_FTR_SECTION
mfspr r5, SPRN_TCSCR
mfspr r6, SPRN_ACOP
- mfspr r7, SPRN_PID
- mfspr r8, SPRN_WORT
+ mfspr r7, SPRN_CSIGR
+ mfspr r8, SPRN_TACR
std r5, VCPU_TCSCR(r9)
std r6, VCPU_ACOP(r9)
- stw r7, VCPU_GUEST_PID(r9)
- std r8, VCPU_WORT(r9)
+ std r7, VCPU_CSIGR(r9)
+ std r8, VCPU_TACR(r9)
+FTR_SECTION_ELSE
+ mfspr r5, SPRN_TIDR
+ mfspr r6, SPRN_PSSCR
+ std r5, VCPU_TID(r9)
+ rldicl r6, r6, 4, 50 /* r6 &= PSSCR_GUEST_VIS */
+ rotldi r6, r6, 60
+ std r6, VCPU_PSSCR(r9)
+ALT_FTR_SECTION_END_IFCLR(CPU_FTR_ARCH_300)
/*
* Restore various registers to 0, where non-zero values
* set by the guest could disrupt the host.
@@ -1363,12 +1386,14 @@ END_FTR_SECTION_IFCLR(CPU_FTR_ARCH_207S)
mtspr SPRN_IAMR, r0
mtspr SPRN_CIABR, r0
mtspr SPRN_DAWRX, r0
- mtspr SPRN_TCSCR, r0
mtspr SPRN_WORT, r0
+BEGIN_FTR_SECTION
+ mtspr SPRN_TCSCR, r0
/* Set MMCRS to 1<<31 to freeze and disable the SPMC counters */
li r0, 1
sldi r0, r0, 31
mtspr SPRN_MMCRS, r0
+END_FTR_SECTION_IFCLR(CPU_FTR_ARCH_300)
8:
/* Save and reset AMR and UAMOR before turning on the MMU */
@@ -1502,15 +1527,17 @@ END_FTR_SECTION_IFSET(CPU_FTR_ARCH_207S)
stw r8, VCPU_PMC + 20(r9)
BEGIN_FTR_SECTION
mfspr r5, SPRN_SIER
+ std r5, VCPU_SIER(r9)
+BEGIN_FTR_SECTION_NESTED(96)
mfspr r6, SPRN_SPMC1
mfspr r7, SPRN_SPMC2
mfspr r8, SPRN_MMCRS
- std r5, VCPU_SIER(r9)
stw r6, VCPU_PMC + 24(r9)
stw r7, VCPU_PMC + 28(r9)
std r8, VCPU_MMCR + 32(r9)
lis r4, 0x8000
mtspr SPRN_MMCRS, r4
+END_FTR_SECTION_NESTED(CPU_FTR_ARCH_300, 0, 96)
END_FTR_SECTION_IFSET(CPU_FTR_ARCH_207S)
22:
/* Clear out SLB */
@@ -1519,6 +1546,14 @@ END_FTR_SECTION_IFSET(CPU_FTR_ARCH_207S)
slbia
ptesync
+ /* Restore host values of some registers */
+BEGIN_FTR_SECTION
+ ld r5, STACK_SLOT_TID(r1)
+ ld r6, STACK_SLOT_PSSCR(r1)
+ mtspr SPRN_TIDR, r5
+ mtspr SPRN_PSSCR, r6
+END_FTR_SECTION_IFSET(CPU_FTR_ARCH_300)
+
/*
* POWER7/POWER8 guest -> host partition switch code.
* We don't have to lock against tlbies but we do
@@ -1552,12 +1587,14 @@ kvmhv_switch_to_host:
beq 19f
/* Primary thread switches back to host partition */
- ld r6,KVM_HOST_SDR1(r4)
lwz r7,KVM_HOST_LPID(r4)
+BEGIN_FTR_SECTION
+ ld r6,KVM_HOST_SDR1(r4)
li r8,LPID_RSVD /* switch to reserved LPID */
mtspr SPRN_LPID,r8
ptesync
- mtspr SPRN_SDR1,r6 /* switch to partition page table */
+ mtspr SPRN_SDR1,r6 /* switch to host page table */
+END_FTR_SECTION_IFCLR(CPU_FTR_ARCH_300)
mtspr SPRN_LPID,r7
isync
@@ -2211,6 +2248,21 @@ BEGIN_FTR_SECTION
ori r5, r5, LPCR_PECEDH
rlwimi r5, r3, 0, LPCR_PECEDP
END_FTR_SECTION_IFSET(CPU_FTR_ARCH_207S)
+
+kvm_nap_sequence: /* desired LPCR value in r5 */
+BEGIN_FTR_SECTION
+ /*
+ * PSSCR bits: exit criterion = 1 (wakeup based on LPCR at sreset)
+ * enable state loss = 1 (allow SMT mode switch)
+ * requested level = 0 (just stop dispatching)
+ */
+ lis r3, (PSSCR_EC | PSSCR_ESL)@h
+ mtspr SPRN_PSSCR, r3
+ /* Set LPCR_PECE_HVEE bit to enable wakeup by HV interrupts */
+ li r4, LPCR_PECE_HVEE@higher
+ sldi r4, r4, 32
+ or r5, r5, r4
+END_FTR_SECTION_IFSET(CPU_FTR_ARCH_300)
mtspr SPRN_LPCR,r5
isync
li r0, 0
@@ -2219,7 +2271,11 @@ END_FTR_SECTION_IFSET(CPU_FTR_ARCH_207S)
ld r0, HSTATE_SCRATCH0(r13)
1: cmpd r0, r0
bne 1b
+BEGIN_FTR_SECTION
nap
+FTR_SECTION_ELSE
+ PPC_STOP
+ALT_FTR_SECTION_END_IFCLR(CPU_FTR_ARCH_300)
b .
33: mr r4, r3
@@ -2600,11 +2656,13 @@ kvmppc_save_tm:
mfctr r7
mfspr r8, SPRN_AMR
mfspr r10, SPRN_TAR
+ mfxer r11
std r5, VCPU_LR_TM(r9)
stw r6, VCPU_CR_TM(r9)
std r7, VCPU_CTR_TM(r9)
std r8, VCPU_AMR_TM(r9)
std r10, VCPU_TAR_TM(r9)
+ std r11, VCPU_XER_TM(r9)
/* Restore r12 as trap number. */
lwz r12, VCPU_TRAP(r9)
@@ -2697,11 +2755,13 @@ kvmppc_restore_tm:
ld r7, VCPU_CTR_TM(r4)
ld r8, VCPU_AMR_TM(r4)
ld r9, VCPU_TAR_TM(r4)
+ ld r10, VCPU_XER_TM(r4)
mtlr r5
mtcr r6
mtctr r7
mtspr SPRN_AMR, r8
mtspr SPRN_TAR, r9
+ mtxer r10
/*
* Load up PPR and DSCR values but don't put them in the actual SPRs
diff --git a/arch/powerpc/kvm/powerpc.c b/arch/powerpc/kvm/powerpc.c
index 70963c845e96..efd1183a6b16 100644
--- a/arch/powerpc/kvm/powerpc.c
+++ b/arch/powerpc/kvm/powerpc.c
@@ -536,7 +536,6 @@ int kvm_vm_ioctl_check_extension(struct kvm *kvm, long ext)
#ifdef CONFIG_PPC_BOOK3S_64
case KVM_CAP_SPAPR_TCE:
case KVM_CAP_SPAPR_TCE_64:
- case KVM_CAP_PPC_ALLOC_HTAB:
case KVM_CAP_PPC_RTAS:
case KVM_CAP_PPC_FIXUP_HCALL:
case KVM_CAP_PPC_ENABLE_HCALL:
@@ -545,13 +544,20 @@ int kvm_vm_ioctl_check_extension(struct kvm *kvm, long ext)
#endif
r = 1;
break;
+
+ case KVM_CAP_PPC_ALLOC_HTAB:
+ r = hv_enabled;
+ break;
#endif /* CONFIG_PPC_BOOK3S_64 */
#ifdef CONFIG_KVM_BOOK3S_HV_POSSIBLE
case KVM_CAP_PPC_SMT:
- if (hv_enabled)
- r = threads_per_subcore;
- else
- r = 0;
+ r = 0;
+ if (hv_enabled) {
+ if (cpu_has_feature(CPU_FTR_ARCH_300))
+ r = 1;
+ else
+ r = threads_per_subcore;
+ }
break;
case KVM_CAP_PPC_RMA:
r = 0;
diff --git a/arch/powerpc/kvm/trace_hv.h b/arch/powerpc/kvm/trace_hv.h
index fb21990c0fb4..ebc6dd449556 100644
--- a/arch/powerpc/kvm/trace_hv.h
+++ b/arch/powerpc/kvm/trace_hv.h
@@ -449,7 +449,7 @@ TRACE_EVENT(kvmppc_vcore_wakeup,
__entry->tgid = current->tgid;
),
- TP_printk("%s time %lld ns, tgid=%d",
+ TP_printk("%s time %llu ns, tgid=%d",
__entry->waited ? "wait" : "poll",
__entry->ns, __entry->tgid)
);
diff --git a/arch/powerpc/mm/hash_native_64.c b/arch/powerpc/mm/hash_native_64.c
index 83ddc0e171b0..ad9fd5245be2 100644
--- a/arch/powerpc/mm/hash_native_64.c
+++ b/arch/powerpc/mm/hash_native_64.c
@@ -221,13 +221,18 @@ static long native_hpte_insert(unsigned long hpte_group, unsigned long vpn,
return -1;
hpte_v = hpte_encode_v(vpn, psize, apsize, ssize) | vflags | HPTE_V_VALID;
- hpte_r = hpte_encode_r(pa, psize, apsize, ssize) | rflags;
+ hpte_r = hpte_encode_r(pa, psize, apsize) | rflags;
if (!(vflags & HPTE_V_BOLTED)) {
DBG_LOW(" i=%x hpte_v=%016lx, hpte_r=%016lx\n",
i, hpte_v, hpte_r);
}
+ if (cpu_has_feature(CPU_FTR_ARCH_300)) {
+ hpte_r = hpte_old_to_new_r(hpte_v, hpte_r);
+ hpte_v = hpte_old_to_new_v(hpte_v);
+ }
+
hptep->r = cpu_to_be64(hpte_r);
/* Guarantee the second dword is visible before the valid bit */
eieio();
@@ -295,6 +300,8 @@ static long native_hpte_updatepp(unsigned long slot, unsigned long newpp,
vpn, want_v & HPTE_V_AVPN, slot, newpp);
hpte_v = be64_to_cpu(hptep->v);
+ if (cpu_has_feature(CPU_FTR_ARCH_300))
+ hpte_v = hpte_new_to_old_v(hpte_v, be64_to_cpu(hptep->r));
/*
* We need to invalidate the TLB always because hpte_remove doesn't do
* a tlb invalidate. If a hash bucket gets full, we "evict" a more/less
@@ -309,6 +316,8 @@ static long native_hpte_updatepp(unsigned long slot, unsigned long newpp,
native_lock_hpte(hptep);
/* recheck with locks held */
hpte_v = be64_to_cpu(hptep->v);
+ if (cpu_has_feature(CPU_FTR_ARCH_300))
+ hpte_v = hpte_new_to_old_v(hpte_v, be64_to_cpu(hptep->r));
if (unlikely(!HPTE_V_COMPARE(hpte_v, want_v) ||
!(hpte_v & HPTE_V_VALID))) {
ret = -1;
@@ -350,6 +359,8 @@ static long native_hpte_find(unsigned long vpn, int psize, int ssize)
for (i = 0; i < HPTES_PER_GROUP; i++) {
hptep = htab_address + slot;
hpte_v = be64_to_cpu(hptep->v);
+ if (cpu_has_feature(CPU_FTR_ARCH_300))
+ hpte_v = hpte_new_to_old_v(hpte_v, be64_to_cpu(hptep->r));
if (HPTE_V_COMPARE(hpte_v, want_v) && (hpte_v & HPTE_V_VALID))
/* HPTE matches */
@@ -409,6 +420,8 @@ static void native_hpte_invalidate(unsigned long slot, unsigned long vpn,
want_v = hpte_encode_avpn(vpn, bpsize, ssize);
native_lock_hpte(hptep);
hpte_v = be64_to_cpu(hptep->v);
+ if (cpu_has_feature(CPU_FTR_ARCH_300))
+ hpte_v = hpte_new_to_old_v(hpte_v, be64_to_cpu(hptep->r));
/*
* We need to invalidate the TLB always because hpte_remove doesn't do
@@ -467,6 +480,8 @@ static void native_hugepage_invalidate(unsigned long vsid,
want_v = hpte_encode_avpn(vpn, psize, ssize);
native_lock_hpte(hptep);
hpte_v = be64_to_cpu(hptep->v);
+ if (cpu_has_feature(CPU_FTR_ARCH_300))
+ hpte_v = hpte_new_to_old_v(hpte_v, be64_to_cpu(hptep->r));
/* Even if we miss, we need to invalidate the TLB */
if (!HPTE_V_COMPARE(hpte_v, want_v) || !(hpte_v & HPTE_V_VALID))
@@ -504,6 +519,10 @@ static void hpte_decode(struct hash_pte *hpte, unsigned long slot,
/* Look at the 8 bit LP value */
unsigned int lp = (hpte_r >> LP_SHIFT) & ((1 << LP_BITS) - 1);
+ if (cpu_has_feature(CPU_FTR_ARCH_300)) {
+ hpte_v = hpte_new_to_old_v(hpte_v, hpte_r);
+ hpte_r = hpte_new_to_old_r(hpte_r);
+ }
if (!(hpte_v & HPTE_V_LARGE)) {
size = MMU_PAGE_4K;
a_size = MMU_PAGE_4K;
@@ -512,11 +531,7 @@ static void hpte_decode(struct hash_pte *hpte, unsigned long slot,
a_size = hpte_page_sizes[lp] >> 4;
}
/* This works for all page sizes, and for 256M and 1T segments */
- if (cpu_has_feature(CPU_FTR_ARCH_300))
- *ssize = hpte_r >> HPTE_R_3_0_SSIZE_SHIFT;
- else
- *ssize = hpte_v >> HPTE_V_SSIZE_SHIFT;
-
+ *ssize = hpte_v >> HPTE_V_SSIZE_SHIFT;
shift = mmu_psize_defs[size].shift;
avpn = (HPTE_V_AVPN_VAL(hpte_v) & ~mmu_psize_defs[size].avpnm);
@@ -639,6 +654,9 @@ static void native_flush_hash_range(unsigned long number, int local)
want_v = hpte_encode_avpn(vpn, psize, ssize);
native_lock_hpte(hptep);
hpte_v = be64_to_cpu(hptep->v);
+ if (cpu_has_feature(CPU_FTR_ARCH_300))
+ hpte_v = hpte_new_to_old_v(hpte_v,
+ be64_to_cpu(hptep->r));
if (!HPTE_V_COMPARE(hpte_v, want_v) ||
!(hpte_v & HPTE_V_VALID))
native_unlock_hpte(hptep);
diff --git a/arch/powerpc/mm/hash_utils_64.c b/arch/powerpc/mm/hash_utils_64.c
index 78dabf065ba9..8410b4bb36ed 100644
--- a/arch/powerpc/mm/hash_utils_64.c
+++ b/arch/powerpc/mm/hash_utils_64.c
@@ -796,37 +796,17 @@ static void update_hid_for_hash(void)
static void __init hash_init_partition_table(phys_addr_t hash_table,
unsigned long htab_size)
{
- unsigned long ps_field;
- unsigned long patb_size = 1UL << PATB_SIZE_SHIFT;
+ mmu_partition_table_init();
/*
- * slb llp encoding for the page size used in VPM real mode.
- * We can ignore that for lpid 0
+ * PS field (VRMA page size) is not used for LPID 0, hence set to 0.
+ * For now, UPRT is 0 and we have no segment table.
*/
- ps_field = 0;
htab_size = __ilog2(htab_size) - 18;
-
- BUILD_BUG_ON_MSG((PATB_SIZE_SHIFT > 24), "Partition table size too large.");
- partition_tb = __va(memblock_alloc_base(patb_size, patb_size,
- MEMBLOCK_ALLOC_ANYWHERE));
-
- /* Initialize the Partition Table with no entries */
- memset((void *)partition_tb, 0, patb_size);
- partition_tb->patb0 = cpu_to_be64(ps_field | hash_table | htab_size);
- /*
- * FIXME!! This should be done via update_partition table
- * For now UPRT is 0 for us.
- */
- partition_tb->patb1 = 0;
+ mmu_partition_table_set_entry(0, hash_table | htab_size, 0);
pr_info("Partition table %p\n", partition_tb);
if (cpu_has_feature(CPU_FTR_POWER9_DD1))
update_hid_for_hash();
- /*
- * update partition table control register,
- * 64 K size.
- */
- mtspr(SPRN_PTCR, __pa(partition_tb) | (PATB_SIZE_SHIFT - 12));
-
}
static void __init htab_initialize(void)
diff --git a/arch/powerpc/mm/pgtable-radix.c b/arch/powerpc/mm/pgtable-radix.c
index 688b54517655..8d941c692eb3 100644
--- a/arch/powerpc/mm/pgtable-radix.c
+++ b/arch/powerpc/mm/pgtable-radix.c
@@ -177,23 +177,15 @@ redo:
static void __init radix_init_partition_table(void)
{
- unsigned long rts_field;
+ unsigned long rts_field, dw0;
+ mmu_partition_table_init();
rts_field = radix__get_tree_size();
+ dw0 = rts_field | __pa(init_mm.pgd) | RADIX_PGD_INDEX_SIZE | PATB_HR;
+ mmu_partition_table_set_entry(0, dw0, 0);
- BUILD_BUG_ON_MSG((PATB_SIZE_SHIFT > 24), "Partition table size too large.");
- partition_tb = early_alloc_pgtable(1UL << PATB_SIZE_SHIFT);
- partition_tb->patb0 = cpu_to_be64(rts_field | __pa(init_mm.pgd) |
- RADIX_PGD_INDEX_SIZE | PATB_HR);
pr_info("Initializing Radix MMU\n");
pr_info("Partition table %p\n", partition_tb);
-
- memblock_set_current_limit(MEMBLOCK_ALLOC_ANYWHERE);
- /*
- * update partition table control register,
- * 64 K size.
- */
- mtspr(SPRN_PTCR, __pa(partition_tb) | (PATB_SIZE_SHIFT - 12));
}
void __init radix_init_native(void)
@@ -378,6 +370,8 @@ void __init radix__early_init_mmu(void)
radix_init_partition_table();
}
+ memblock_set_current_limit(MEMBLOCK_ALLOC_ANYWHERE);
+
radix_init_pgtable();
}
diff --git a/arch/powerpc/mm/pgtable_64.c b/arch/powerpc/mm/pgtable_64.c
index f5e8d4edb808..8bca7f58afc4 100644
--- a/arch/powerpc/mm/pgtable_64.c
+++ b/arch/powerpc/mm/pgtable_64.c
@@ -431,3 +431,37 @@ void pgtable_free_tlb(struct mmu_gather *tlb, void *table, int shift)
}
}
#endif
+
+#ifdef CONFIG_PPC_BOOK3S_64
+void __init mmu_partition_table_init(void)
+{
+ unsigned long patb_size = 1UL << PATB_SIZE_SHIFT;
+
+ BUILD_BUG_ON_MSG((PATB_SIZE_SHIFT > 36), "Partition table size too large.");
+ partition_tb = __va(memblock_alloc_base(patb_size, patb_size,
+ MEMBLOCK_ALLOC_ANYWHERE));
+
+ /* Initialize the Partition Table with no entries */
+ memset((void *)partition_tb, 0, patb_size);
+
+ /*
+ * update partition table control register,
+ * 64 K size.
+ */
+ mtspr(SPRN_PTCR, __pa(partition_tb) | (PATB_SIZE_SHIFT - 12));
+}
+
+void mmu_partition_table_set_entry(unsigned int lpid, unsigned long dw0,
+ unsigned long dw1)
+{
+ partition_tb[lpid].patb0 = cpu_to_be64(dw0);
+ partition_tb[lpid].patb1 = cpu_to_be64(dw1);
+
+ /* Global flush of TLBs and partition table caches for this lpid */
+ asm volatile("ptesync" : : : "memory");
+ asm volatile(PPC_TLBIE_5(%0,%1,2,0,0) : :
+ "r" (TLBIEL_INVAL_SET_LPID), "r" (lpid));
+ asm volatile("eieio; tlbsync; ptesync" : : : "memory");
+}
+EXPORT_SYMBOL_GPL(mmu_partition_table_set_entry);
+#endif /* CONFIG_PPC_BOOK3S_64 */
diff --git a/arch/powerpc/platforms/powernv/opal-wrappers.S b/arch/powerpc/platforms/powernv/opal-wrappers.S
index 44d2d842cee7..3aa40f1b20f5 100644
--- a/arch/powerpc/platforms/powernv/opal-wrappers.S
+++ b/arch/powerpc/platforms/powernv/opal-wrappers.S
@@ -304,8 +304,11 @@ OPAL_CALL(opal_pci_get_presence_state, OPAL_PCI_GET_PRESENCE_STATE);
OPAL_CALL(opal_pci_get_power_state, OPAL_PCI_GET_POWER_STATE);
OPAL_CALL(opal_pci_set_power_state, OPAL_PCI_SET_POWER_STATE);
OPAL_CALL(opal_int_get_xirr, OPAL_INT_GET_XIRR);
+OPAL_CALL_REAL(opal_rm_int_get_xirr, OPAL_INT_GET_XIRR);
OPAL_CALL(opal_int_set_cppr, OPAL_INT_SET_CPPR);
OPAL_CALL(opal_int_eoi, OPAL_INT_EOI);
+OPAL_CALL_REAL(opal_rm_int_eoi, OPAL_INT_EOI);
OPAL_CALL(opal_int_set_mfrr, OPAL_INT_SET_MFRR);
+OPAL_CALL_REAL(opal_rm_int_set_mfrr, OPAL_INT_SET_MFRR);
OPAL_CALL(opal_pci_tce_kill, OPAL_PCI_TCE_KILL);
OPAL_CALL_REAL(opal_rm_pci_tce_kill, OPAL_PCI_TCE_KILL);
diff --git a/arch/powerpc/platforms/powernv/opal.c b/arch/powerpc/platforms/powernv/opal.c
index 6c9a65b52e63..b3b8930ac52f 100644
--- a/arch/powerpc/platforms/powernv/opal.c
+++ b/arch/powerpc/platforms/powernv/opal.c
@@ -896,3 +896,5 @@ EXPORT_SYMBOL_GPL(opal_leds_get_ind);
EXPORT_SYMBOL_GPL(opal_leds_set_ind);
/* Export this symbol for PowerNV Operator Panel class driver */
EXPORT_SYMBOL_GPL(opal_write_oppanel_async);
+/* Export this for KVM */
+EXPORT_SYMBOL_GPL(opal_int_set_mfrr);
diff --git a/arch/powerpc/platforms/ps3/htab.c b/arch/powerpc/platforms/ps3/htab.c
index cb3c50328de8..cc2b281a3766 100644
--- a/arch/powerpc/platforms/ps3/htab.c
+++ b/arch/powerpc/platforms/ps3/htab.c
@@ -63,7 +63,7 @@ static long ps3_hpte_insert(unsigned long hpte_group, unsigned long vpn,
vflags &= ~HPTE_V_SECONDARY;
hpte_v = hpte_encode_v(vpn, psize, apsize, ssize) | vflags | HPTE_V_VALID;
- hpte_r = hpte_encode_r(ps3_mm_phys_to_lpar(pa), psize, apsize, ssize) | rflags;
+ hpte_r = hpte_encode_r(ps3_mm_phys_to_lpar(pa), psize, apsize) | rflags;
spin_lock_irqsave(&ps3_htab_lock, flags);
diff --git a/arch/powerpc/platforms/pseries/lpar.c b/arch/powerpc/platforms/pseries/lpar.c
index aa35245d8d6d..f2c98f6c1c9c 100644
--- a/arch/powerpc/platforms/pseries/lpar.c
+++ b/arch/powerpc/platforms/pseries/lpar.c
@@ -145,7 +145,7 @@ static long pSeries_lpar_hpte_insert(unsigned long hpte_group,
hpte_group, vpn, pa, rflags, vflags, psize);
hpte_v = hpte_encode_v(vpn, psize, apsize, ssize) | vflags | HPTE_V_VALID;
- hpte_r = hpte_encode_r(pa, psize, apsize, ssize) | rflags;
+ hpte_r = hpte_encode_r(pa, psize, apsize) | rflags;
if (!(vflags & HPTE_V_BOLTED))
pr_devel(" hpte_v=%016lx, hpte_r=%016lx\n", hpte_v, hpte_r);
diff --git a/arch/s390/Kconfig b/arch/s390/Kconfig
index 028f97be5bae..c6722112527d 100644
--- a/arch/s390/Kconfig
+++ b/arch/s390/Kconfig
@@ -136,6 +136,7 @@ config S390
select HAVE_CMPXCHG_LOCAL
select HAVE_DEBUG_KMEMLEAK
select HAVE_DMA_API_DEBUG
+ select HAVE_DMA_CONTIGUOUS
select HAVE_DYNAMIC_FTRACE
select HAVE_DYNAMIC_FTRACE_WITH_REGS
select HAVE_EFFICIENT_UNALIGNED_ACCESS
@@ -169,6 +170,7 @@ config S390
select OLD_SIGSUSPEND3
select SPARSE_IRQ
select SYSCTL_EXCEPTION_TRACE
+ select THREAD_INFO_IN_TASK
select TTY
select VIRT_CPU_ACCOUNTING
select ARCH_HAS_SCALED_CPUTIME
diff --git a/arch/s390/boot/compressed/Makefile b/arch/s390/boot/compressed/Makefile
index 0daa070d6c9d..6bd2c9022be3 100644
--- a/arch/s390/boot/compressed/Makefile
+++ b/arch/s390/boot/compressed/Makefile
@@ -10,7 +10,7 @@ targets := vmlinux.lds vmlinux vmlinux.bin vmlinux.bin.gz vmlinux.bin.bz2
targets += vmlinux.bin.xz vmlinux.bin.lzma vmlinux.bin.lzo vmlinux.bin.lz4
targets += misc.o piggy.o sizes.h head.o
-KBUILD_CFLAGS := -m64 -D__KERNEL__ $(LINUX_INCLUDE) -O2
+KBUILD_CFLAGS := -m64 -D__KERNEL__ -O2
KBUILD_CFLAGS += -DDISABLE_BRANCH_PROFILING
KBUILD_CFLAGS += $(cflags-y) -fno-delete-null-pointer-checks -msoft-float
KBUILD_CFLAGS += $(call cc-option,-mpacked-stack)
diff --git a/arch/s390/boot/compressed/head.S b/arch/s390/boot/compressed/head.S
index 28c4f96a2d9c..11f6254c561e 100644
--- a/arch/s390/boot/compressed/head.S
+++ b/arch/s390/boot/compressed/head.S
@@ -46,7 +46,7 @@ mover_end:
.align 8
.Lstack:
- .quad 0x8000 + (1<<(PAGE_SHIFT+THREAD_ORDER))
+ .quad 0x8000 + (1<<(PAGE_SHIFT+THREAD_SIZE_ORDER))
.Loffset:
.quad 0x11000
.Lmvsize:
diff --git a/arch/s390/configs/default_defconfig b/arch/s390/configs/default_defconfig
index 45968686f918..e659daffe368 100644
--- a/arch/s390/configs/default_defconfig
+++ b/arch/s390/configs/default_defconfig
@@ -66,6 +66,8 @@ CONFIG_TRANSPARENT_HUGEPAGE=y
CONFIG_CLEANCACHE=y
CONFIG_FRONTSWAP=y
CONFIG_CMA=y
+CONFIG_CMA_DEBUG=y
+CONFIG_CMA_DEBUGFS=y
CONFIG_MEM_SOFT_DIRTY=y
CONFIG_ZPOOL=m
CONFIG_ZBUD=m
@@ -366,6 +368,8 @@ CONFIG_BPF_JIT=y
CONFIG_NET_PKTGEN=m
CONFIG_NET_TCPPROBE=m
CONFIG_DEVTMPFS=y
+CONFIG_DMA_CMA=y
+CONFIG_CMA_SIZE_MBYTES=0
CONFIG_CONNECTOR=y
CONFIG_BLK_DEV_LOOP=m
CONFIG_BLK_DEV_CRYPTOLOOP=m
@@ -438,7 +442,6 @@ CONFIG_TUN=m
CONFIG_VETH=m
CONFIG_VIRTIO_NET=m
CONFIG_NLMON=m
-CONFIG_VHOST_NET=m
# CONFIG_NET_VENDOR_ARC is not set
# CONFIG_NET_VENDOR_CHELSIO is not set
# CONFIG_NET_VENDOR_INTEL is not set
@@ -693,3 +696,4 @@ CONFIG_CMM=m
CONFIG_APPLDATA_BASE=y
CONFIG_KVM=m
CONFIG_KVM_S390_UCONTROL=y
+CONFIG_VHOST_NET=m
diff --git a/arch/s390/configs/gcov_defconfig b/arch/s390/configs/gcov_defconfig
index 1dd05e345c4d..95ceac50bc65 100644
--- a/arch/s390/configs/gcov_defconfig
+++ b/arch/s390/configs/gcov_defconfig
@@ -362,6 +362,8 @@ CONFIG_BPF_JIT=y
CONFIG_NET_PKTGEN=m
CONFIG_NET_TCPPROBE=m
CONFIG_DEVTMPFS=y
+CONFIG_DMA_CMA=y
+CONFIG_CMA_SIZE_MBYTES=0
CONFIG_CONNECTOR=y
CONFIG_BLK_DEV_LOOP=m
CONFIG_BLK_DEV_CRYPTOLOOP=m
@@ -434,7 +436,6 @@ CONFIG_TUN=m
CONFIG_VETH=m
CONFIG_VIRTIO_NET=m
CONFIG_NLMON=m
-CONFIG_VHOST_NET=m
# CONFIG_NET_VENDOR_ARC is not set
# CONFIG_NET_VENDOR_CHELSIO is not set
# CONFIG_NET_VENDOR_INTEL is not set
@@ -633,3 +634,4 @@ CONFIG_CMM=m
CONFIG_APPLDATA_BASE=y
CONFIG_KVM=m
CONFIG_KVM_S390_UCONTROL=y
+CONFIG_VHOST_NET=m
diff --git a/arch/s390/configs/performance_defconfig b/arch/s390/configs/performance_defconfig
index 29d1178666f0..bc7b176f5795 100644
--- a/arch/s390/configs/performance_defconfig
+++ b/arch/s390/configs/performance_defconfig
@@ -362,6 +362,8 @@ CONFIG_BPF_JIT=y
CONFIG_NET_PKTGEN=m
CONFIG_NET_TCPPROBE=m
CONFIG_DEVTMPFS=y
+CONFIG_DMA_CMA=y
+CONFIG_CMA_SIZE_MBYTES=0
CONFIG_CONNECTOR=y
CONFIG_BLK_DEV_LOOP=m
CONFIG_BLK_DEV_CRYPTOLOOP=m
@@ -434,7 +436,6 @@ CONFIG_TUN=m
CONFIG_VETH=m
CONFIG_VIRTIO_NET=m
CONFIG_NLMON=m
-CONFIG_VHOST_NET=m
# CONFIG_NET_VENDOR_ARC is not set
# CONFIG_NET_VENDOR_CHELSIO is not set
# CONFIG_NET_VENDOR_INTEL is not set
@@ -632,3 +633,4 @@ CONFIG_CMM=m
CONFIG_APPLDATA_BASE=y
CONFIG_KVM=m
CONFIG_KVM_S390_UCONTROL=y
+CONFIG_VHOST_NET=m
diff --git a/arch/s390/crypto/prng.c b/arch/s390/crypto/prng.c
index 9cc050f9536c..1113389d0a39 100644
--- a/arch/s390/crypto/prng.c
+++ b/arch/s390/crypto/prng.c
@@ -507,8 +507,10 @@ static ssize_t prng_tdes_read(struct file *file, char __user *ubuf,
prng_data->prngws.byte_counter += n;
prng_data->prngws.reseed_counter += n;
- if (copy_to_user(ubuf, prng_data->buf, chunk))
- return -EFAULT;
+ if (copy_to_user(ubuf, prng_data->buf, chunk)) {
+ ret = -EFAULT;
+ break;
+ }
nbytes -= chunk;
ret += chunk;
diff --git a/arch/s390/hypfs/inode.c b/arch/s390/hypfs/inode.c
index 09bccb224d03..cf8a2d92467f 100644
--- a/arch/s390/hypfs/inode.c
+++ b/arch/s390/hypfs/inode.c
@@ -3,6 +3,7 @@
*
* Copyright IBM Corp. 2006, 2008
* Author(s): Michael Holzheu <holzheu@de.ibm.com>
+ * License: GPL
*/
#define KMSG_COMPONENT "hypfs"
@@ -18,7 +19,8 @@
#include <linux/time.h>
#include <linux/parser.h>
#include <linux/sysfs.h>
-#include <linux/module.h>
+#include <linux/init.h>
+#include <linux/kobject.h>
#include <linux/seq_file.h>
#include <linux/mount.h>
#include <linux/uio.h>
@@ -443,7 +445,6 @@ static struct file_system_type hypfs_type = {
.mount = hypfs_mount,
.kill_sb = hypfs_kill_super
};
-MODULE_ALIAS_FS("s390_hypfs");
static const struct super_operations hypfs_s_ops = {
.statfs = simple_statfs,
@@ -497,21 +498,4 @@ fail_dbfs_exit:
pr_err("Initialization of hypfs failed with rc=%i\n", rc);
return rc;
}
-
-static void __exit hypfs_exit(void)
-{
- unregister_filesystem(&hypfs_type);
- sysfs_remove_mount_point(hypervisor_kobj, "s390");
- hypfs_diag0c_exit();
- hypfs_sprp_exit();
- hypfs_vm_exit();
- hypfs_diag_exit();
- hypfs_dbfs_exit();
-}
-
-module_init(hypfs_init)
-module_exit(hypfs_exit)
-
-MODULE_LICENSE("GPL");
-MODULE_AUTHOR("Michael Holzheu <holzheu@de.ibm.com>");
-MODULE_DESCRIPTION("s390 Hypervisor Filesystem");
+device_initcall(hypfs_init)
diff --git a/arch/s390/include/asm/Kbuild b/arch/s390/include/asm/Kbuild
index 20f196b82a6e..8aea32fe8bd2 100644
--- a/arch/s390/include/asm/Kbuild
+++ b/arch/s390/include/asm/Kbuild
@@ -1,6 +1,6 @@
-
-
+generic-y += asm-offsets.h
generic-y += clkdev.h
+generic-y += dma-contiguous.h
generic-y += export.h
generic-y += irq_work.h
generic-y += mcs_spinlock.h
diff --git a/arch/s390/include/asm/asm-offsets.h b/arch/s390/include/asm/asm-offsets.h
deleted file mode 100644
index d370ee36a182..000000000000
--- a/arch/s390/include/asm/asm-offsets.h
+++ /dev/null
@@ -1 +0,0 @@
-#include <generated/asm-offsets.h>
diff --git a/arch/s390/include/asm/atomic.h b/arch/s390/include/asm/atomic.h
index d28cc2f5b7b2..f7f69dfd2db2 100644
--- a/arch/s390/include/asm/atomic.h
+++ b/arch/s390/include/asm/atomic.h
@@ -1,13 +1,8 @@
/*
- * Copyright IBM Corp. 1999, 2009
+ * Copyright IBM Corp. 1999, 2016
* Author(s): Martin Schwidefsky <schwidefsky@de.ibm.com>,
* Denis Joseph Barrow,
- * Arnd Bergmann <arndb@de.ibm.com>,
- *
- * Atomic operations that C can't guarantee us.
- * Useful for resource counting etc.
- * s390 uses 'Compare And Swap' for atomicity in SMP environment.
- *
+ * Arnd Bergmann,
*/
#ifndef __ARCH_S390_ATOMIC__
@@ -15,62 +10,12 @@
#include <linux/compiler.h>
#include <linux/types.h>
+#include <asm/atomic_ops.h>
#include <asm/barrier.h>
#include <asm/cmpxchg.h>
#define ATOMIC_INIT(i) { (i) }
-#define __ATOMIC_NO_BARRIER "\n"
-
-#ifdef CONFIG_HAVE_MARCH_Z196_FEATURES
-
-#define __ATOMIC_OR "lao"
-#define __ATOMIC_AND "lan"
-#define __ATOMIC_ADD "laa"
-#define __ATOMIC_XOR "lax"
-#define __ATOMIC_BARRIER "bcr 14,0\n"
-
-#define __ATOMIC_LOOP(ptr, op_val, op_string, __barrier) \
-({ \
- int old_val; \
- \
- typecheck(atomic_t *, ptr); \
- asm volatile( \
- op_string " %0,%2,%1\n" \
- __barrier \
- : "=d" (old_val), "+Q" ((ptr)->counter) \
- : "d" (op_val) \
- : "cc", "memory"); \
- old_val; \
-})
-
-#else /* CONFIG_HAVE_MARCH_Z196_FEATURES */
-
-#define __ATOMIC_OR "or"
-#define __ATOMIC_AND "nr"
-#define __ATOMIC_ADD "ar"
-#define __ATOMIC_XOR "xr"
-#define __ATOMIC_BARRIER "\n"
-
-#define __ATOMIC_LOOP(ptr, op_val, op_string, __barrier) \
-({ \
- int old_val, new_val; \
- \
- typecheck(atomic_t *, ptr); \
- asm volatile( \
- " l %0,%2\n" \
- "0: lr %1,%0\n" \
- op_string " %1,%3\n" \
- " cs %0,%1,%2\n" \
- " jl 0b" \
- : "=&d" (old_val), "=&d" (new_val), "+Q" ((ptr)->counter)\
- : "d" (op_val) \
- : "cc", "memory"); \
- old_val; \
-})
-
-#endif /* CONFIG_HAVE_MARCH_Z196_FEATURES */
-
static inline int atomic_read(const atomic_t *v)
{
int c;
@@ -90,27 +35,23 @@ static inline void atomic_set(atomic_t *v, int i)
static inline int atomic_add_return(int i, atomic_t *v)
{
- return __ATOMIC_LOOP(v, i, __ATOMIC_ADD, __ATOMIC_BARRIER) + i;
+ return __atomic_add_barrier(i, &v->counter) + i;
}
static inline int atomic_fetch_add(int i, atomic_t *v)
{
- return __ATOMIC_LOOP(v, i, __ATOMIC_ADD, __ATOMIC_BARRIER);
+ return __atomic_add_barrier(i, &v->counter);
}
static inline void atomic_add(int i, atomic_t *v)
{
#ifdef CONFIG_HAVE_MARCH_Z196_FEATURES
if (__builtin_constant_p(i) && (i > -129) && (i < 128)) {
- asm volatile(
- "asi %0,%1\n"
- : "+Q" (v->counter)
- : "i" (i)
- : "cc", "memory");
+ __atomic_add_const(i, &v->counter);
return;
}
#endif
- __ATOMIC_LOOP(v, i, __ATOMIC_ADD, __ATOMIC_NO_BARRIER);
+ __atomic_add(i, &v->counter);
}
#define atomic_add_negative(_i, _v) (atomic_add_return(_i, _v) < 0)
@@ -125,19 +66,19 @@ static inline void atomic_add(int i, atomic_t *v)
#define atomic_dec_return(_v) atomic_sub_return(1, _v)
#define atomic_dec_and_test(_v) (atomic_sub_return(1, _v) == 0)
-#define ATOMIC_OPS(op, OP) \
+#define ATOMIC_OPS(op) \
static inline void atomic_##op(int i, atomic_t *v) \
{ \
- __ATOMIC_LOOP(v, i, __ATOMIC_##OP, __ATOMIC_NO_BARRIER); \
+ __atomic_##op(i, &v->counter); \
} \
static inline int atomic_fetch_##op(int i, atomic_t *v) \
{ \
- return __ATOMIC_LOOP(v, i, __ATOMIC_##OP, __ATOMIC_BARRIER); \
+ return __atomic_##op##_barrier(i, &v->counter); \
}
-ATOMIC_OPS(and, AND)
-ATOMIC_OPS(or, OR)
-ATOMIC_OPS(xor, XOR)
+ATOMIC_OPS(and)
+ATOMIC_OPS(or)
+ATOMIC_OPS(xor)
#undef ATOMIC_OPS
@@ -145,12 +86,7 @@ ATOMIC_OPS(xor, XOR)
static inline int atomic_cmpxchg(atomic_t *v, int old, int new)
{
- asm volatile(
- " cs %0,%2,%1"
- : "+d" (old), "+Q" (v->counter)
- : "d" (new)
- : "cc", "memory");
- return old;
+ return __atomic_cmpxchg(&v->counter, old, new);
}
static inline int __atomic_add_unless(atomic_t *v, int a, int u)
@@ -168,65 +104,11 @@ static inline int __atomic_add_unless(atomic_t *v, int a, int u)
return c;
}
-
-#undef __ATOMIC_LOOP
-
#define ATOMIC64_INIT(i) { (i) }
-#define __ATOMIC64_NO_BARRIER "\n"
-
-#ifdef CONFIG_HAVE_MARCH_Z196_FEATURES
-
-#define __ATOMIC64_OR "laog"
-#define __ATOMIC64_AND "lang"
-#define __ATOMIC64_ADD "laag"
-#define __ATOMIC64_XOR "laxg"
-#define __ATOMIC64_BARRIER "bcr 14,0\n"
-
-#define __ATOMIC64_LOOP(ptr, op_val, op_string, __barrier) \
-({ \
- long long old_val; \
- \
- typecheck(atomic64_t *, ptr); \
- asm volatile( \
- op_string " %0,%2,%1\n" \
- __barrier \
- : "=d" (old_val), "+Q" ((ptr)->counter) \
- : "d" (op_val) \
- : "cc", "memory"); \
- old_val; \
-})
-
-#else /* CONFIG_HAVE_MARCH_Z196_FEATURES */
-
-#define __ATOMIC64_OR "ogr"
-#define __ATOMIC64_AND "ngr"
-#define __ATOMIC64_ADD "agr"
-#define __ATOMIC64_XOR "xgr"
-#define __ATOMIC64_BARRIER "\n"
-
-#define __ATOMIC64_LOOP(ptr, op_val, op_string, __barrier) \
-({ \
- long long old_val, new_val; \
- \
- typecheck(atomic64_t *, ptr); \
- asm volatile( \
- " lg %0,%2\n" \
- "0: lgr %1,%0\n" \
- op_string " %1,%3\n" \
- " csg %0,%1,%2\n" \
- " jl 0b" \
- : "=&d" (old_val), "=&d" (new_val), "+Q" ((ptr)->counter)\
- : "d" (op_val) \
- : "cc", "memory"); \
- old_val; \
-})
-
-#endif /* CONFIG_HAVE_MARCH_Z196_FEATURES */
-
-static inline long long atomic64_read(const atomic64_t *v)
+static inline long atomic64_read(const atomic64_t *v)
{
- long long c;
+ long c;
asm volatile(
" lg %0,%1\n"
@@ -234,71 +116,60 @@ static inline long long atomic64_read(const atomic64_t *v)
return c;
}
-static inline void atomic64_set(atomic64_t *v, long long i)
+static inline void atomic64_set(atomic64_t *v, long i)
{
asm volatile(
" stg %1,%0\n"
: "=Q" (v->counter) : "d" (i));
}
-static inline long long atomic64_add_return(long long i, atomic64_t *v)
+static inline long atomic64_add_return(long i, atomic64_t *v)
{
- return __ATOMIC64_LOOP(v, i, __ATOMIC64_ADD, __ATOMIC64_BARRIER) + i;
+ return __atomic64_add_barrier(i, &v->counter) + i;
}
-static inline long long atomic64_fetch_add(long long i, atomic64_t *v)
+static inline long atomic64_fetch_add(long i, atomic64_t *v)
{
- return __ATOMIC64_LOOP(v, i, __ATOMIC64_ADD, __ATOMIC64_BARRIER);
+ return __atomic64_add_barrier(i, &v->counter);
}
-static inline void atomic64_add(long long i, atomic64_t *v)
+static inline void atomic64_add(long i, atomic64_t *v)
{
#ifdef CONFIG_HAVE_MARCH_Z196_FEATURES
if (__builtin_constant_p(i) && (i > -129) && (i < 128)) {
- asm volatile(
- "agsi %0,%1\n"
- : "+Q" (v->counter)
- : "i" (i)
- : "cc", "memory");
+ __atomic64_add_const(i, &v->counter);
return;
}
#endif
- __ATOMIC64_LOOP(v, i, __ATOMIC64_ADD, __ATOMIC64_NO_BARRIER);
+ __atomic64_add(i, &v->counter);
}
#define atomic64_xchg(v, new) (xchg(&((v)->counter), new))
-static inline long long atomic64_cmpxchg(atomic64_t *v,
- long long old, long long new)
+static inline long atomic64_cmpxchg(atomic64_t *v, long old, long new)
{
- asm volatile(
- " csg %0,%2,%1"
- : "+d" (old), "+Q" (v->counter)
- : "d" (new)
- : "cc", "memory");
- return old;
+ return __atomic64_cmpxchg(&v->counter, old, new);
}
-#define ATOMIC64_OPS(op, OP) \
+#define ATOMIC64_OPS(op) \
static inline void atomic64_##op(long i, atomic64_t *v) \
{ \
- __ATOMIC64_LOOP(v, i, __ATOMIC64_##OP, __ATOMIC64_NO_BARRIER); \
+ __atomic64_##op(i, &v->counter); \
} \
static inline long atomic64_fetch_##op(long i, atomic64_t *v) \
{ \
- return __ATOMIC64_LOOP(v, i, __ATOMIC64_##OP, __ATOMIC64_BARRIER); \
+ return __atomic64_##op##_barrier(i, &v->counter); \
}
-ATOMIC64_OPS(and, AND)
-ATOMIC64_OPS(or, OR)
-ATOMIC64_OPS(xor, XOR)
+ATOMIC64_OPS(and)
+ATOMIC64_OPS(or)
+ATOMIC64_OPS(xor)
#undef ATOMIC64_OPS
-#undef __ATOMIC64_LOOP
-static inline int atomic64_add_unless(atomic64_t *v, long long i, long long u)
+static inline int atomic64_add_unless(atomic64_t *v, long i, long u)
{
- long long c, old;
+ long c, old;
c = atomic64_read(v);
for (;;) {
@@ -312,9 +183,9 @@ static inline int atomic64_add_unless(atomic64_t *v, long long i, long long u)
return c != u;
}
-static inline long long atomic64_dec_if_positive(atomic64_t *v)
+static inline long atomic64_dec_if_positive(atomic64_t *v)
{
- long long c, old, dec;
+ long c, old, dec;
c = atomic64_read(v);
for (;;) {
@@ -333,9 +204,9 @@ static inline long long atomic64_dec_if_positive(atomic64_t *v)
#define atomic64_inc(_v) atomic64_add(1, _v)
#define atomic64_inc_return(_v) atomic64_add_return(1, _v)
#define atomic64_inc_and_test(_v) (atomic64_add_return(1, _v) == 0)
-#define atomic64_sub_return(_i, _v) atomic64_add_return(-(long long)(_i), _v)
-#define atomic64_fetch_sub(_i, _v) atomic64_fetch_add(-(long long)(_i), _v)
-#define atomic64_sub(_i, _v) atomic64_add(-(long long)(_i), _v)
+#define atomic64_sub_return(_i, _v) atomic64_add_return(-(long)(_i), _v)
+#define atomic64_fetch_sub(_i, _v) atomic64_fetch_add(-(long)(_i), _v)
+#define atomic64_sub(_i, _v) atomic64_add(-(long)(_i), _v)
#define atomic64_sub_and_test(_i, _v) (atomic64_sub_return(_i, _v) == 0)
#define atomic64_dec(_v) atomic64_sub(1, _v)
#define atomic64_dec_return(_v) atomic64_sub_return(1, _v)
diff --git a/arch/s390/include/asm/atomic_ops.h b/arch/s390/include/asm/atomic_ops.h
new file mode 100644
index 000000000000..ac9e2b939d04
--- /dev/null
+++ b/arch/s390/include/asm/atomic_ops.h
@@ -0,0 +1,130 @@
+/*
+ * Low level function for atomic operations
+ *
+ * Copyright IBM Corp. 1999, 2016
+ */
+
+#ifndef __ARCH_S390_ATOMIC_OPS__
+#define __ARCH_S390_ATOMIC_OPS__
+
+#ifdef CONFIG_HAVE_MARCH_Z196_FEATURES
+
+#define __ATOMIC_OP(op_name, op_type, op_string, op_barrier) \
+static inline op_type op_name(op_type val, op_type *ptr) \
+{ \
+ op_type old; \
+ \
+ asm volatile( \
+ op_string " %[old],%[val],%[ptr]\n" \
+ op_barrier \
+ : [old] "=d" (old), [ptr] "+Q" (*ptr) \
+ : [val] "d" (val) : "cc", "memory"); \
+ return old; \
+} \
+
+#define __ATOMIC_OPS(op_name, op_type, op_string) \
+ __ATOMIC_OP(op_name, op_type, op_string, "\n") \
+ __ATOMIC_OP(op_name##_barrier, op_type, op_string, "bcr 14,0\n")
+
+__ATOMIC_OPS(__atomic_add, int, "laa")
+__ATOMIC_OPS(__atomic_and, int, "lan")
+__ATOMIC_OPS(__atomic_or, int, "lao")
+__ATOMIC_OPS(__atomic_xor, int, "lax")
+
+__ATOMIC_OPS(__atomic64_add, long, "laag")
+__ATOMIC_OPS(__atomic64_and, long, "lang")
+__ATOMIC_OPS(__atomic64_or, long, "laog")
+__ATOMIC_OPS(__atomic64_xor, long, "laxg")
+
+#undef __ATOMIC_OPS
+#undef __ATOMIC_OP
+
+static inline void __atomic_add_const(int val, int *ptr)
+{
+ asm volatile(
+ " asi %[ptr],%[val]\n"
+ : [ptr] "+Q" (*ptr) : [val] "i" (val) : "cc");
+}
+
+static inline void __atomic64_add_const(long val, long *ptr)
+{
+ asm volatile(
+ " agsi %[ptr],%[val]\n"
+ : [ptr] "+Q" (*ptr) : [val] "i" (val) : "cc");
+}
+
+#else /* CONFIG_HAVE_MARCH_Z196_FEATURES */
+
+#define __ATOMIC_OP(op_name, op_string) \
+static inline int op_name(int val, int *ptr) \
+{ \
+ int old, new; \
+ \
+ asm volatile( \
+ "0: lr %[new],%[old]\n" \
+ op_string " %[new],%[val]\n" \
+ " cs %[old],%[new],%[ptr]\n" \
+ " jl 0b" \
+ : [old] "=d" (old), [new] "=&d" (new), [ptr] "+Q" (*ptr)\
+ : [val] "d" (val), "0" (*ptr) : "cc", "memory"); \
+ return old; \
+}
+
+#define __ATOMIC_OPS(op_name, op_string) \
+ __ATOMIC_OP(op_name, op_string) \
+ __ATOMIC_OP(op_name##_barrier, op_string)
+
+__ATOMIC_OPS(__atomic_add, "ar")
+__ATOMIC_OPS(__atomic_and, "nr")
+__ATOMIC_OPS(__atomic_or, "or")
+__ATOMIC_OPS(__atomic_xor, "xr")
+
+#undef __ATOMIC_OPS
+
+#define __ATOMIC64_OP(op_name, op_string) \
+static inline long op_name(long val, long *ptr) \
+{ \
+ long old, new; \
+ \
+ asm volatile( \
+ "0: lgr %[new],%[old]\n" \
+ op_string " %[new],%[val]\n" \
+ " csg %[old],%[new],%[ptr]\n" \
+ " jl 0b" \
+ : [old] "=d" (old), [new] "=&d" (new), [ptr] "+Q" (*ptr)\
+ : [val] "d" (val), "0" (*ptr) : "cc", "memory"); \
+ return old; \
+}
+
+#define __ATOMIC64_OPS(op_name, op_string) \
+ __ATOMIC64_OP(op_name, op_string) \
+ __ATOMIC64_OP(op_name##_barrier, op_string)
+
+__ATOMIC64_OPS(__atomic64_add, "agr")
+__ATOMIC64_OPS(__atomic64_and, "ngr")
+__ATOMIC64_OPS(__atomic64_or, "ogr")
+__ATOMIC64_OPS(__atomic64_xor, "xgr")
+
+#undef __ATOMIC64_OPS
+
+#endif /* CONFIG_HAVE_MARCH_Z196_FEATURES */
+
+static inline int __atomic_cmpxchg(int *ptr, int old, int new)
+{
+ asm volatile(
+ " cs %[old],%[new],%[ptr]"
+ : [old] "+d" (old), [ptr] "+Q" (*ptr)
+ : [new] "d" (new) : "cc", "memory");
+ return old;
+}
+
+static inline long __atomic64_cmpxchg(long *ptr, long old, long new)
+{
+ asm volatile(
+ " csg %[old],%[new],%[ptr]"
+ : [old] "+d" (old), [ptr] "+Q" (*ptr)
+ : [new] "d" (new) : "cc", "memory");
+ return old;
+}
+
+#endif /* __ARCH_S390_ATOMIC_OPS__ */
diff --git a/arch/s390/include/asm/bitops.h b/arch/s390/include/asm/bitops.h
index 8043f10da6b5..d92047da5ccb 100644
--- a/arch/s390/include/asm/bitops.h
+++ b/arch/s390/include/asm/bitops.h
@@ -42,57 +42,9 @@
#include <linux/typecheck.h>
#include <linux/compiler.h>
+#include <asm/atomic_ops.h>
#include <asm/barrier.h>
-#define __BITOPS_NO_BARRIER "\n"
-
-#ifdef CONFIG_HAVE_MARCH_Z196_FEATURES
-
-#define __BITOPS_OR "laog"
-#define __BITOPS_AND "lang"
-#define __BITOPS_XOR "laxg"
-#define __BITOPS_BARRIER "bcr 14,0\n"
-
-#define __BITOPS_LOOP(__addr, __val, __op_string, __barrier) \
-({ \
- unsigned long __old; \
- \
- typecheck(unsigned long *, (__addr)); \
- asm volatile( \
- __op_string " %0,%2,%1\n" \
- __barrier \
- : "=d" (__old), "+Q" (*(__addr)) \
- : "d" (__val) \
- : "cc", "memory"); \
- __old; \
-})
-
-#else /* CONFIG_HAVE_MARCH_Z196_FEATURES */
-
-#define __BITOPS_OR "ogr"
-#define __BITOPS_AND "ngr"
-#define __BITOPS_XOR "xgr"
-#define __BITOPS_BARRIER "\n"
-
-#define __BITOPS_LOOP(__addr, __val, __op_string, __barrier) \
-({ \
- unsigned long __old, __new; \
- \
- typecheck(unsigned long *, (__addr)); \
- asm volatile( \
- " lg %0,%2\n" \
- "0: lgr %1,%0\n" \
- __op_string " %1,%3\n" \
- " csg %0,%1,%2\n" \
- " jl 0b" \
- : "=&d" (__old), "=&d" (__new), "+Q" (*(__addr))\
- : "d" (__val) \
- : "cc", "memory"); \
- __old; \
-})
-
-#endif /* CONFIG_HAVE_MARCH_Z196_FEATURES */
-
#define __BITOPS_WORDS(bits) (((bits) + BITS_PER_LONG - 1) / BITS_PER_LONG)
static inline unsigned long *
@@ -128,7 +80,7 @@ static inline void set_bit(unsigned long nr, volatile unsigned long *ptr)
}
#endif
mask = 1UL << (nr & (BITS_PER_LONG - 1));
- __BITOPS_LOOP(addr, mask, __BITOPS_OR, __BITOPS_NO_BARRIER);
+ __atomic64_or(mask, addr);
}
static inline void clear_bit(unsigned long nr, volatile unsigned long *ptr)
@@ -149,7 +101,7 @@ static inline void clear_bit(unsigned long nr, volatile unsigned long *ptr)
}
#endif
mask = ~(1UL << (nr & (BITS_PER_LONG - 1)));
- __BITOPS_LOOP(addr, mask, __BITOPS_AND, __BITOPS_NO_BARRIER);
+ __atomic64_and(mask, addr);
}
static inline void change_bit(unsigned long nr, volatile unsigned long *ptr)
@@ -170,7 +122,7 @@ static inline void change_bit(unsigned long nr, volatile unsigned long *ptr)
}
#endif
mask = 1UL << (nr & (BITS_PER_LONG - 1));
- __BITOPS_LOOP(addr, mask, __BITOPS_XOR, __BITOPS_NO_BARRIER);
+ __atomic64_xor(mask, addr);
}
static inline int
@@ -180,7 +132,7 @@ test_and_set_bit(unsigned long nr, volatile unsigned long *ptr)
unsigned long old, mask;
mask = 1UL << (nr & (BITS_PER_LONG - 1));
- old = __BITOPS_LOOP(addr, mask, __BITOPS_OR, __BITOPS_BARRIER);
+ old = __atomic64_or_barrier(mask, addr);
return (old & mask) != 0;
}
@@ -191,7 +143,7 @@ test_and_clear_bit(unsigned long nr, volatile unsigned long *ptr)
unsigned long old, mask;
mask = ~(1UL << (nr & (BITS_PER_LONG - 1)));
- old = __BITOPS_LOOP(addr, mask, __BITOPS_AND, __BITOPS_BARRIER);
+ old = __atomic64_and_barrier(mask, addr);
return (old & ~mask) != 0;
}
@@ -202,7 +154,7 @@ test_and_change_bit(unsigned long nr, volatile unsigned long *ptr)
unsigned long old, mask;
mask = 1UL << (nr & (BITS_PER_LONG - 1));
- old = __BITOPS_LOOP(addr, mask, __BITOPS_XOR, __BITOPS_BARRIER);
+ old = __atomic64_xor_barrier(mask, addr);
return (old & mask) != 0;
}
diff --git a/arch/s390/include/asm/cpu_mf.h b/arch/s390/include/asm/cpu_mf.h
index 03516476127b..b69d8bc231a5 100644
--- a/arch/s390/include/asm/cpu_mf.h
+++ b/arch/s390/include/asm/cpu_mf.h
@@ -104,7 +104,8 @@ struct hws_basic_entry {
unsigned int P:1; /* 28 PSW Problem state */
unsigned int AS:2; /* 29-30 PSW address-space control */
unsigned int I:1; /* 31 entry valid or invalid */
- unsigned int:16;
+ unsigned int CL:2; /* 32-33 Configuration Level */
+ unsigned int:14;
unsigned int prim_asn:16; /* primary ASN */
unsigned long long ia; /* Instruction Address */
unsigned long long gpp; /* Guest Program Parameter */
diff --git a/arch/s390/include/asm/elf.h b/arch/s390/include/asm/elf.h
index 1736c7d3c94c..f4381e1fb19e 100644
--- a/arch/s390/include/asm/elf.h
+++ b/arch/s390/include/asm/elf.h
@@ -193,7 +193,7 @@ extern char elf_platform[];
do { \
set_personality(PER_LINUX | \
(current->personality & (~PER_MASK))); \
- current_thread_info()->sys_call_table = \
+ current->thread.sys_call_table = \
(unsigned long) &sys_call_table; \
} while (0)
#else /* CONFIG_COMPAT */
@@ -204,11 +204,11 @@ do { \
(current->personality & ~PER_MASK)); \
if ((ex).e_ident[EI_CLASS] == ELFCLASS32) { \
set_thread_flag(TIF_31BIT); \
- current_thread_info()->sys_call_table = \
+ current->thread.sys_call_table = \
(unsigned long) &sys_call_table_emu; \
} else { \
clear_thread_flag(TIF_31BIT); \
- current_thread_info()->sys_call_table = \
+ current->thread.sys_call_table = \
(unsigned long) &sys_call_table; \
} \
} while (0)
diff --git a/arch/s390/include/asm/facilities_src.h b/arch/s390/include/asm/facilities_src.h
deleted file mode 100644
index 3b758f66e48b..000000000000
--- a/arch/s390/include/asm/facilities_src.h
+++ /dev/null
@@ -1,82 +0,0 @@
-/*
- * Copyright IBM Corp. 2015
- */
-
-#ifndef S390_GEN_FACILITIES_C
-#error "This file can only be included by gen_facilities.c"
-#endif
-
-#include <linux/kconfig.h>
-
-struct facility_def {
- char *name;
- int *bits;
-};
-
-static struct facility_def facility_defs[] = {
- {
- /*
- * FACILITIES_ALS contains the list of facilities that are
- * required to run a kernel that is compiled e.g. with
- * -march=<machine>.
- */
- .name = "FACILITIES_ALS",
- .bits = (int[]){
-#ifdef CONFIG_HAVE_MARCH_Z900_FEATURES
- 0, /* N3 instructions */
- 1, /* z/Arch mode installed */
-#endif
-#ifdef CONFIG_HAVE_MARCH_Z990_FEATURES
- 18, /* long displacement facility */
-#endif
-#ifdef CONFIG_HAVE_MARCH_Z9_109_FEATURES
- 7, /* stfle */
- 17, /* message security assist */
- 21, /* extended-immediate facility */
- 25, /* store clock fast */
-#endif
-#ifdef CONFIG_HAVE_MARCH_Z10_FEATURES
- 27, /* mvcos */
- 32, /* compare and swap and store */
- 33, /* compare and swap and store 2 */
- 34, /* general extension facility */
- 35, /* execute extensions */
-#endif
-#ifdef CONFIG_HAVE_MARCH_Z196_FEATURES
- 45, /* fast-BCR, etc. */
-#endif
-#ifdef CONFIG_HAVE_MARCH_ZEC12_FEATURES
- 49, /* misc-instruction-extensions */
- 52, /* interlocked facility 2 */
-#endif
-#ifdef CONFIG_HAVE_MARCH_Z13_FEATURES
- 53, /* load-and-zero-rightmost-byte, etc. */
-#endif
- -1 /* END */
- }
- },
- {
- .name = "FACILITIES_KVM",
- .bits = (int[]){
- 0, /* N3 instructions */
- 1, /* z/Arch mode installed */
- 2, /* z/Arch mode active */
- 3, /* DAT-enhancement */
- 4, /* idte segment table */
- 5, /* idte region table */
- 6, /* ASN-and-LX reuse */
- 7, /* stfle */
- 8, /* enhanced-DAT 1 */
- 9, /* sense-running-status */
- 10, /* conditional sske */
- 13, /* ipte-range */
- 14, /* nonquiescing key-setting */
- 73, /* transactional execution */
- 75, /* access-exception-fetch/store indication */
- 76, /* msa extension 3 */
- 77, /* msa extension 4 */
- 78, /* enhanced-DAT 2 */
- -1 /* END */
- }
- },
-};
diff --git a/arch/s390/include/asm/ipl.h b/arch/s390/include/asm/ipl.h
index 4da22b2f0521..edb5161df7e2 100644
--- a/arch/s390/include/asm/ipl.h
+++ b/arch/s390/include/asm/ipl.h
@@ -97,7 +97,7 @@ void __init save_area_add_vxrs(struct save_area *, __vector128 *vxrs);
extern void do_reipl(void);
extern void do_halt(void);
extern void do_poff(void);
-extern void ipl_save_parameters(void);
+extern void ipl_verify_parameters(void);
extern void ipl_update_parameters(void);
extern size_t append_ipl_vmparm(char *, size_t);
extern size_t append_ipl_scpdata(char *, size_t);
diff --git a/arch/s390/include/asm/lowcore.h b/arch/s390/include/asm/lowcore.h
index 7b93b78f423c..9bfad2ad6312 100644
--- a/arch/s390/include/asm/lowcore.h
+++ b/arch/s390/include/asm/lowcore.h
@@ -95,7 +95,7 @@ struct lowcore {
/* Current process. */
__u64 current_task; /* 0x0310 */
- __u64 thread_info; /* 0x0318 */
+ __u8 pad_0x318[0x320-0x318]; /* 0x0318 */
__u64 kernel_stack; /* 0x0320 */
/* Interrupt, panic and restart stack. */
@@ -126,7 +126,8 @@ struct lowcore {
__u64 percpu_offset; /* 0x0378 */
__u64 vdso_per_cpu_data; /* 0x0380 */
__u64 machine_flags; /* 0x0388 */
- __u8 pad_0x0390[0x0398-0x0390]; /* 0x0390 */
+ __u32 preempt_count; /* 0x0390 */
+ __u8 pad_0x0394[0x0398-0x0394]; /* 0x0394 */
__u64 gmap; /* 0x0398 */
__u32 spinlock_lockval; /* 0x03a0 */
__u32 fpu_flags; /* 0x03a4 */
diff --git a/arch/s390/include/asm/pci_clp.h b/arch/s390/include/asm/pci_clp.h
index e75c64cbcf08..c232ef9711f5 100644
--- a/arch/s390/include/asm/pci_clp.h
+++ b/arch/s390/include/asm/pci_clp.h
@@ -46,6 +46,8 @@ struct clp_fh_list_entry {
#define CLP_UTIL_STR_LEN 64
#define CLP_PFIP_NR_SEGMENTS 4
+extern bool zpci_unique_uid;
+
/* List PCI functions request */
struct clp_req_list_pci {
struct clp_req_hdr hdr;
@@ -59,7 +61,8 @@ struct clp_rsp_list_pci {
u64 resume_token;
u32 reserved2;
u16 max_fn;
- u8 reserved3;
+ u8 : 7;
+ u8 uid_checking : 1;
u8 entry_size;
struct clp_fh_list_entry fh_list[CLP_FH_LIST_NR_ENTRIES];
} __packed;
diff --git a/arch/s390/include/asm/pgalloc.h b/arch/s390/include/asm/pgalloc.h
index f4eb9843eed4..166f703dad7c 100644
--- a/arch/s390/include/asm/pgalloc.h
+++ b/arch/s390/include/asm/pgalloc.h
@@ -27,17 +27,17 @@ extern int page_table_allocate_pgste;
static inline void clear_table(unsigned long *s, unsigned long val, size_t n)
{
- typedef struct { char _[n]; } addrtype;
-
- *s = val;
- n = (n / 256) - 1;
- asm volatile(
- " mvc 8(248,%0),0(%0)\n"
- "0: mvc 256(256,%0),0(%0)\n"
- " la %0,256(%0)\n"
- " brct %1,0b\n"
- : "+a" (s), "+d" (n), "=m" (*(addrtype *) s)
- : "m" (*(addrtype *) s));
+ struct addrtype { char _[256]; };
+ int i;
+
+ for (i = 0; i < n; i += 256) {
+ *s = val;
+ asm volatile(
+ "mvc 8(248,%[s]),0(%[s])\n"
+ : "+m" (*(struct addrtype *) s)
+ : [s] "a" (s));
+ s += 256 / sizeof(long);
+ }
}
static inline void crst_table_init(unsigned long *crst, unsigned long entry)
diff --git a/arch/s390/include/asm/preempt.h b/arch/s390/include/asm/preempt.h
new file mode 100644
index 000000000000..b0776b2c8dcf
--- /dev/null
+++ b/arch/s390/include/asm/preempt.h
@@ -0,0 +1,137 @@
+#ifndef __ASM_PREEMPT_H
+#define __ASM_PREEMPT_H
+
+#include <asm/current.h>
+#include <linux/thread_info.h>
+#include <asm/atomic_ops.h>
+
+#ifdef CONFIG_HAVE_MARCH_Z196_FEATURES
+
+#define PREEMPT_ENABLED (0 + PREEMPT_NEED_RESCHED)
+
+static inline int preempt_count(void)
+{
+ return READ_ONCE(S390_lowcore.preempt_count) & ~PREEMPT_NEED_RESCHED;
+}
+
+static inline void preempt_count_set(int pc)
+{
+ int old, new;
+
+ do {
+ old = READ_ONCE(S390_lowcore.preempt_count);
+ new = (old & PREEMPT_NEED_RESCHED) |
+ (pc & ~PREEMPT_NEED_RESCHED);
+ } while (__atomic_cmpxchg(&S390_lowcore.preempt_count,
+ old, new) != old);
+}
+
+#define init_task_preempt_count(p) do { } while (0)
+
+#define init_idle_preempt_count(p, cpu) do { \
+ S390_lowcore.preempt_count = PREEMPT_ENABLED; \
+} while (0)
+
+static inline void set_preempt_need_resched(void)
+{
+ __atomic_and(~PREEMPT_NEED_RESCHED, &S390_lowcore.preempt_count);
+}
+
+static inline void clear_preempt_need_resched(void)
+{
+ __atomic_or(PREEMPT_NEED_RESCHED, &S390_lowcore.preempt_count);
+}
+
+static inline bool test_preempt_need_resched(void)
+{
+ return !(READ_ONCE(S390_lowcore.preempt_count) & PREEMPT_NEED_RESCHED);
+}
+
+static inline void __preempt_count_add(int val)
+{
+ if (__builtin_constant_p(val) && (val >= -128) && (val <= 127))
+ __atomic_add_const(val, &S390_lowcore.preempt_count);
+ else
+ __atomic_add(val, &S390_lowcore.preempt_count);
+}
+
+static inline void __preempt_count_sub(int val)
+{
+ __preempt_count_add(-val);
+}
+
+static inline bool __preempt_count_dec_and_test(void)
+{
+ return __atomic_add(-1, &S390_lowcore.preempt_count) == 1;
+}
+
+static inline bool should_resched(int preempt_offset)
+{
+ return unlikely(READ_ONCE(S390_lowcore.preempt_count) ==
+ preempt_offset);
+}
+
+#else /* CONFIG_HAVE_MARCH_Z196_FEATURES */
+
+#define PREEMPT_ENABLED (0)
+
+static inline int preempt_count(void)
+{
+ return READ_ONCE(S390_lowcore.preempt_count);
+}
+
+static inline void preempt_count_set(int pc)
+{
+ S390_lowcore.preempt_count = pc;
+}
+
+#define init_task_preempt_count(p) do { } while (0)
+
+#define init_idle_preempt_count(p, cpu) do { \
+ S390_lowcore.preempt_count = PREEMPT_ENABLED; \
+} while (0)
+
+static inline void set_preempt_need_resched(void)
+{
+}
+
+static inline void clear_preempt_need_resched(void)
+{
+}
+
+static inline bool test_preempt_need_resched(void)
+{
+ return false;
+}
+
+static inline void __preempt_count_add(int val)
+{
+ S390_lowcore.preempt_count += val;
+}
+
+static inline void __preempt_count_sub(int val)
+{
+ S390_lowcore.preempt_count -= val;
+}
+
+static inline bool __preempt_count_dec_and_test(void)
+{
+ return !--S390_lowcore.preempt_count && tif_need_resched();
+}
+
+static inline bool should_resched(int preempt_offset)
+{
+ return unlikely(preempt_count() == preempt_offset &&
+ tif_need_resched());
+}
+
+#endif /* CONFIG_HAVE_MARCH_Z196_FEATURES */
+
+#ifdef CONFIG_PREEMPT
+extern asmlinkage void preempt_schedule(void);
+#define __preempt_schedule() preempt_schedule()
+extern asmlinkage void preempt_schedule_notrace(void);
+#define __preempt_schedule_notrace() preempt_schedule_notrace()
+#endif /* CONFIG_PREEMPT */
+
+#endif /* __ASM_PREEMPT_H */
diff --git a/arch/s390/include/asm/processor.h b/arch/s390/include/asm/processor.h
index 9d3a21aedc97..6bca916a5ba0 100644
--- a/arch/s390/include/asm/processor.h
+++ b/arch/s390/include/asm/processor.h
@@ -110,14 +110,20 @@ typedef struct {
struct thread_struct {
unsigned int acrs[NUM_ACRS];
unsigned long ksp; /* kernel stack pointer */
+ unsigned long user_timer; /* task cputime in user space */
+ unsigned long system_timer; /* task cputime in kernel space */
+ unsigned long sys_call_table; /* system call table address */
mm_segment_t mm_segment;
unsigned long gmap_addr; /* address of last gmap fault. */
unsigned int gmap_write_flag; /* gmap fault write indication */
unsigned int gmap_int_code; /* int code of last gmap fault */
unsigned int gmap_pfault; /* signal of a pending guest pfault */
+ /* Per-thread information related to debugging */
struct per_regs per_user; /* User specified PER registers */
struct per_event per_event; /* Cause of the last PER trap */
unsigned long per_flags; /* Flags to control debug behavior */
+ unsigned int system_call; /* system call number in signal */
+ unsigned long last_break; /* last breaking-event-address. */
/* pfault_wait is used to block the process on a pfault event */
unsigned long pfault_wait;
struct list_head list;
diff --git a/arch/s390/include/asm/sclp.h b/arch/s390/include/asm/sclp.h
index 2ad9c204b1a2..8db92a5b3bf1 100644
--- a/arch/s390/include/asm/sclp.h
+++ b/arch/s390/include/asm/sclp.h
@@ -101,7 +101,8 @@ struct zpci_report_error_header {
u8 data[0]; /* Subsequent Data passed verbatim to SCLP ET 24 */
} __packed;
-int sclp_get_core_info(struct sclp_core_info *info);
+int _sclp_get_core_info_early(struct sclp_core_info *info);
+int _sclp_get_core_info(struct sclp_core_info *info);
int sclp_core_configure(u8 core);
int sclp_core_deconfigure(u8 core);
int sclp_sdias_blk_count(void);
@@ -119,4 +120,11 @@ void sclp_early_detect(void);
void _sclp_print_early(const char *);
void sclp_ocf_cpc_name_copy(char *dst);
+static inline int sclp_get_core_info(struct sclp_core_info *info, int early)
+{
+ if (early)
+ return _sclp_get_core_info_early(info);
+ return _sclp_get_core_info(info);
+}
+
#endif /* _ASM_S390_SCLP_H */
diff --git a/arch/s390/include/asm/scsw.h b/arch/s390/include/asm/scsw.h
index 4af99cdaddf5..17a7904f001a 100644
--- a/arch/s390/include/asm/scsw.h
+++ b/arch/s390/include/asm/scsw.h
@@ -96,7 +96,8 @@ struct tm_scsw {
u32 dstat:8;
u32 cstat:8;
u32 fcxs:8;
- u32 schxs:8;
+ u32 ifob:1;
+ u32 sesq:7;
} __attribute__ ((packed));
/**
@@ -177,6 +178,9 @@ union scsw {
#define SCHN_STAT_INTF_CTRL_CHK 0x02
#define SCHN_STAT_CHAIN_CHECK 0x01
+#define SCSW_SESQ_DEV_NOFCX 3
+#define SCSW_SESQ_PATH_NOFCX 4
+
/*
* architectured values for first sense byte
*/
diff --git a/arch/s390/include/asm/smp.h b/arch/s390/include/asm/smp.h
index 0cc383b9be7f..3deb134587b7 100644
--- a/arch/s390/include/asm/smp.h
+++ b/arch/s390/include/asm/smp.h
@@ -36,6 +36,7 @@ extern void smp_yield_cpu(int cpu);
extern void smp_cpu_set_polarization(int cpu, int val);
extern int smp_cpu_get_polarization(int cpu);
extern void smp_fill_possible_mask(void);
+extern void smp_detect_cpus(void);
#else /* CONFIG_SMP */
@@ -56,6 +57,7 @@ static inline int smp_store_status(int cpu) { return 0; }
static inline int smp_vcpu_scheduled(int cpu) { return 1; }
static inline void smp_yield_cpu(int cpu) { }
static inline void smp_fill_possible_mask(void) { }
+static inline void smp_detect_cpus(void) { }
#endif /* CONFIG_SMP */
@@ -69,6 +71,12 @@ static inline void smp_stop_cpu(void)
}
}
+/* Return thread 0 CPU number as base CPU */
+static inline int smp_get_base_cpu(int cpu)
+{
+ return cpu - (cpu % (smp_cpu_mtid + 1));
+}
+
#ifdef CONFIG_HOTPLUG_CPU
extern int smp_rescan_cpus(void);
extern void __noreturn cpu_die(void);
diff --git a/arch/s390/include/asm/string.h b/arch/s390/include/asm/string.h
index 8662f5c8e17f..15a3c005c274 100644
--- a/arch/s390/include/asm/string.h
+++ b/arch/s390/include/asm/string.h
@@ -14,6 +14,7 @@
#define __HAVE_ARCH_MEMCHR /* inline & arch function */
#define __HAVE_ARCH_MEMCMP /* arch function */
#define __HAVE_ARCH_MEMCPY /* gcc builtin & arch function */
+#define __HAVE_ARCH_MEMMOVE /* gcc builtin & arch function */
#define __HAVE_ARCH_MEMSCAN /* inline & arch function */
#define __HAVE_ARCH_MEMSET /* gcc builtin & arch function */
#define __HAVE_ARCH_STRCAT /* inline & arch function */
@@ -32,6 +33,7 @@
extern int memcmp(const void *, const void *, size_t);
extern void *memcpy(void *, const void *, size_t);
extern void *memset(void *, int, size_t);
+extern void *memmove(void *, const void *, size_t);
extern int strcmp(const char *,const char *);
extern size_t strlcat(char *, const char *, size_t);
extern size_t strlcpy(char *, const char *, size_t);
@@ -40,7 +42,6 @@ extern char *strncpy(char *, const char *, size_t);
extern char *strrchr(const char *, int);
extern char *strstr(const char *, const char *);
-#undef __HAVE_ARCH_MEMMOVE
#undef __HAVE_ARCH_STRCHR
#undef __HAVE_ARCH_STRNCHR
#undef __HAVE_ARCH_STRNCMP
diff --git a/arch/s390/include/asm/sysinfo.h b/arch/s390/include/asm/sysinfo.h
index 2728114d5484..229326c942c7 100644
--- a/arch/s390/include/asm/sysinfo.h
+++ b/arch/s390/include/asm/sysinfo.h
@@ -107,6 +107,11 @@ struct sysinfo_2_2_2 {
char reserved_3[5];
unsigned short cpus_dedicated;
unsigned short cpus_shared;
+ char reserved_4[3];
+ unsigned char vsne;
+ uuid_be uuid;
+ char reserved_5[160];
+ char ext_name[256];
};
#define LPAR_CHAR_DEDICATED (1 << 7)
@@ -127,7 +132,7 @@ struct sysinfo_3_2_2 {
unsigned int caf;
char cpi[16];
char reserved_1[3];
- char ext_name_encoding;
+ unsigned char evmne;
unsigned int reserved_2;
uuid_be uuid;
} vm[8];
diff --git a/arch/s390/include/asm/thread_info.h b/arch/s390/include/asm/thread_info.h
index f15c0398c363..a5b54a445eb8 100644
--- a/arch/s390/include/asm/thread_info.h
+++ b/arch/s390/include/asm/thread_info.h
@@ -12,10 +12,10 @@
/*
* Size of kernel stack for each process
*/
-#define THREAD_ORDER 2
+#define THREAD_SIZE_ORDER 2
#define ASYNC_ORDER 2
-#define THREAD_SIZE (PAGE_SIZE << THREAD_ORDER)
+#define THREAD_SIZE (PAGE_SIZE << THREAD_SIZE_ORDER)
#define ASYNC_SIZE (PAGE_SIZE << ASYNC_ORDER)
#ifndef __ASSEMBLY__
@@ -30,15 +30,7 @@
* - if the contents of this structure are changed, the assembly constants must also be changed
*/
struct thread_info {
- struct task_struct *task; /* main task structure */
unsigned long flags; /* low level flags */
- unsigned long sys_call_table; /* System call table address */
- unsigned int cpu; /* current CPU */
- int preempt_count; /* 0 => preemptable, <0 => BUG */
- unsigned int system_call;
- __u64 user_timer;
- __u64 system_timer;
- unsigned long last_break; /* last breaking-event-address. */
};
/*
@@ -46,26 +38,14 @@ struct thread_info {
*/
#define INIT_THREAD_INFO(tsk) \
{ \
- .task = &tsk, \
.flags = 0, \
- .cpu = 0, \
- .preempt_count = INIT_PREEMPT_COUNT, \
}
-#define init_thread_info (init_thread_union.thread_info)
#define init_stack (init_thread_union.stack)
-/* how to get the thread information struct from C */
-static inline struct thread_info *current_thread_info(void)
-{
- return (struct thread_info *) S390_lowcore.thread_info;
-}
-
void arch_release_task_struct(struct task_struct *tsk);
int arch_dup_task_struct(struct task_struct *dst, struct task_struct *src);
-#define THREAD_SIZE_ORDER THREAD_ORDER
-
#endif
/*
diff --git a/arch/s390/include/asm/timex.h b/arch/s390/include/asm/timex.h
index 0bb08f341c09..de8298800722 100644
--- a/arch/s390/include/asm/timex.h
+++ b/arch/s390/include/asm/timex.h
@@ -52,11 +52,9 @@ static inline void store_clock_comparator(__u64 *time)
void clock_comparator_work(void);
-void __init ptff_init(void);
+void __init time_early_init(void);
extern unsigned char ptff_function_mask[16];
-extern unsigned long lpar_offset;
-extern unsigned long initial_leap_seconds;
/* Function codes for the ptff instruction. */
#define PTFF_QAF 0x00 /* query available functions */
@@ -100,21 +98,28 @@ struct ptff_qui {
unsigned int pad_0x5c[41];
} __packed;
-static inline int ptff(void *ptff_block, size_t len, unsigned int func)
-{
- typedef struct { char _[len]; } addrtype;
- register unsigned int reg0 asm("0") = func;
- register unsigned long reg1 asm("1") = (unsigned long) ptff_block;
- int rc;
-
- asm volatile(
- " .word 0x0104\n"
- " ipm %0\n"
- " srl %0,28\n"
- : "=d" (rc), "+m" (*(addrtype *) ptff_block)
- : "d" (reg0), "d" (reg1) : "cc");
- return rc;
-}
+/*
+ * ptff - Perform timing facility function
+ * @ptff_block: Pointer to ptff parameter block
+ * @len: Length of parameter block
+ * @func: Function code
+ * Returns: Condition code (0 on success)
+ */
+#define ptff(ptff_block, len, func) \
+({ \
+ struct addrtype { char _[len]; }; \
+ register unsigned int reg0 asm("0") = func; \
+ register unsigned long reg1 asm("1") = (unsigned long) (ptff_block);\
+ int rc; \
+ \
+ asm volatile( \
+ " .word 0x0104\n" \
+ " ipm %0\n" \
+ " srl %0,28\n" \
+ : "=d" (rc), "+m" (*(struct addrtype *) reg1) \
+ : "d" (reg0), "d" (reg1) : "cc"); \
+ rc; \
+})
static inline unsigned long long local_tick_disable(void)
{
diff --git a/arch/s390/include/asm/topology.h b/arch/s390/include/asm/topology.h
index f15f5571ca2b..fa1bfce10370 100644
--- a/arch/s390/include/asm/topology.h
+++ b/arch/s390/include/asm/topology.h
@@ -22,21 +22,22 @@ struct cpu_topology_s390 {
cpumask_t drawer_mask;
};
-DECLARE_PER_CPU(struct cpu_topology_s390, cpu_topology);
-
-#define topology_physical_package_id(cpu) (per_cpu(cpu_topology, cpu).socket_id)
-#define topology_thread_id(cpu) (per_cpu(cpu_topology, cpu).thread_id)
-#define topology_sibling_cpumask(cpu) \
- (&per_cpu(cpu_topology, cpu).thread_mask)
-#define topology_core_id(cpu) (per_cpu(cpu_topology, cpu).core_id)
-#define topology_core_cpumask(cpu) (&per_cpu(cpu_topology, cpu).core_mask)
-#define topology_book_id(cpu) (per_cpu(cpu_topology, cpu).book_id)
-#define topology_book_cpumask(cpu) (&per_cpu(cpu_topology, cpu).book_mask)
-#define topology_drawer_id(cpu) (per_cpu(cpu_topology, cpu).drawer_id)
-#define topology_drawer_cpumask(cpu) (&per_cpu(cpu_topology, cpu).drawer_mask)
+extern struct cpu_topology_s390 cpu_topology[NR_CPUS];
+extern cpumask_t cpus_with_topology;
+
+#define topology_physical_package_id(cpu) (cpu_topology[cpu].socket_id)
+#define topology_thread_id(cpu) (cpu_topology[cpu].thread_id)
+#define topology_sibling_cpumask(cpu) (&cpu_topology[cpu].thread_mask)
+#define topology_core_id(cpu) (cpu_topology[cpu].core_id)
+#define topology_core_cpumask(cpu) (&cpu_topology[cpu].core_mask)
+#define topology_book_id(cpu) (cpu_topology[cpu].book_id)
+#define topology_book_cpumask(cpu) (&cpu_topology[cpu].book_mask)
+#define topology_drawer_id(cpu) (cpu_topology[cpu].drawer_id)
+#define topology_drawer_cpumask(cpu) (&cpu_topology[cpu].drawer_mask)
#define mc_capable() 1
+void topology_init_early(void);
int topology_cpu_init(struct cpu *);
int topology_set_cpu_management(int fc);
void topology_schedule_update(void);
@@ -46,6 +47,7 @@ const struct cpumask *cpu_coregroup_mask(int cpu);
#else /* CONFIG_SCHED_TOPOLOGY */
+static inline void topology_init_early(void) { }
static inline void topology_schedule_update(void) { }
static inline int topology_cpu_init(struct cpu *cpu) { return 0; }
static inline void topology_expect_change(void) { }
@@ -65,7 +67,7 @@ static inline void topology_expect_change(void) { }
#define cpu_to_node cpu_to_node
static inline int cpu_to_node(int cpu)
{
- return per_cpu(cpu_topology, cpu).node_id;
+ return cpu_topology[cpu].node_id;
}
/* Returns a pointer to the cpumask of CPUs on node 'node'. */
diff --git a/arch/s390/include/asm/uaccess.h b/arch/s390/include/asm/uaccess.h
index 52d7c8709279..f82b04e85a21 100644
--- a/arch/s390/include/asm/uaccess.h
+++ b/arch/s390/include/asm/uaccess.h
@@ -37,14 +37,14 @@
#define get_ds() (KERNEL_DS)
#define get_fs() (current->thread.mm_segment)
-#define set_fs(x) \
-({ \
+#define set_fs(x) \
+{ \
unsigned long __pto; \
current->thread.mm_segment = (x); \
__pto = current->thread.mm_segment.ar4 ? \
S390_lowcore.user_asce : S390_lowcore.kernel_asce; \
__ctl_load(__pto, 7, 7); \
-})
+}
#define segment_eq(a,b) ((a).ar4 == (b).ar4)
diff --git a/arch/s390/include/asm/vdso.h b/arch/s390/include/asm/vdso.h
index d0a2dbf2433d..88bdc477a843 100644
--- a/arch/s390/include/asm/vdso.h
+++ b/arch/s390/include/asm/vdso.h
@@ -33,6 +33,8 @@ struct vdso_data {
__u32 ectg_available; /* ECTG instruction present 0x58 */
__u32 tk_mult; /* Mult. used for xtime_nsec 0x5c */
__u32 tk_shift; /* Shift used for xtime_nsec 0x60 */
+ __u32 ts_dir; /* TOD steering direction 0x64 */
+ __u64 ts_end; /* TOD steering end 0x68 */
};
struct vdso_per_cpu_data {
diff --git a/arch/s390/include/uapi/asm/Kbuild b/arch/s390/include/uapi/asm/Kbuild
index cc44b09c25fc..bf736e764cb4 100644
--- a/arch/s390/include/uapi/asm/Kbuild
+++ b/arch/s390/include/uapi/asm/Kbuild
@@ -12,6 +12,7 @@ header-y += dasd.h
header-y += debug.h
header-y += errno.h
header-y += fcntl.h
+header-y += hypfs.h
header-y += ioctl.h
header-y += ioctls.h
header-y += ipcbuf.h
@@ -29,16 +30,16 @@ header-y += ptrace.h
header-y += qeth.h
header-y += resource.h
header-y += schid.h
+header-y += sclp_ctl.h
header-y += sembuf.h
header-y += setup.h
header-y += shmbuf.h
+header-y += sie.h
header-y += sigcontext.h
header-y += siginfo.h
header-y += signal.h
header-y += socket.h
header-y += sockios.h
-header-y += sclp_ctl.h
-header-y += sie.h
header-y += stat.h
header-y += statfs.h
header-y += swab.h
diff --git a/arch/s390/kernel/Makefile b/arch/s390/kernel/Makefile
index 1f0fe98f6db9..36b5101c8606 100644
--- a/arch/s390/kernel/Makefile
+++ b/arch/s390/kernel/Makefile
@@ -2,20 +2,47 @@
# Makefile for the linux kernel.
#
-KCOV_INSTRUMENT_early.o := n
-KCOV_INSTRUMENT_sclp.o := n
-KCOV_INSTRUMENT_als.o := n
-
ifdef CONFIG_FUNCTION_TRACER
-# Don't trace early setup code and tracing code
-CFLAGS_REMOVE_early.o = $(CC_FLAGS_FTRACE)
-CFLAGS_REMOVE_ftrace.o = $(CC_FLAGS_FTRACE)
+
+# Do not trace tracer code
+CFLAGS_REMOVE_ftrace.o = $(CC_FLAGS_FTRACE)
+
+# Do not trace early setup code
+CFLAGS_REMOVE_als.o = $(CC_FLAGS_FTRACE)
+CFLAGS_REMOVE_early.o = $(CC_FLAGS_FTRACE)
+CFLAGS_REMOVE_sclp.o = $(CC_FLAGS_FTRACE)
+
+endif
+
+GCOV_PROFILE_als.o := n
+GCOV_PROFILE_early.o := n
+GCOV_PROFILE_sclp.o := n
+
+KCOV_INSTRUMENT_als.o := n
+KCOV_INSTRUMENT_early.o := n
+KCOV_INSTRUMENT_sclp.o := n
+
+UBSAN_SANITIZE_als.o := n
+UBSAN_SANITIZE_early.o := n
+UBSAN_SANITIZE_sclp.o := n
+
+#
+# Use -march=z900 for sclp.c and als.c to be able to print an error
+# message if the kernel is started on a machine which is too old
+#
+ifneq ($(CC_FLAGS_MARCH),-march=z900)
+CFLAGS_REMOVE_als.o += $(CC_FLAGS_MARCH)
+CFLAGS_als.o += -march=z900
+CFLAGS_REMOVE_sclp.o += $(CC_FLAGS_MARCH)
+CFLAGS_sclp.o += -march=z900
+AFLAGS_REMOVE_head.o += $(CC_FLAGS_MARCH)
+AFLAGS_head.o += -march=z900
endif
#
# Passing null pointers is ok for smp code, since we access the lowcore here.
#
-CFLAGS_smp.o := -Wno-nonnull
+CFLAGS_smp.o := -Wno-nonnull
#
# Disable tailcall optimizations for stack / callchain walking functions
@@ -30,27 +57,7 @@ CFLAGS_dumpstack.o += -fno-optimize-sibling-calls
#
CFLAGS_ptrace.o += -DUTS_MACHINE='"$(UTS_MACHINE)"'
-CFLAGS_sysinfo.o += -w
-
-#
-# Use -march=z900 for sclp.c and als.c to be able to print an error
-# message if the kernel is started on a machine which is too old
-#
-CFLAGS_REMOVE_sclp.o = $(CC_FLAGS_FTRACE)
-CFLAGS_REMOVE_als.o = $(CC_FLAGS_FTRACE)
-ifneq ($(CC_FLAGS_MARCH),-march=z900)
-CFLAGS_REMOVE_sclp.o += $(CC_FLAGS_MARCH)
-CFLAGS_sclp.o += -march=z900
-CFLAGS_REMOVE_als.o += $(CC_FLAGS_MARCH)
-CFLAGS_als.o += -march=z900
-AFLAGS_REMOVE_head.o += $(CC_FLAGS_MARCH)
-AFLAGS_head.o += -march=z900
-endif
-GCOV_PROFILE_sclp.o := n
-GCOV_PROFILE_als.o := n
-UBSAN_SANITIZE_als.o := n
-UBSAN_SANITIZE_early.o := n
-UBSAN_SANITIZE_sclp.o := n
+CFLAGS_sysinfo.o += -w
obj-y := traps.o time.o process.o base.o early.o setup.o idle.o vtime.o
obj-y += processor.o sys_s390.o ptrace.o signal.o cpcmd.o ebcdic.o nmi.o
diff --git a/arch/s390/kernel/asm-offsets.c b/arch/s390/kernel/asm-offsets.c
index f3df9e0a5dec..c4b3570ded5b 100644
--- a/arch/s390/kernel/asm-offsets.c
+++ b/arch/s390/kernel/asm-offsets.c
@@ -25,12 +25,14 @@
int main(void)
{
/* task struct offsets */
- OFFSET(__TASK_thread_info, task_struct, stack);
+ OFFSET(__TASK_stack, task_struct, stack);
OFFSET(__TASK_thread, task_struct, thread);
OFFSET(__TASK_pid, task_struct, pid);
BLANK();
/* thread struct offsets */
OFFSET(__THREAD_ksp, thread_struct, ksp);
+ OFFSET(__THREAD_sysc_table, thread_struct, sys_call_table);
+ OFFSET(__THREAD_last_break, thread_struct, last_break);
OFFSET(__THREAD_FPU_fpc, thread_struct, fpu.fpc);
OFFSET(__THREAD_FPU_regs, thread_struct, fpu.regs);
OFFSET(__THREAD_per_cause, thread_struct, per_event.cause);
@@ -39,14 +41,7 @@ int main(void)
OFFSET(__THREAD_trap_tdb, thread_struct, trap_tdb);
BLANK();
/* thread info offsets */
- OFFSET(__TI_task, thread_info, task);
- OFFSET(__TI_flags, thread_info, flags);
- OFFSET(__TI_sysc_table, thread_info, sys_call_table);
- OFFSET(__TI_cpu, thread_info, cpu);
- OFFSET(__TI_precount, thread_info, preempt_count);
- OFFSET(__TI_user_timer, thread_info, user_timer);
- OFFSET(__TI_system_timer, thread_info, system_timer);
- OFFSET(__TI_last_break, thread_info, last_break);
+ OFFSET(__TI_flags, task_struct, thread_info.flags);
BLANK();
/* pt_regs offsets */
OFFSET(__PT_ARGS, pt_regs, args);
@@ -79,6 +74,8 @@ int main(void)
OFFSET(__VDSO_ECTG_OK, vdso_data, ectg_available);
OFFSET(__VDSO_TK_MULT, vdso_data, tk_mult);
OFFSET(__VDSO_TK_SHIFT, vdso_data, tk_shift);
+ OFFSET(__VDSO_TS_DIR, vdso_data, ts_dir);
+ OFFSET(__VDSO_TS_END, vdso_data, ts_end);
OFFSET(__VDSO_ECTG_BASE, vdso_per_cpu_data, ectg_timer_base);
OFFSET(__VDSO_ECTG_USER, vdso_per_cpu_data, ectg_user_time);
OFFSET(__VDSO_CPU_NR, vdso_per_cpu_data, cpu_nr);
@@ -159,7 +156,6 @@ int main(void)
OFFSET(__LC_INT_CLOCK, lowcore, int_clock);
OFFSET(__LC_MCCK_CLOCK, lowcore, mcck_clock);
OFFSET(__LC_CURRENT, lowcore, current_task);
- OFFSET(__LC_THREAD_INFO, lowcore, thread_info);
OFFSET(__LC_KERNEL_STACK, lowcore, kernel_stack);
OFFSET(__LC_ASYNC_STACK, lowcore, async_stack);
OFFSET(__LC_PANIC_STACK, lowcore, panic_stack);
@@ -173,6 +169,7 @@ int main(void)
OFFSET(__LC_PERCPU_OFFSET, lowcore, percpu_offset);
OFFSET(__LC_VDSO_PER_CPU, lowcore, vdso_per_cpu_data);
OFFSET(__LC_MACHINE_FLAGS, lowcore, machine_flags);
+ OFFSET(__LC_PREEMPT_COUNT, lowcore, preempt_count);
OFFSET(__LC_GMAP, lowcore, gmap);
OFFSET(__LC_PASTE, lowcore, paste);
/* software defined ABI-relevant lowcore locations 0xe00 - 0xe20 */
diff --git a/arch/s390/kernel/compat_signal.c b/arch/s390/kernel/compat_signal.c
index 4af60374eba0..6f2a6ab13cb5 100644
--- a/arch/s390/kernel/compat_signal.c
+++ b/arch/s390/kernel/compat_signal.c
@@ -446,7 +446,7 @@ static int setup_frame32(struct ksignal *ksig, sigset_t *set,
/* set extra registers only for synchronous signals */
regs->gprs[4] = regs->int_code & 127;
regs->gprs[5] = regs->int_parm_long;
- regs->gprs[6] = task_thread_info(current)->last_break;
+ regs->gprs[6] = current->thread.last_break;
}
return 0;
@@ -523,7 +523,7 @@ static int setup_rt_frame32(struct ksignal *ksig, sigset_t *set,
regs->gprs[2] = ksig->sig;
regs->gprs[3] = (__force __u64) &frame->info;
regs->gprs[4] = (__force __u64) &frame->uc;
- regs->gprs[5] = task_thread_info(current)->last_break;
+ regs->gprs[5] = current->thread.last_break;
return 0;
}
diff --git a/arch/s390/kernel/early.c b/arch/s390/kernel/early.c
index 2374c5b46bbc..d038c8cea6cb 100644
--- a/arch/s390/kernel/early.c
+++ b/arch/s390/kernel/early.c
@@ -293,6 +293,7 @@ static noinline __init void setup_lowcore_early(void)
psw.addr = (unsigned long) s390_base_pgm_handler;
S390_lowcore.program_new_psw = psw;
s390_base_pgm_handler_fn = early_pgm_check_handler;
+ S390_lowcore.preempt_count = INIT_PREEMPT_COUNT;
}
static noinline __init void setup_facility_list(void)
@@ -391,7 +392,49 @@ static int __init cad_init(void)
}
early_initcall(cad_init);
-static __init void rescue_initrd(void)
+static __init void memmove_early(void *dst, const void *src, size_t n)
+{
+ unsigned long addr;
+ long incr;
+ psw_t old;
+
+ if (!n)
+ return;
+ incr = 1;
+ if (dst > src) {
+ incr = -incr;
+ dst += n - 1;
+ src += n - 1;
+ }
+ old = S390_lowcore.program_new_psw;
+ S390_lowcore.program_new_psw.mask = __extract_psw();
+ asm volatile(
+ " larl %[addr],1f\n"
+ " stg %[addr],%[psw_pgm_addr]\n"
+ "0: mvc 0(1,%[dst]),0(%[src])\n"
+ " agr %[dst],%[incr]\n"
+ " agr %[src],%[incr]\n"
+ " brctg %[n],0b\n"
+ "1:\n"
+ : [addr] "=&d" (addr),
+ [psw_pgm_addr] "=&Q" (S390_lowcore.program_new_psw.addr),
+ [dst] "+&a" (dst), [src] "+&a" (src), [n] "+d" (n)
+ : [incr] "d" (incr)
+ : "cc", "memory");
+ S390_lowcore.program_new_psw = old;
+}
+
+static __init noinline void ipl_save_parameters(void)
+{
+ void *src, *dst;
+
+ src = (void *)(unsigned long) S390_lowcore.ipl_parmblock_ptr;
+ dst = (void *) IPL_PARMBLOCK_ORIGIN;
+ memmove_early(dst, src, PAGE_SIZE);
+ S390_lowcore.ipl_parmblock_ptr = IPL_PARMBLOCK_ORIGIN;
+}
+
+static __init noinline void rescue_initrd(void)
{
#ifdef CONFIG_BLK_DEV_INITRD
unsigned long min_initrd_addr = (unsigned long) _end + (4UL << 20);
@@ -405,7 +448,7 @@ static __init void rescue_initrd(void)
return;
if (INITRD_START >= min_initrd_addr)
return;
- memmove((void *) min_initrd_addr, (void *) INITRD_START, INITRD_SIZE);
+ memmove_early((void *) min_initrd_addr, (void *) INITRD_START, INITRD_SIZE);
INITRD_START = min_initrd_addr;
#endif
}
@@ -467,7 +510,8 @@ void __init startup_init(void)
ipl_save_parameters();
rescue_initrd();
clear_bss_section();
- ptff_init();
+ ipl_verify_parameters();
+ time_early_init();
init_kernel_storage_key();
lockdep_off();
setup_lowcore_early();
diff --git a/arch/s390/kernel/entry.S b/arch/s390/kernel/entry.S
index 49a30737adde..97298c58b2be 100644
--- a/arch/s390/kernel/entry.S
+++ b/arch/s390/kernel/entry.S
@@ -42,7 +42,7 @@ __PT_R13 = __PT_GPRS + 104
__PT_R14 = __PT_GPRS + 112
__PT_R15 = __PT_GPRS + 120
-STACK_SHIFT = PAGE_SHIFT + THREAD_ORDER
+STACK_SHIFT = PAGE_SHIFT + THREAD_SIZE_ORDER
STACK_SIZE = 1 << STACK_SHIFT
STACK_INIT = STACK_SIZE - STACK_FRAME_OVERHEAD - __PT_SIZE
@@ -123,8 +123,14 @@ _PIF_WORK = (_PIF_PER_TRAP)
.macro LAST_BREAK scratch
srag \scratch,%r10,23
+#ifdef CONFIG_HAVE_MARCH_Z990_FEATURES
jz .+10
- stg %r10,__TI_last_break(%r12)
+ stg %r10,__TASK_thread+__THREAD_last_break(%r12)
+#else
+ jz .+14
+ lghi \scratch,__TASK_thread
+ stg %r10,__THREAD_last_break(\scratch,%r12)
+#endif
.endm
.macro REENABLE_IRQS
@@ -186,14 +192,13 @@ ENTRY(__switch_to)
stmg %r6,%r15,__SF_GPRS(%r15) # store gprs of prev task
lgr %r1,%r2
aghi %r1,__TASK_thread # thread_struct of prev task
- lg %r5,__TASK_thread_info(%r3) # get thread_info of next
+ lg %r5,__TASK_stack(%r3) # start of kernel stack of next
stg %r15,__THREAD_ksp(%r1) # store kernel stack of prev
lgr %r1,%r3
aghi %r1,__TASK_thread # thread_struct of next task
lgr %r15,%r5
aghi %r15,STACK_INIT # end of kernel stack of next
stg %r3,__LC_CURRENT # store task struct of next
- stg %r5,__LC_THREAD_INFO # store thread info of next
stg %r15,__LC_KERNEL_STACK # store end of kernel stack
lg %r15,__THREAD_ksp(%r1) # load kernel stack of next
/* c4 is used in guest detection: arch/s390/kernel/perf_cpum_sf.c */
@@ -274,7 +279,7 @@ ENTRY(system_call)
.Lsysc_stmg:
stmg %r8,%r15,__LC_SAVE_AREA_SYNC
lg %r10,__LC_LAST_BREAK
- lg %r12,__LC_THREAD_INFO
+ lg %r12,__LC_CURRENT
lghi %r14,_PIF_SYSCALL
.Lsysc_per:
lg %r15,__LC_KERNEL_STACK
@@ -288,7 +293,13 @@ ENTRY(system_call)
mvc __PT_INT_CODE(4,%r11),__LC_SVC_ILC
stg %r14,__PT_FLAGS(%r11)
.Lsysc_do_svc:
- lg %r10,__TI_sysc_table(%r12) # address of system call table
+ # load address of system call table
+#ifdef CONFIG_HAVE_MARCH_Z990_FEATURES
+ lg %r10,__TASK_thread+__THREAD_sysc_table(%r12)
+#else
+ lghi %r13,__TASK_thread
+ lg %r10,__THREAD_sysc_table(%r13,%r12)
+#endif
llgh %r8,__PT_INT_CODE+2(%r11)
slag %r8,%r8,2 # shift and test for svc 0
jnz .Lsysc_nr_ok
@@ -389,7 +400,6 @@ ENTRY(system_call)
TSTMSK __PT_FLAGS(%r11),_PIF_SYSCALL
jno .Lsysc_return
lmg %r2,%r7,__PT_R2(%r11) # load svc arguments
- lg %r10,__TI_sysc_table(%r12) # address of system call table
lghi %r8,0 # svc 0 returns -ENOSYS
llgh %r1,__PT_INT_CODE+2(%r11) # load new svc number
cghi %r1,NR_syscalls
@@ -457,7 +467,7 @@ ENTRY(system_call)
#
ENTRY(ret_from_fork)
la %r11,STACK_FRAME_OVERHEAD(%r15)
- lg %r12,__LC_THREAD_INFO
+ lg %r12,__LC_CURRENT
brasl %r14,schedule_tail
TRACE_IRQS_ON
ssm __LC_SVC_NEW_PSW # reenable interrupts
@@ -478,7 +488,7 @@ ENTRY(pgm_check_handler)
stpt __LC_SYNC_ENTER_TIMER
stmg %r8,%r15,__LC_SAVE_AREA_SYNC
lg %r10,__LC_LAST_BREAK
- lg %r12,__LC_THREAD_INFO
+ lg %r12,__LC_CURRENT
larl %r13,cleanup_critical
lmg %r8,%r9,__LC_PGM_OLD_PSW
tmhh %r8,0x0001 # test problem state bit
@@ -501,7 +511,7 @@ ENTRY(pgm_check_handler)
2: LAST_BREAK %r14
UPDATE_VTIME %r14,%r15,__LC_SYNC_ENTER_TIMER
lg %r15,__LC_KERNEL_STACK
- lg %r14,__TI_task(%r12)
+ lgr %r14,%r12
aghi %r14,__TASK_thread # pointer to thread_struct
lghi %r13,__LC_PGM_TDB
tm __LC_PGM_ILC+2,0x02 # check for transaction abort
@@ -567,7 +577,7 @@ ENTRY(io_int_handler)
stpt __LC_ASYNC_ENTER_TIMER
stmg %r8,%r15,__LC_SAVE_AREA_ASYNC
lg %r10,__LC_LAST_BREAK
- lg %r12,__LC_THREAD_INFO
+ lg %r12,__LC_CURRENT
larl %r13,cleanup_critical
lmg %r8,%r9,__LC_IO_OLD_PSW
SWITCH_ASYNC __LC_SAVE_AREA_ASYNC,__LC_ASYNC_ENTER_TIMER
@@ -626,7 +636,7 @@ ENTRY(io_int_handler)
jo .Lio_work_user # yes -> do resched & signal
#ifdef CONFIG_PREEMPT
# check for preemptive scheduling
- icm %r0,15,__TI_precount(%r12)
+ icm %r0,15,__LC_PREEMPT_COUNT
jnz .Lio_restore # preemption is disabled
TSTMSK __TI_flags(%r12),_TIF_NEED_RESCHED
jno .Lio_restore
@@ -741,7 +751,7 @@ ENTRY(ext_int_handler)
stpt __LC_ASYNC_ENTER_TIMER
stmg %r8,%r15,__LC_SAVE_AREA_ASYNC
lg %r10,__LC_LAST_BREAK
- lg %r12,__LC_THREAD_INFO
+ lg %r12,__LC_CURRENT
larl %r13,cleanup_critical
lmg %r8,%r9,__LC_EXT_OLD_PSW
SWITCH_ASYNC __LC_SAVE_AREA_ASYNC,__LC_ASYNC_ENTER_TIMER
@@ -798,13 +808,10 @@ ENTRY(save_fpu_regs)
TSTMSK __LC_CPU_FLAGS,_CIF_FPU
bor %r14
stfpc __THREAD_FPU_fpc(%r2)
-.Lsave_fpu_regs_fpc_end:
lg %r3,__THREAD_FPU_regs(%r2)
TSTMSK __LC_MACHINE_FLAGS,MACHINE_FLAG_VX
jz .Lsave_fpu_regs_fp # no -> store FP regs
-.Lsave_fpu_regs_vx_low:
VSTM %v0,%v15,0,%r3 # vstm 0,15,0(3)
-.Lsave_fpu_regs_vx_high:
VSTM %v16,%v31,256,%r3 # vstm 16,31,256(3)
j .Lsave_fpu_regs_done # -> set CIF_FPU flag
.Lsave_fpu_regs_fp:
@@ -851,9 +858,7 @@ load_fpu_regs:
TSTMSK __LC_MACHINE_FLAGS,MACHINE_FLAG_VX
lg %r4,__THREAD_FPU_regs(%r4) # %r4 <- reg save area
jz .Lload_fpu_regs_fp # -> no VX, load FP regs
-.Lload_fpu_regs_vx:
VLM %v0,%v15,0,%r4
-.Lload_fpu_regs_vx_high:
VLM %v16,%v31,256,%r4
j .Lload_fpu_regs_done
.Lload_fpu_regs_fp:
@@ -889,7 +894,7 @@ ENTRY(mcck_int_handler)
spt __LC_CPU_TIMER_SAVE_AREA-4095(%r1) # revalidate cpu timer
lmg %r0,%r15,__LC_GPREGS_SAVE_AREA-4095(%r1)# revalidate gprs
lg %r10,__LC_LAST_BREAK
- lg %r12,__LC_THREAD_INFO
+ lg %r12,__LC_CURRENT
larl %r13,cleanup_critical
lmg %r8,%r9,__LC_MCK_OLD_PSW
TSTMSK __LC_MCCK_CODE,MCCK_CODE_SYSTEM_DAMAGE
@@ -948,7 +953,7 @@ ENTRY(mcck_int_handler)
.Lmcck_panic:
lg %r15,__LC_PANIC_STACK
- aghi %r15,-(STACK_FRAME_OVERHEAD + __PT_SIZE)
+ la %r11,STACK_FRAME_OVERHEAD(%r15)
j .Lmcck_skip
#
@@ -1085,7 +1090,7 @@ cleanup_critical:
jhe 0f
# set up saved registers r10 and r12
stg %r10,16(%r11) # r10 last break
- stg %r12,32(%r11) # r12 thread-info pointer
+ stg %r12,32(%r11) # r12 task struct pointer
0: # check if the user time update has been done
clg %r9,BASED(.Lcleanup_system_call_insn+24)
jh 0f
@@ -1106,7 +1111,9 @@ cleanup_critical:
lg %r9,16(%r11)
srag %r9,%r9,23
jz 0f
- mvc __TI_last_break(8,%r12),16(%r11)
+ lgr %r9,%r12
+ aghi %r9,__TASK_thread
+ mvc __THREAD_last_break(8,%r9),16(%r11)
0: # set up saved register r11
lg %r15,__LC_KERNEL_STACK
la %r9,STACK_FRAME_OVERHEAD(%r15)
diff --git a/arch/s390/kernel/head.S b/arch/s390/kernel/head.S
index 4431905f8cfa..0b5ebf8a3d30 100644
--- a/arch/s390/kernel/head.S
+++ b/arch/s390/kernel/head.S
@@ -315,7 +315,7 @@ ENTRY(startup_kdump)
jg startup_continue
.Lstack:
- .long 0x8000 + (1<<(PAGE_SHIFT+THREAD_ORDER))
+ .long 0x8000 + (1<<(PAGE_SHIFT+THREAD_SIZE_ORDER))
.align 8
6: .long 0x7fffffff,0xffffffff
diff --git a/arch/s390/kernel/head64.S b/arch/s390/kernel/head64.S
index 03c2b469c472..482d3526e32b 100644
--- a/arch/s390/kernel/head64.S
+++ b/arch/s390/kernel/head64.S
@@ -32,11 +32,10 @@ ENTRY(startup_continue)
#
# Setup stack
#
- larl %r15,init_thread_union
- stg %r15,__LC_THREAD_INFO # cache thread info in lowcore
- lg %r14,__TI_task(%r15) # cache current in lowcore
+ larl %r14,init_task
stg %r14,__LC_CURRENT
- aghi %r15,1<<(PAGE_SHIFT+THREAD_ORDER) # init_task_union + THREAD_SIZE
+ larl %r15,init_thread_union
+ aghi %r15,1<<(PAGE_SHIFT+THREAD_SIZE_ORDER) # init_task_union + THREAD_SIZE
stg %r15,__LC_KERNEL_STACK # set end of kernel stack
aghi %r15,-160
#
diff --git a/arch/s390/kernel/ipl.c b/arch/s390/kernel/ipl.c
index 295bfb7124bc..ff3364a067ff 100644
--- a/arch/s390/kernel/ipl.c
+++ b/arch/s390/kernel/ipl.c
@@ -1991,10 +1991,9 @@ void __init ipl_update_parameters(void)
diag308_set_works = 1;
}
-void __init ipl_save_parameters(void)
+void __init ipl_verify_parameters(void)
{
struct cio_iplinfo iplinfo;
- void *src, *dst;
if (cio_get_iplinfo(&iplinfo))
return;
@@ -2005,10 +2004,6 @@ void __init ipl_save_parameters(void)
if (!iplinfo.is_qdio)
return;
ipl_flags |= IPL_PARMBLOCK_VALID;
- src = (void *)(unsigned long)S390_lowcore.ipl_parmblock_ptr;
- dst = (void *)IPL_PARMBLOCK_ORIGIN;
- memmove(dst, src, PAGE_SIZE);
- S390_lowcore.ipl_parmblock_ptr = IPL_PARMBLOCK_ORIGIN;
}
static LIST_HEAD(rcall);
diff --git a/arch/s390/kernel/irq.c b/arch/s390/kernel/irq.c
index 285d6561076d..ef60f4177331 100644
--- a/arch/s390/kernel/irq.c
+++ b/arch/s390/kernel/irq.c
@@ -168,7 +168,7 @@ void do_softirq_own_stack(void)
old = current_stack_pointer();
/* Check against async. stack address range. */
new = S390_lowcore.async_stack;
- if (((new - old) >> (PAGE_SHIFT + THREAD_ORDER)) != 0) {
+ if (((new - old) >> (PAGE_SHIFT + THREAD_SIZE_ORDER)) != 0) {
/* Need to switch to the async. stack. */
new -= STACK_FRAME_OVERHEAD;
((struct stack_frame *) new)->back_chain = old;
diff --git a/arch/s390/kernel/lgr.c b/arch/s390/kernel/lgr.c
index 6ea6d69339b5..ae7dff110054 100644
--- a/arch/s390/kernel/lgr.c
+++ b/arch/s390/kernel/lgr.c
@@ -5,7 +5,8 @@
* Author(s): Michael Holzheu <holzheu@linux.vnet.ibm.com>
*/
-#include <linux/module.h>
+#include <linux/init.h>
+#include <linux/export.h>
#include <linux/timer.h>
#include <linux/slab.h>
#include <asm/facility.h>
@@ -183,4 +184,4 @@ static int __init lgr_init(void)
lgr_timer_set();
return 0;
}
-module_init(lgr_init);
+device_initcall(lgr_init);
diff --git a/arch/s390/kernel/perf_cpum_sf.c b/arch/s390/kernel/perf_cpum_sf.c
index fcc634c1479a..763dec18edcd 100644
--- a/arch/s390/kernel/perf_cpum_sf.c
+++ b/arch/s390/kernel/perf_cpum_sf.c
@@ -995,39 +995,36 @@ static int perf_push_sample(struct perf_event *event, struct sf_raw_sample *sfr)
regs.int_parm = CPU_MF_INT_SF_PRA;
sde_regs = (struct perf_sf_sde_regs *) &regs.int_parm_long;
- regs.psw.addr = sfr->basic.ia;
- if (sfr->basic.T)
- regs.psw.mask |= PSW_MASK_DAT;
- if (sfr->basic.W)
- regs.psw.mask |= PSW_MASK_WAIT;
- if (sfr->basic.P)
- regs.psw.mask |= PSW_MASK_PSTATE;
- switch (sfr->basic.AS) {
- case 0x0:
- regs.psw.mask |= PSW_ASC_PRIMARY;
- break;
- case 0x1:
- regs.psw.mask |= PSW_ASC_ACCREG;
- break;
- case 0x2:
- regs.psw.mask |= PSW_ASC_SECONDARY;
- break;
- case 0x3:
- regs.psw.mask |= PSW_ASC_HOME;
- break;
- }
+ psw_bits(regs.psw).ia = sfr->basic.ia;
+ psw_bits(regs.psw).t = sfr->basic.T;
+ psw_bits(regs.psw).w = sfr->basic.W;
+ psw_bits(regs.psw).p = sfr->basic.P;
+ psw_bits(regs.psw).as = sfr->basic.AS;
/*
- * A non-zero guest program parameter indicates a guest
- * sample.
- * Note that some early samples or samples from guests without
+ * Use the hardware provided configuration level to decide if the
+ * sample belongs to a guest or host. If that is not available,
+ * fall back to the following heuristics:
+ * A non-zero guest program parameter always indicates a guest
+ * sample. Some early samples or samples from guests without
* lpp usage would be misaccounted to the host. We use the asn
- * value as a heuristic to detect most of these guest samples.
- * If the value differs from the host hpp value, we assume
- * it to be a KVM guest.
+ * value as an addon heuristic to detect most of these guest samples.
+ * If the value differs from the host hpp value, we assume to be a
+ * KVM guest.
*/
- if (sfr->basic.gpp || sfr->basic.prim_asn != (u16) sfr->basic.hpp)
+ switch (sfr->basic.CL) {
+ case 1: /* logical partition */
+ sde_regs->in_guest = 0;
+ break;
+ case 2: /* virtual machine */
sde_regs->in_guest = 1;
+ break;
+ default: /* old machine, use heuristics */
+ if (sfr->basic.gpp ||
+ sfr->basic.prim_asn != (u16)sfr->basic.hpp)
+ sde_regs->in_guest = 1;
+ break;
+ }
overflow = 0;
if (perf_exclude_event(event, &regs, sde_regs))
diff --git a/arch/s390/kernel/process.c b/arch/s390/kernel/process.c
index bba4fa74b321..400d14f0b9f5 100644
--- a/arch/s390/kernel/process.c
+++ b/arch/s390/kernel/process.c
@@ -103,7 +103,6 @@ int arch_dup_task_struct(struct task_struct *dst, struct task_struct *src)
int copy_thread(unsigned long clone_flags, unsigned long new_stackp,
unsigned long arg, struct task_struct *p)
{
- struct thread_info *ti;
struct fake_frame
{
struct stack_frame sf;
@@ -121,9 +120,8 @@ int copy_thread(unsigned long clone_flags, unsigned long new_stackp,
memset(&p->thread.per_event, 0, sizeof(p->thread.per_event));
clear_tsk_thread_flag(p, TIF_SINGLE_STEP);
/* Initialize per thread user and system timer values */
- ti = task_thread_info(p);
- ti->user_timer = 0;
- ti->system_timer = 0;
+ p->thread.user_timer = 0;
+ p->thread.system_timer = 0;
frame->sf.back_chain = 0;
/* new return point is ret_from_fork */
diff --git a/arch/s390/kernel/ptrace.c b/arch/s390/kernel/ptrace.c
index 9336e824e2db..b81ab8882e2e 100644
--- a/arch/s390/kernel/ptrace.c
+++ b/arch/s390/kernel/ptrace.c
@@ -461,7 +461,7 @@ long arch_ptrace(struct task_struct *child, long request,
}
return 0;
case PTRACE_GET_LAST_BREAK:
- put_user(task_thread_info(child)->last_break,
+ put_user(child->thread.last_break,
(unsigned long __user *) data);
return 0;
case PTRACE_ENABLE_TE:
@@ -811,7 +811,7 @@ long compat_arch_ptrace(struct task_struct *child, compat_long_t request,
}
return 0;
case PTRACE_GET_LAST_BREAK:
- put_user(task_thread_info(child)->last_break,
+ put_user(child->thread.last_break,
(unsigned int __user *) data);
return 0;
}
@@ -997,10 +997,10 @@ static int s390_last_break_get(struct task_struct *target,
if (count > 0) {
if (kbuf) {
unsigned long *k = kbuf;
- *k = task_thread_info(target)->last_break;
+ *k = target->thread.last_break;
} else {
unsigned long __user *u = ubuf;
- if (__put_user(task_thread_info(target)->last_break, u))
+ if (__put_user(target->thread.last_break, u))
return -EFAULT;
}
}
@@ -1113,7 +1113,7 @@ static int s390_system_call_get(struct task_struct *target,
unsigned int pos, unsigned int count,
void *kbuf, void __user *ubuf)
{
- unsigned int *data = &task_thread_info(target)->system_call;
+ unsigned int *data = &target->thread.system_call;
return user_regset_copyout(&pos, &count, &kbuf, &ubuf,
data, 0, sizeof(unsigned int));
}
@@ -1123,7 +1123,7 @@ static int s390_system_call_set(struct task_struct *target,
unsigned int pos, unsigned int count,
const void *kbuf, const void __user *ubuf)
{
- unsigned int *data = &task_thread_info(target)->system_call;
+ unsigned int *data = &target->thread.system_call;
return user_regset_copyin(&pos, &count, &kbuf, &ubuf,
data, 0, sizeof(unsigned int));
}
@@ -1327,7 +1327,7 @@ static int s390_compat_last_break_get(struct task_struct *target,
compat_ulong_t last_break;
if (count > 0) {
- last_break = task_thread_info(target)->last_break;
+ last_break = target->thread.last_break;
if (kbuf) {
unsigned long *k = kbuf;
*k = last_break;
diff --git a/arch/s390/kernel/setup.c b/arch/s390/kernel/setup.c
index 7f7ba5f23f13..adfac9f0a89f 100644
--- a/arch/s390/kernel/setup.c
+++ b/arch/s390/kernel/setup.c
@@ -35,6 +35,7 @@
#include <linux/root_dev.h>
#include <linux/console.h>
#include <linux/kernel_stat.h>
+#include <linux/dma-contiguous.h>
#include <linux/device.h>
#include <linux/notifier.h>
#include <linux/pfn.h>
@@ -303,7 +304,7 @@ static void __init setup_lowcore(void)
* Setup lowcore for boot cpu
*/
BUILD_BUG_ON(sizeof(struct lowcore) != LC_PAGES * 4096);
- lc = __alloc_bootmem_low(LC_PAGES * PAGE_SIZE, LC_PAGES * PAGE_SIZE, 0);
+ lc = memblock_virt_alloc_low(sizeof(*lc), sizeof(*lc));
lc->restart_psw.mask = PSW_KERNEL_BITS;
lc->restart_psw.addr = (unsigned long) restart_int_handler;
lc->external_new_psw.mask = PSW_KERNEL_BITS |
@@ -324,15 +325,15 @@ static void __init setup_lowcore(void)
lc->kernel_stack = ((unsigned long) &init_thread_union)
+ THREAD_SIZE - STACK_FRAME_OVERHEAD - sizeof(struct pt_regs);
lc->async_stack = (unsigned long)
- __alloc_bootmem(ASYNC_SIZE, ASYNC_SIZE, 0)
+ memblock_virt_alloc(ASYNC_SIZE, ASYNC_SIZE)
+ ASYNC_SIZE - STACK_FRAME_OVERHEAD - sizeof(struct pt_regs);
lc->panic_stack = (unsigned long)
- __alloc_bootmem(PAGE_SIZE, PAGE_SIZE, 0)
+ memblock_virt_alloc(PAGE_SIZE, PAGE_SIZE)
+ PAGE_SIZE - STACK_FRAME_OVERHEAD - sizeof(struct pt_regs);
- lc->current_task = (unsigned long) init_thread_union.thread_info.task;
- lc->thread_info = (unsigned long) &init_thread_union;
+ lc->current_task = (unsigned long)&init_task;
lc->lpp = LPP_MAGIC;
lc->machine_flags = S390_lowcore.machine_flags;
+ lc->preempt_count = S390_lowcore.preempt_count;
lc->stfl_fac_list = S390_lowcore.stfl_fac_list;
memcpy(lc->stfle_fac_list, S390_lowcore.stfle_fac_list,
MAX_FACILITY_BIT/8);
@@ -349,7 +350,7 @@ static void __init setup_lowcore(void)
lc->last_update_timer = S390_lowcore.last_update_timer;
lc->last_update_clock = S390_lowcore.last_update_clock;
- restart_stack = __alloc_bootmem(ASYNC_SIZE, ASYNC_SIZE, 0);
+ restart_stack = memblock_virt_alloc(ASYNC_SIZE, ASYNC_SIZE);
restart_stack += ASYNC_SIZE;
/*
@@ -412,7 +413,7 @@ static void __init setup_resources(void)
bss_resource.end = (unsigned long) &__bss_stop - 1;
for_each_memblock(memory, reg) {
- res = alloc_bootmem_low(sizeof(*res));
+ res = memblock_virt_alloc(sizeof(*res), 8);
res->flags = IORESOURCE_BUSY | IORESOURCE_SYSTEM_RAM;
res->name = "System RAM";
@@ -426,7 +427,7 @@ static void __init setup_resources(void)
std_res->start > res->end)
continue;
if (std_res->end > res->end) {
- sub_res = alloc_bootmem_low(sizeof(*sub_res));
+ sub_res = memblock_virt_alloc(sizeof(*sub_res), 8);
*sub_res = *std_res;
sub_res->end = res->end;
std_res->start = res->end + 1;
@@ -445,7 +446,7 @@ static void __init setup_resources(void)
* part of the System RAM resource.
*/
if (crashk_res.end) {
- memblock_add(crashk_res.start, resource_size(&crashk_res));
+ memblock_add_node(crashk_res.start, resource_size(&crashk_res), 0);
memblock_reserve(crashk_res.start, resource_size(&crashk_res));
insert_resource(&iomem_resource, &crashk_res);
}
@@ -903,6 +904,7 @@ void __init setup_arch(char **cmdline_p)
setup_memory_end();
setup_memory();
+ dma_contiguous_reserve(memory_end);
check_initrd();
reserve_crashkernel();
@@ -921,6 +923,8 @@ void __init setup_arch(char **cmdline_p)
cpu_detect_mhz_feature();
cpu_init();
numa_setup();
+ smp_detect_cpus();
+ topology_init_early();
/*
* Create kernel page tables and switch to virtual addressing.
diff --git a/arch/s390/kernel/signal.c b/arch/s390/kernel/signal.c
index d82562cf0a0e..9f241d1efeda 100644
--- a/arch/s390/kernel/signal.c
+++ b/arch/s390/kernel/signal.c
@@ -359,7 +359,7 @@ static int setup_frame(int sig, struct k_sigaction *ka,
/* set extra registers only for synchronous signals */
regs->gprs[4] = regs->int_code & 127;
regs->gprs[5] = regs->int_parm_long;
- regs->gprs[6] = task_thread_info(current)->last_break;
+ regs->gprs[6] = current->thread.last_break;
}
return 0;
}
@@ -430,7 +430,7 @@ static int setup_rt_frame(struct ksignal *ksig, sigset_t *set,
regs->gprs[2] = ksig->sig;
regs->gprs[3] = (unsigned long) &frame->info;
regs->gprs[4] = (unsigned long) &frame->uc;
- regs->gprs[5] = task_thread_info(current)->last_break;
+ regs->gprs[5] = current->thread.last_break;
return 0;
}
@@ -467,13 +467,13 @@ void do_signal(struct pt_regs *regs)
* the debugger may change all our registers, including the system
* call information.
*/
- current_thread_info()->system_call =
+ current->thread.system_call =
test_pt_regs_flag(regs, PIF_SYSCALL) ? regs->int_code : 0;
if (get_signal(&ksig)) {
/* Whee! Actually deliver the signal. */
- if (current_thread_info()->system_call) {
- regs->int_code = current_thread_info()->system_call;
+ if (current->thread.system_call) {
+ regs->int_code = current->thread.system_call;
/* Check for system call restarting. */
switch (regs->gprs[2]) {
case -ERESTART_RESTARTBLOCK:
@@ -506,8 +506,8 @@ void do_signal(struct pt_regs *regs)
/* No handlers present - check for system call restart */
clear_pt_regs_flag(regs, PIF_SYSCALL);
- if (current_thread_info()->system_call) {
- regs->int_code = current_thread_info()->system_call;
+ if (current->thread.system_call) {
+ regs->int_code = current->thread.system_call;
switch (regs->gprs[2]) {
case -ERESTART_RESTARTBLOCK:
/* Restart with sys_restart_syscall */
diff --git a/arch/s390/kernel/smp.c b/arch/s390/kernel/smp.c
index df4a508ff35c..e49f61aadaf9 100644
--- a/arch/s390/kernel/smp.c
+++ b/arch/s390/kernel/smp.c
@@ -19,6 +19,7 @@
#define pr_fmt(fmt) KMSG_COMPONENT ": " fmt
#include <linux/workqueue.h>
+#include <linux/bootmem.h>
#include <linux/module.h>
#include <linux/init.h>
#include <linux/mm.h>
@@ -259,16 +260,14 @@ static void pcpu_prepare_secondary(struct pcpu *pcpu, int cpu)
static void pcpu_attach_task(struct pcpu *pcpu, struct task_struct *tsk)
{
struct lowcore *lc = pcpu->lowcore;
- struct thread_info *ti = task_thread_info(tsk);
lc->kernel_stack = (unsigned long) task_stack_page(tsk)
+ THREAD_SIZE - STACK_FRAME_OVERHEAD - sizeof(struct pt_regs);
- lc->thread_info = (unsigned long) task_thread_info(tsk);
lc->current_task = (unsigned long) tsk;
lc->lpp = LPP_MAGIC;
lc->current_pid = tsk->pid;
- lc->user_timer = ti->user_timer;
- lc->system_timer = ti->system_timer;
+ lc->user_timer = tsk->thread.user_timer;
+ lc->system_timer = tsk->thread.system_timer;
lc->steal_timer = 0;
}
@@ -662,14 +661,12 @@ int smp_cpu_get_polarization(int cpu)
return pcpu_devices[cpu].polarization;
}
-static struct sclp_core_info *smp_get_core_info(void)
+static void __ref smp_get_core_info(struct sclp_core_info *info, int early)
{
static int use_sigp_detection;
- struct sclp_core_info *info;
int address;
- info = kzalloc(sizeof(*info), GFP_KERNEL);
- if (info && (use_sigp_detection || sclp_get_core_info(info))) {
+ if (use_sigp_detection || sclp_get_core_info(info, early)) {
use_sigp_detection = 1;
for (address = 0;
address < (SCLP_MAX_CORES << smp_cpu_mt_shift);
@@ -683,7 +680,6 @@ static struct sclp_core_info *smp_get_core_info(void)
}
info->combined = info->configured;
}
- return info;
}
static int smp_add_present_cpu(int cpu);
@@ -724,17 +720,15 @@ static int __smp_rescan_cpus(struct sclp_core_info *info, int sysfs_add)
return nr;
}
-static void __init smp_detect_cpus(void)
+void __init smp_detect_cpus(void)
{
unsigned int cpu, mtid, c_cpus, s_cpus;
struct sclp_core_info *info;
u16 address;
/* Get CPU information */
- info = smp_get_core_info();
- if (!info)
- panic("smp_detect_cpus failed to allocate memory\n");
-
+ info = memblock_virt_alloc(sizeof(*info), 8);
+ smp_get_core_info(info, 1);
/* Find boot CPU type */
if (sclp.has_core_type) {
address = stap();
@@ -770,7 +764,7 @@ static void __init smp_detect_cpus(void)
get_online_cpus();
__smp_rescan_cpus(info, 0);
put_online_cpus();
- kfree(info);
+ memblock_free_early((unsigned long)info, sizeof(*info));
}
/*
@@ -807,7 +801,7 @@ int __cpu_up(unsigned int cpu, struct task_struct *tidle)
pcpu = pcpu_devices + cpu;
if (pcpu->state != CPU_STATE_CONFIGURED)
return -EIO;
- base = cpu - (cpu % (smp_cpu_mtid + 1));
+ base = smp_get_base_cpu(cpu);
for (i = 0; i <= smp_cpu_mtid; i++) {
if (base + i < nr_cpu_ids)
if (cpu_online(base + i))
@@ -907,7 +901,6 @@ void __init smp_prepare_cpus(unsigned int max_cpus)
/* request the 0x1202 external call external interrupt */
if (register_external_irq(EXT_IRQ_EXTERNAL_CALL, do_ext_call_interrupt))
panic("Couldn't request external interrupt 0x1202");
- smp_detect_cpus();
}
void __init smp_prepare_boot_cpu(void)
@@ -973,7 +966,7 @@ static ssize_t cpu_configure_store(struct device *dev,
rc = -EBUSY;
/* disallow configuration changes of online cpus and cpu 0 */
cpu = dev->id;
- cpu -= cpu % (smp_cpu_mtid + 1);
+ cpu = smp_get_base_cpu(cpu);
if (cpu == 0)
goto out;
for (i = 0; i <= smp_cpu_mtid; i++)
@@ -1106,9 +1099,10 @@ int __ref smp_rescan_cpus(void)
struct sclp_core_info *info;
int nr;
- info = smp_get_core_info();
+ info = kzalloc(sizeof(*info), GFP_KERNEL);
if (!info)
return -ENOMEM;
+ smp_get_core_info(info, 0);
get_online_cpus();
mutex_lock(&smp_cpu_state_mutex);
nr = __smp_rescan_cpus(info, 1);
diff --git a/arch/s390/kernel/swsusp.S b/arch/s390/kernel/swsusp.S
index 2d6b6e81f812..1ff21f05d7dd 100644
--- a/arch/s390/kernel/swsusp.S
+++ b/arch/s390/kernel/swsusp.S
@@ -194,7 +194,7 @@ pgm_check_entry:
/* Suspend CPU not available -> panic */
larl %r15,init_thread_union
- ahi %r15,1<<(PAGE_SHIFT+THREAD_ORDER)
+ ahi %r15,1<<(PAGE_SHIFT+THREAD_SIZE_ORDER)
larl %r2,.Lpanic_string
larl %r3,_sclp_print_early
lghi %r1,0
diff --git a/arch/s390/kernel/sysinfo.c b/arch/s390/kernel/sysinfo.c
index bfda6aa40280..24021c1e3ecb 100644
--- a/arch/s390/kernel/sysinfo.c
+++ b/arch/s390/kernel/sysinfo.c
@@ -56,6 +56,20 @@ int stsi(void *sysinfo, int fc, int sel1, int sel2)
}
EXPORT_SYMBOL(stsi);
+static bool convert_ext_name(unsigned char encoding, char *name, size_t len)
+{
+ switch (encoding) {
+ case 1: /* EBCDIC */
+ EBCASC(name, len);
+ break;
+ case 2: /* UTF-8 */
+ break;
+ default:
+ return false;
+ }
+ return true;
+}
+
static void stsi_1_1_1(struct seq_file *m, struct sysinfo_1_1_1 *info)
{
int i;
@@ -207,24 +221,19 @@ static void stsi_2_2_2(struct seq_file *m, struct sysinfo_2_2_2 *info)
seq_printf(m, "LPAR CPUs S-MTID: %d\n", info->mt_stid);
seq_printf(m, "LPAR CPUs PS-MTID: %d\n", info->mt_psmtid);
}
+ if (convert_ext_name(info->vsne, info->ext_name, sizeof(info->ext_name))) {
+ seq_printf(m, "LPAR Extended Name: %-.256s\n", info->ext_name);
+ seq_printf(m, "LPAR UUID: %pUb\n", &info->uuid);
+ }
}
static void print_ext_name(struct seq_file *m, int lvl,
struct sysinfo_3_2_2 *info)
{
- if (info->vm[lvl].ext_name_encoding == 0)
- return;
- if (info->ext_names[lvl][0] == 0)
- return;
- switch (info->vm[lvl].ext_name_encoding) {
- case 1: /* EBCDIC */
- EBCASC(info->ext_names[lvl], sizeof(info->ext_names[lvl]));
- break;
- case 2: /* UTF-8 */
- break;
- default:
+ size_t len = sizeof(info->ext_names[lvl]);
+
+ if (!convert_ext_name(info->vm[lvl].evmne, info->ext_names[lvl], len))
return;
- }
seq_printf(m, "VM%02d Extended Name: %-.256s\n", lvl,
info->ext_names[lvl]);
}
diff --git a/arch/s390/kernel/time.c b/arch/s390/kernel/time.c
index 0bfcc492987e..867d0a057046 100644
--- a/arch/s390/kernel/time.c
+++ b/arch/s390/kernel/time.c
@@ -59,19 +59,27 @@ ATOMIC_NOTIFIER_HEAD(s390_epoch_delta_notifier);
EXPORT_SYMBOL(s390_epoch_delta_notifier);
unsigned char ptff_function_mask[16];
-unsigned long lpar_offset;
-unsigned long initial_leap_seconds;
+
+static unsigned long long lpar_offset;
+static unsigned long long initial_leap_seconds;
+static unsigned long long tod_steering_end;
+static long long tod_steering_delta;
/*
* Get time offsets with PTFF
*/
-void __init ptff_init(void)
+void __init time_early_init(void)
{
struct ptff_qto qto;
struct ptff_qui qui;
+ /* Initialize TOD steering parameters */
+ tod_steering_end = sched_clock_base_cc;
+ vdso_data->ts_end = tod_steering_end;
+
if (!test_facility(28))
return;
+
ptff(&ptff_function_mask, sizeof(ptff_function_mask), PTFF_QAF);
/* get LPAR offset */
@@ -80,7 +88,7 @@ void __init ptff_init(void)
/* get initial leap seconds */
if (ptff_query(PTFF_QUI) && ptff(&qui, sizeof(qui), PTFF_QUI) == 0)
- initial_leap_seconds = (unsigned long)
+ initial_leap_seconds = (unsigned long long)
((long) qui.old_leap * 4096000000L);
}
@@ -123,18 +131,6 @@ void clock_comparator_work(void)
cd->event_handler(cd);
}
-/*
- * Fixup the clock comparator.
- */
-static void fixup_clock_comparator(unsigned long long delta)
-{
- /* If nobody is waiting there's nothing to fix. */
- if (S390_lowcore.clock_comparator == -1ULL)
- return;
- S390_lowcore.clock_comparator += delta;
- set_clock_comparator(S390_lowcore.clock_comparator);
-}
-
static int s390_next_event(unsigned long delta,
struct clock_event_device *evt)
{
@@ -215,7 +211,21 @@ void read_boot_clock64(struct timespec64 *ts)
static cycle_t read_tod_clock(struct clocksource *cs)
{
- return get_tod_clock();
+ unsigned long long now, adj;
+
+ preempt_disable(); /* protect from changes to steering parameters */
+ now = get_tod_clock();
+ adj = tod_steering_end - now;
+ if (unlikely((s64) adj >= 0))
+ /*
+ * manually steer by 1 cycle every 2^16 cycles. This
+ * corresponds to shifting the tod delta by 15. 1s is
+ * therefore steered in ~9h. The adjust will decrease
+ * over time, until it finally reaches 0.
+ */
+ now += (tod_steering_delta < 0) ? (adj >> 15) : -(adj >> 15);
+ preempt_enable();
+ return now;
}
static struct clocksource clocksource_tod = {
@@ -384,6 +394,55 @@ static inline int check_sync_clock(void)
return rc;
}
+/*
+ * Apply clock delta to the global data structures.
+ * This is called once on the CPU that performed the clock sync.
+ */
+static void clock_sync_global(unsigned long long delta)
+{
+ unsigned long now, adj;
+ struct ptff_qto qto;
+
+ /* Fixup the monotonic sched clock. */
+ sched_clock_base_cc += delta;
+ /* Adjust TOD steering parameters. */
+ vdso_data->tb_update_count++;
+ now = get_tod_clock();
+ adj = tod_steering_end - now;
+ if (unlikely((s64) adj >= 0))
+ /* Calculate how much of the old adjustment is left. */
+ tod_steering_delta = (tod_steering_delta < 0) ?
+ -(adj >> 15) : (adj >> 15);
+ tod_steering_delta += delta;
+ if ((abs(tod_steering_delta) >> 48) != 0)
+ panic("TOD clock sync offset %lli is too large to drift\n",
+ tod_steering_delta);
+ tod_steering_end = now + (abs(tod_steering_delta) << 15);
+ vdso_data->ts_dir = (tod_steering_delta < 0) ? 0 : 1;
+ vdso_data->ts_end = tod_steering_end;
+ vdso_data->tb_update_count++;
+ /* Update LPAR offset. */
+ if (ptff_query(PTFF_QTO) && ptff(&qto, sizeof(qto), PTFF_QTO) == 0)
+ lpar_offset = qto.tod_epoch_difference;
+ /* Call the TOD clock change notifier. */
+ atomic_notifier_call_chain(&s390_epoch_delta_notifier, 0, &delta);
+}
+
+/*
+ * Apply clock delta to the per-CPU data structures of this CPU.
+ * This is called for each online CPU after the call to clock_sync_global.
+ */
+static void clock_sync_local(unsigned long long delta)
+{
+ /* Add the delta to the clock comparator. */
+ if (S390_lowcore.clock_comparator != -1ULL) {
+ S390_lowcore.clock_comparator += delta;
+ set_clock_comparator(S390_lowcore.clock_comparator);
+ }
+ /* Adjust the last_update_clock time-stamp. */
+ S390_lowcore.last_update_clock += delta;
+}
+
/* Single threaded workqueue used for stp sync events */
static struct workqueue_struct *time_sync_wq;
@@ -397,31 +456,9 @@ static void __init time_init_wq(void)
struct clock_sync_data {
atomic_t cpus;
int in_sync;
- unsigned long long fixup_cc;
+ unsigned long long clock_delta;
};
-static void clock_sync_cpu(struct clock_sync_data *sync)
-{
- atomic_dec(&sync->cpus);
- enable_sync_clock();
- while (sync->in_sync == 0) {
- __udelay(1);
- /*
- * A different cpu changes *in_sync. Therefore use
- * barrier() to force memory access.
- */
- barrier();
- }
- if (sync->in_sync != 1)
- /* Didn't work. Clear per-cpu in sync bit again. */
- disable_sync_clock(NULL);
- /*
- * This round of TOD syncing is done. Set the clock comparator
- * to the next tick and let the processor continue.
- */
- fixup_clock_comparator(sync->fixup_cc);
-}
-
/*
* Server Time Protocol (STP) code.
*/
@@ -523,54 +560,46 @@ void stp_queue_work(void)
static int stp_sync_clock(void *data)
{
- static int first;
+ struct clock_sync_data *sync = data;
unsigned long long clock_delta;
- struct clock_sync_data *stp_sync;
- struct ptff_qto qto;
+ static int first;
int rc;
- stp_sync = data;
-
- if (xchg(&first, 1) == 1) {
- /* Slave */
- clock_sync_cpu(stp_sync);
- return 0;
- }
-
- /* Wait until all other cpus entered the sync function. */
- while (atomic_read(&stp_sync->cpus) != 0)
- cpu_relax();
-
enable_sync_clock();
-
- rc = 0;
- if (stp_info.todoff[0] || stp_info.todoff[1] ||
- stp_info.todoff[2] || stp_info.todoff[3] ||
- stp_info.tmd != 2) {
- rc = chsc_sstpc(stp_page, STP_OP_SYNC, 0, &clock_delta);
- if (rc == 0) {
- /* fixup the monotonic sched clock */
- sched_clock_base_cc += clock_delta;
- if (ptff_query(PTFF_QTO) &&
- ptff(&qto, sizeof(qto), PTFF_QTO) == 0)
- /* Update LPAR offset */
- lpar_offset = qto.tod_epoch_difference;
- atomic_notifier_call_chain(&s390_epoch_delta_notifier,
- 0, &clock_delta);
- stp_sync->fixup_cc = clock_delta;
- fixup_clock_comparator(clock_delta);
- rc = chsc_sstpi(stp_page, &stp_info,
- sizeof(struct stp_sstpi));
- if (rc == 0 && stp_info.tmd != 2)
- rc = -EAGAIN;
+ if (xchg(&first, 1) == 0) {
+ /* Wait until all other cpus entered the sync function. */
+ while (atomic_read(&sync->cpus) != 0)
+ cpu_relax();
+ rc = 0;
+ if (stp_info.todoff[0] || stp_info.todoff[1] ||
+ stp_info.todoff[2] || stp_info.todoff[3] ||
+ stp_info.tmd != 2) {
+ rc = chsc_sstpc(stp_page, STP_OP_SYNC, 0,
+ &clock_delta);
+ if (rc == 0) {
+ sync->clock_delta = clock_delta;
+ clock_sync_global(clock_delta);
+ rc = chsc_sstpi(stp_page, &stp_info,
+ sizeof(struct stp_sstpi));
+ if (rc == 0 && stp_info.tmd != 2)
+ rc = -EAGAIN;
+ }
}
+ sync->in_sync = rc ? -EAGAIN : 1;
+ xchg(&first, 0);
+ } else {
+ /* Slave */
+ atomic_dec(&sync->cpus);
+ /* Wait for in_sync to be set. */
+ while (READ_ONCE(sync->in_sync) == 0)
+ __udelay(1);
}
- if (rc) {
+ if (sync->in_sync != 1)
+ /* Didn't work. Clear per-cpu in sync bit again. */
disable_sync_clock(NULL);
- stp_sync->in_sync = -EAGAIN;
- } else
- stp_sync->in_sync = 1;
- xchg(&first, 0);
+ /* Apply clock delta to per-CPU fields of this CPU. */
+ clock_sync_local(sync->clock_delta);
+
return 0;
}
diff --git a/arch/s390/kernel/topology.c b/arch/s390/kernel/topology.c
index e959c02e0cac..93dcbae1e98d 100644
--- a/arch/s390/kernel/topology.c
+++ b/arch/s390/kernel/topology.c
@@ -7,6 +7,7 @@
#define pr_fmt(fmt) KMSG_COMPONENT ": " fmt
#include <linux/workqueue.h>
+#include <linux/bootmem.h>
#include <linux/cpuset.h>
#include <linux/device.h>
#include <linux/export.h>
@@ -41,15 +42,17 @@ static bool topology_enabled = true;
static DECLARE_WORK(topology_work, topology_work_fn);
/*
- * Socket/Book linked lists and per_cpu(cpu_topology) updates are
+ * Socket/Book linked lists and cpu_topology updates are
* protected by "sched_domains_mutex".
*/
static struct mask_info socket_info;
static struct mask_info book_info;
static struct mask_info drawer_info;
-DEFINE_PER_CPU(struct cpu_topology_s390, cpu_topology);
-EXPORT_PER_CPU_SYMBOL_GPL(cpu_topology);
+struct cpu_topology_s390 cpu_topology[NR_CPUS];
+EXPORT_SYMBOL_GPL(cpu_topology);
+
+cpumask_t cpus_with_topology;
static cpumask_t cpu_group_map(struct mask_info *info, unsigned int cpu)
{
@@ -97,7 +100,7 @@ static void add_cpus_to_mask(struct topology_core *tl_core,
if (lcpu < 0)
continue;
for (i = 0; i <= smp_cpu_mtid; i++) {
- topo = &per_cpu(cpu_topology, lcpu + i);
+ topo = &cpu_topology[lcpu + i];
topo->drawer_id = drawer->id;
topo->book_id = book->id;
topo->socket_id = socket->id;
@@ -106,6 +109,7 @@ static void add_cpus_to_mask(struct topology_core *tl_core,
cpumask_set_cpu(lcpu + i, &drawer->mask);
cpumask_set_cpu(lcpu + i, &book->mask);
cpumask_set_cpu(lcpu + i, &socket->mask);
+ cpumask_set_cpu(lcpu + i, &cpus_with_topology);
smp_cpu_set_polarization(lcpu + i, tl_core->pp);
}
}
@@ -220,7 +224,7 @@ static void update_cpu_masks(void)
int cpu;
for_each_possible_cpu(cpu) {
- topo = &per_cpu(cpu_topology, cpu);
+ topo = &cpu_topology[cpu];
topo->thread_mask = cpu_thread_map(cpu);
topo->core_mask = cpu_group_map(&socket_info, cpu);
topo->book_mask = cpu_group_map(&book_info, cpu);
@@ -231,6 +235,8 @@ static void update_cpu_masks(void)
topo->socket_id = cpu;
topo->book_id = cpu;
topo->drawer_id = cpu;
+ if (cpu_present(cpu))
+ cpumask_set_cpu(cpu, &cpus_with_topology);
}
}
numa_update_cpu_topology();
@@ -241,12 +247,12 @@ void store_topology(struct sysinfo_15_1_x *info)
stsi(info, 15, 1, min(topology_max_mnest, 4));
}
-int arch_update_cpu_topology(void)
+static int __arch_update_cpu_topology(void)
{
struct sysinfo_15_1_x *info = tl_info;
- struct device *dev;
- int cpu, rc = 0;
+ int rc = 0;
+ cpumask_clear(&cpus_with_topology);
if (MACHINE_HAS_TOPOLOGY) {
rc = 1;
store_topology(info);
@@ -255,6 +261,15 @@ int arch_update_cpu_topology(void)
update_cpu_masks();
if (!MACHINE_HAS_TOPOLOGY)
topology_update_polarization_simple();
+ return rc;
+}
+
+int arch_update_cpu_topology(void)
+{
+ struct device *dev;
+ int cpu, rc;
+
+ rc = __arch_update_cpu_topology();
for_each_online_cpu(cpu) {
dev = get_cpu_device(cpu);
kobject_uevent(&dev->kobj, KOBJ_CHANGE);
@@ -394,23 +409,23 @@ int topology_cpu_init(struct cpu *cpu)
static const struct cpumask *cpu_thread_mask(int cpu)
{
- return &per_cpu(cpu_topology, cpu).thread_mask;
+ return &cpu_topology[cpu].thread_mask;
}
const struct cpumask *cpu_coregroup_mask(int cpu)
{
- return &per_cpu(cpu_topology, cpu).core_mask;
+ return &cpu_topology[cpu].core_mask;
}
static const struct cpumask *cpu_book_mask(int cpu)
{
- return &per_cpu(cpu_topology, cpu).book_mask;
+ return &cpu_topology[cpu].book_mask;
}
static const struct cpumask *cpu_drawer_mask(int cpu)
{
- return &per_cpu(cpu_topology, cpu).drawer_mask;
+ return &cpu_topology[cpu].drawer_mask;
}
static int __init early_parse_topology(char *p)
@@ -438,19 +453,20 @@ static void __init alloc_masks(struct sysinfo_15_1_x *info,
nr_masks *= info->mag[TOPOLOGY_NR_MAG - offset - 1 - i];
nr_masks = max(nr_masks, 1);
for (i = 0; i < nr_masks; i++) {
- mask->next = kzalloc(sizeof(*mask->next), GFP_KERNEL);
+ mask->next = memblock_virt_alloc(sizeof(*mask->next), 8);
mask = mask->next;
}
}
-static int __init s390_topology_init(void)
+void __init topology_init_early(void)
{
struct sysinfo_15_1_x *info;
int i;
+ set_sched_topology(s390_topology);
if (!MACHINE_HAS_TOPOLOGY)
- return 0;
- tl_info = (struct sysinfo_15_1_x *)__get_free_page(GFP_KERNEL);
+ goto out;
+ tl_info = memblock_virt_alloc(sizeof(*tl_info), PAGE_SIZE);
info = tl_info;
store_topology(info);
pr_info("The CPU configuration topology of the machine is:");
@@ -460,10 +476,9 @@ static int __init s390_topology_init(void)
alloc_masks(info, &socket_info, 1);
alloc_masks(info, &book_info, 2);
alloc_masks(info, &drawer_info, 3);
- set_sched_topology(s390_topology);
- return 0;
+out:
+ __arch_update_cpu_topology();
}
-early_initcall(s390_topology_init);
static int __init topology_init(void)
{
diff --git a/arch/s390/kernel/vdso32/clock_gettime.S b/arch/s390/kernel/vdso32/clock_gettime.S
index 5eec9afbb5b5..a5769b83d90e 100644
--- a/arch/s390/kernel/vdso32/clock_gettime.S
+++ b/arch/s390/kernel/vdso32/clock_gettime.S
@@ -99,8 +99,27 @@ __kernel_clock_gettime:
tml %r4,0x0001 /* pending update ? loop */
jnz 11b
stcke 0(%r15) /* Store TOD clock */
- lm %r0,%r1,1(%r15)
- s %r0,__VDSO_XTIME_STAMP(%r5) /* TOD - cycle_last */
+ lm %r0,%r1,__VDSO_TS_END(%r5) /* TOD steering end time */
+ s %r0,1(%r15) /* no - ts_steering_end */
+ sl %r1,5(%r15)
+ brc 3,22f
+ ahi %r0,-1
+22: ltr %r0,%r0 /* past end of steering? */
+ jm 24f
+ srdl %r0,15 /* 1 per 2^16 */
+ tm __VDSO_TS_DIR+3(%r5),0x01 /* steering direction? */
+ jz 23f
+ lcr %r0,%r0 /* negative TOD offset */
+ lcr %r1,%r1
+ je 23f
+ ahi %r0,-1
+23: a %r0,1(%r15) /* add TOD timestamp */
+ al %r1,5(%r15)
+ brc 12,25f
+ ahi %r0,1
+ j 25f
+24: lm %r0,%r1,1(%r15) /* load TOD timestamp */
+25: s %r0,__VDSO_XTIME_STAMP(%r5) /* TOD - cycle_last */
sl %r1,__VDSO_XTIME_STAMP+4(%r5)
brc 3,12f
ahi %r0,-1
diff --git a/arch/s390/kernel/vdso32/gettimeofday.S b/arch/s390/kernel/vdso32/gettimeofday.S
index 719de6186b20..63b86dceb0bf 100644
--- a/arch/s390/kernel/vdso32/gettimeofday.S
+++ b/arch/s390/kernel/vdso32/gettimeofday.S
@@ -31,8 +31,27 @@ __kernel_gettimeofday:
tml %r4,0x0001 /* pending update ? loop */
jnz 1b
stcke 0(%r15) /* Store TOD clock */
- lm %r0,%r1,1(%r15)
- s %r0,__VDSO_XTIME_STAMP(%r5) /* TOD - cycle_last */
+ lm %r0,%r1,__VDSO_TS_END(%r5) /* TOD steering end time */
+ s %r0,1(%r15)
+ sl %r1,5(%r15)
+ brc 3,14f
+ ahi %r0,-1
+14: ltr %r0,%r0 /* past end of steering? */
+ jm 16f
+ srdl %r0,15 /* 1 per 2^16 */
+ tm __VDSO_TS_DIR+3(%r5),0x01 /* steering direction? */
+ jz 15f
+ lcr %r0,%r0 /* negative TOD offset */
+ lcr %r1,%r1
+ je 15f
+ ahi %r0,-1
+15: a %r0,1(%r15) /* add TOD timestamp */
+ al %r1,5(%r15)
+ brc 12,17f
+ ahi %r0,1
+ j 17f
+16: lm %r0,%r1,1(%r15) /* load TOD timestamp */
+17: s %r0,__VDSO_XTIME_STAMP(%r5) /* TOD - cycle_last */
sl %r1,__VDSO_XTIME_STAMP+4(%r5)
brc 3,3f
ahi %r0,-1
diff --git a/arch/s390/kernel/vdso64/clock_gettime.S b/arch/s390/kernel/vdso64/clock_gettime.S
index 61541fb93dc6..9c3b12626dba 100644
--- a/arch/s390/kernel/vdso64/clock_gettime.S
+++ b/arch/s390/kernel/vdso64/clock_gettime.S
@@ -83,8 +83,17 @@ __kernel_clock_gettime:
tmll %r4,0x0001 /* pending update ? loop */
jnz 5b
stcke 0(%r15) /* Store TOD clock */
- lgf %r2,__VDSO_TK_SHIFT(%r5) /* Timekeeper shift */
lg %r1,1(%r15)
+ lg %r0,__VDSO_TS_END(%r5) /* TOD steering end time */
+ slgr %r0,%r1 /* now - ts_steering_end */
+ ltgr %r0,%r0 /* past end of steering ? */
+ jm 17f
+ srlg %r0,%r0,15 /* 1 per 2^16 */
+ tm __VDSO_TS_DIR+3(%r5),0x01 /* steering direction? */
+ jz 18f
+ lcgr %r0,%r0 /* negative TOD offset */
+18: algr %r1,%r0 /* add steering offset */
+17: lgf %r2,__VDSO_TK_SHIFT(%r5) /* Timekeeper shift */
sg %r1,__VDSO_XTIME_STAMP(%r5) /* TOD - cycle_last */
msgf %r1,__VDSO_TK_MULT(%r5) /* * tk->mult */
alg %r1,__VDSO_XTIME_NSEC(%r5) /* + tk->xtime_nsec */
diff --git a/arch/s390/kernel/vdso64/gettimeofday.S b/arch/s390/kernel/vdso64/gettimeofday.S
index 6ce46707663c..b02e62f3bc12 100644
--- a/arch/s390/kernel/vdso64/gettimeofday.S
+++ b/arch/s390/kernel/vdso64/gettimeofday.S
@@ -31,7 +31,16 @@ __kernel_gettimeofday:
jnz 0b
stcke 0(%r15) /* Store TOD clock */
lg %r1,1(%r15)
- sg %r1,__VDSO_XTIME_STAMP(%r5) /* TOD - cycle_last */
+ lg %r0,__VDSO_TS_END(%r5) /* TOD steering end time */
+ slgr %r0,%r1 /* now - ts_steering_end */
+ ltgr %r0,%r0 /* past end of steering ? */
+ jm 6f
+ srlg %r0,%r0,15 /* 1 per 2^16 */
+ tm __VDSO_TS_DIR+3(%r5),0x01 /* steering direction? */
+ jz 7f
+ lcgr %r0,%r0 /* negative TOD offset */
+7: algr %r1,%r0 /* add steering offset */
+6: sg %r1,__VDSO_XTIME_STAMP(%r5) /* TOD - cycle_last */
msgf %r1,__VDSO_TK_MULT(%r5) /* * tk->mult */
alg %r1,__VDSO_XTIME_NSEC(%r5) /* + tk->xtime_nsec */
lg %r0,__VDSO_XTIME_SEC(%r5) /* tk->xtime_sec */
diff --git a/arch/s390/kernel/vtime.c b/arch/s390/kernel/vtime.c
index 1bd5dde2d5a9..6b246aadf311 100644
--- a/arch/s390/kernel/vtime.c
+++ b/arch/s390/kernel/vtime.c
@@ -96,7 +96,6 @@ static void update_mt_scaling(void)
*/
static int do_account_vtime(struct task_struct *tsk, int hardirq_offset)
{
- struct thread_info *ti = task_thread_info(tsk);
u64 timer, clock, user, system, steal;
u64 user_scaled, system_scaled;
@@ -119,13 +118,13 @@ static int do_account_vtime(struct task_struct *tsk, int hardirq_offset)
time_after64(jiffies_64, this_cpu_read(mt_scaling_jiffies)))
update_mt_scaling();
- user = S390_lowcore.user_timer - ti->user_timer;
+ user = S390_lowcore.user_timer - tsk->thread.user_timer;
S390_lowcore.steal_timer -= user;
- ti->user_timer = S390_lowcore.user_timer;
+ tsk->thread.user_timer = S390_lowcore.user_timer;
- system = S390_lowcore.system_timer - ti->system_timer;
+ system = S390_lowcore.system_timer - tsk->thread.system_timer;
S390_lowcore.steal_timer -= system;
- ti->system_timer = S390_lowcore.system_timer;
+ tsk->thread.system_timer = S390_lowcore.system_timer;
user_scaled = user;
system_scaled = system;
@@ -153,15 +152,11 @@ static int do_account_vtime(struct task_struct *tsk, int hardirq_offset)
void vtime_task_switch(struct task_struct *prev)
{
- struct thread_info *ti;
-
do_account_vtime(prev, 0);
- ti = task_thread_info(prev);
- ti->user_timer = S390_lowcore.user_timer;
- ti->system_timer = S390_lowcore.system_timer;
- ti = task_thread_info(current);
- S390_lowcore.user_timer = ti->user_timer;
- S390_lowcore.system_timer = ti->system_timer;
+ prev->thread.user_timer = S390_lowcore.user_timer;
+ prev->thread.system_timer = S390_lowcore.system_timer;
+ S390_lowcore.user_timer = current->thread.user_timer;
+ S390_lowcore.system_timer = current->thread.system_timer;
}
/*
@@ -181,7 +176,6 @@ void vtime_account_user(struct task_struct *tsk)
*/
void vtime_account_irq_enter(struct task_struct *tsk)
{
- struct thread_info *ti = task_thread_info(tsk);
u64 timer, system, system_scaled;
timer = S390_lowcore.last_update_timer;
@@ -193,9 +187,9 @@ void vtime_account_irq_enter(struct task_struct *tsk)
time_after64(jiffies_64, this_cpu_read(mt_scaling_jiffies)))
update_mt_scaling();
- system = S390_lowcore.system_timer - ti->system_timer;
+ system = S390_lowcore.system_timer - tsk->thread.system_timer;
S390_lowcore.steal_timer -= system;
- ti->system_timer = S390_lowcore.system_timer;
+ tsk->thread.system_timer = S390_lowcore.system_timer;
system_scaled = system;
/* Do MT utilization scaling */
if (smp_cpu_mtid) {
diff --git a/arch/s390/kvm/interrupt.c b/arch/s390/kvm/interrupt.c
index be4db07f70d3..af13f1a135b6 100644
--- a/arch/s390/kvm/interrupt.c
+++ b/arch/s390/kvm/interrupt.c
@@ -415,7 +415,7 @@ static int __write_machine_check(struct kvm_vcpu *vcpu,
int rc;
mci.val = mchk->mcic;
- /* take care of lazy register loading via vcpu load/put */
+ /* take care of lazy register loading */
save_fpu_regs();
save_access_regs(vcpu->run->s.regs.acrs);
diff --git a/arch/s390/kvm/kvm-s390.c b/arch/s390/kvm/kvm-s390.c
index 9c7a1ecfe6bd..bec71e902be3 100644
--- a/arch/s390/kvm/kvm-s390.c
+++ b/arch/s390/kvm/kvm-s390.c
@@ -1812,22 +1812,7 @@ __u64 kvm_s390_get_cpu_timer(struct kvm_vcpu *vcpu)
void kvm_arch_vcpu_load(struct kvm_vcpu *vcpu, int cpu)
{
- /* Save host register state */
- save_fpu_regs();
- vcpu->arch.host_fpregs.fpc = current->thread.fpu.fpc;
- vcpu->arch.host_fpregs.regs = current->thread.fpu.regs;
-
- if (MACHINE_HAS_VX)
- current->thread.fpu.regs = vcpu->run->s.regs.vrs;
- else
- current->thread.fpu.regs = vcpu->run->s.regs.fprs;
- current->thread.fpu.fpc = vcpu->run->s.regs.fpc;
- if (test_fp_ctl(current->thread.fpu.fpc))
- /* User space provided an invalid FPC, let's clear it */
- current->thread.fpu.fpc = 0;
- save_access_regs(vcpu->arch.host_acrs);
- restore_access_regs(vcpu->run->s.regs.acrs);
gmap_enable(vcpu->arch.enabled_gmap);
atomic_or(CPUSTAT_RUNNING, &vcpu->arch.sie_block->cpuflags);
if (vcpu->arch.cputm_enabled && !is_vcpu_idle(vcpu))
@@ -1844,16 +1829,6 @@ void kvm_arch_vcpu_put(struct kvm_vcpu *vcpu)
vcpu->arch.enabled_gmap = gmap_get_enabled();
gmap_disable(vcpu->arch.enabled_gmap);
- /* Save guest register state */
- save_fpu_regs();
- vcpu->run->s.regs.fpc = current->thread.fpu.fpc;
-
- /* Restore host register state */
- current->thread.fpu.fpc = vcpu->arch.host_fpregs.fpc;
- current->thread.fpu.regs = vcpu->arch.host_fpregs.regs;
-
- save_access_regs(vcpu->run->s.regs.acrs);
- restore_access_regs(vcpu->arch.host_acrs);
}
static void kvm_s390_vcpu_initial_reset(struct kvm_vcpu *vcpu)
@@ -2243,7 +2218,6 @@ int kvm_arch_vcpu_ioctl_set_sregs(struct kvm_vcpu *vcpu,
{
memcpy(&vcpu->run->s.regs.acrs, &sregs->acrs, sizeof(sregs->acrs));
memcpy(&vcpu->arch.sie_block->gcr, &sregs->crs, sizeof(sregs->crs));
- restore_access_regs(vcpu->run->s.regs.acrs);
return 0;
}
@@ -2257,11 +2231,9 @@ int kvm_arch_vcpu_ioctl_get_sregs(struct kvm_vcpu *vcpu,
int kvm_arch_vcpu_ioctl_set_fpu(struct kvm_vcpu *vcpu, struct kvm_fpu *fpu)
{
- /* make sure the new values will be lazily loaded */
- save_fpu_regs();
if (test_fp_ctl(fpu->fpc))
return -EINVAL;
- current->thread.fpu.fpc = fpu->fpc;
+ vcpu->run->s.regs.fpc = fpu->fpc;
if (MACHINE_HAS_VX)
convert_fp_to_vx((__vector128 *) vcpu->run->s.regs.vrs,
(freg_t *) fpu->fprs);
@@ -2279,7 +2251,7 @@ int kvm_arch_vcpu_ioctl_get_fpu(struct kvm_vcpu *vcpu, struct kvm_fpu *fpu)
(__vector128 *) vcpu->run->s.regs.vrs);
else
memcpy(fpu->fprs, vcpu->run->s.regs.fprs, sizeof(fpu->fprs));
- fpu->fpc = current->thread.fpu.fpc;
+ fpu->fpc = vcpu->run->s.regs.fpc;
return 0;
}
@@ -2740,6 +2712,20 @@ static void sync_regs(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run)
if (riccb->valid)
vcpu->arch.sie_block->ecb3 |= 0x01;
}
+ save_access_regs(vcpu->arch.host_acrs);
+ restore_access_regs(vcpu->run->s.regs.acrs);
+ /* save host (userspace) fprs/vrs */
+ save_fpu_regs();
+ vcpu->arch.host_fpregs.fpc = current->thread.fpu.fpc;
+ vcpu->arch.host_fpregs.regs = current->thread.fpu.regs;
+ if (MACHINE_HAS_VX)
+ current->thread.fpu.regs = vcpu->run->s.regs.vrs;
+ else
+ current->thread.fpu.regs = vcpu->run->s.regs.fprs;
+ current->thread.fpu.fpc = vcpu->run->s.regs.fpc;
+ if (test_fp_ctl(current->thread.fpu.fpc))
+ /* User space provided an invalid FPC, let's clear it */
+ current->thread.fpu.fpc = 0;
kvm_run->kvm_dirty_regs = 0;
}
@@ -2758,6 +2744,15 @@ static void store_regs(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run)
kvm_run->s.regs.pft = vcpu->arch.pfault_token;
kvm_run->s.regs.pfs = vcpu->arch.pfault_select;
kvm_run->s.regs.pfc = vcpu->arch.pfault_compare;
+ save_access_regs(vcpu->run->s.regs.acrs);
+ restore_access_regs(vcpu->arch.host_acrs);
+ /* Save guest register state */
+ save_fpu_regs();
+ vcpu->run->s.regs.fpc = current->thread.fpu.fpc;
+ /* Restore will be done lazily at return */
+ current->thread.fpu.fpc = vcpu->arch.host_fpregs.fpc;
+ current->thread.fpu.regs = vcpu->arch.host_fpregs.regs;
+
}
int kvm_arch_vcpu_ioctl_run(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run)
@@ -2874,7 +2869,7 @@ int kvm_s390_vcpu_store_status(struct kvm_vcpu *vcpu, unsigned long addr)
{
/*
* The guest FPRS and ACRS are in the host FPRS/ACRS due to the lazy
- * copying in vcpu load/put. Lets update our copies before we save
+ * switch in the run ioctl. Let's update our copies before we save
* it into the save area
*/
save_fpu_regs();
diff --git a/arch/s390/lib/mem.S b/arch/s390/lib/mem.S
index be9fa65bfac4..7422a706f310 100644
--- a/arch/s390/lib/mem.S
+++ b/arch/s390/lib/mem.S
@@ -8,6 +8,45 @@
#include <asm/export.h>
/*
+ * void *memmove(void *dest, const void *src, size_t n)
+ */
+ENTRY(memmove)
+ ltgr %r4,%r4
+ lgr %r1,%r2
+ bzr %r14
+ clgr %r2,%r3
+ jnh .Lmemmove_forward
+ la %r5,0(%r4,%r3)
+ clgr %r2,%r5
+ jl .Lmemmove_reverse
+.Lmemmove_forward:
+ aghi %r4,-1
+ srlg %r0,%r4,8
+ ltgr %r0,%r0
+ jz .Lmemmove_rest
+.Lmemmove_loop:
+ mvc 0(256,%r1),0(%r3)
+ la %r1,256(%r1)
+ la %r3,256(%r3)
+ brctg %r0,.Lmemmove_loop
+.Lmemmove_rest:
+ larl %r5,.Lmemmove_mvc
+ ex %r4,0(%r5)
+ br %r14
+.Lmemmove_reverse:
+ aghi %r4,-1
+.Lmemmove_reverse_loop:
+ ic %r0,0(%r4,%r3)
+ stc %r0,0(%r4,%r1)
+ brctg %r4,.Lmemmove_reverse_loop
+ ic %r0,0(%r4,%r3)
+ stc %r0,0(%r4,%r1)
+ br %r14
+.Lmemmove_mvc:
+ mvc 0(1,%r1),0(%r3)
+EXPORT_SYMBOL(memmove)
+
+/*
* memset implementation
*
* This code corresponds to the C construct below. We do distinguish
diff --git a/arch/s390/mm/fault.c b/arch/s390/mm/fault.c
index 661d9fe63c43..d1faae5cdd12 100644
--- a/arch/s390/mm/fault.c
+++ b/arch/s390/mm/fault.c
@@ -733,6 +733,7 @@ block:
* return to userspace schedule() to block. */
__set_current_state(TASK_UNINTERRUPTIBLE);
set_tsk_need_resched(tsk);
+ set_preempt_need_resched();
}
}
out:
diff --git a/arch/s390/mm/vmem.c b/arch/s390/mm/vmem.c
index 1848292766ef..45becc8a44ec 100644
--- a/arch/s390/mm/vmem.c
+++ b/arch/s390/mm/vmem.c
@@ -34,7 +34,7 @@ static void __ref *vmem_alloc_pages(unsigned int order)
if (slab_is_available())
return (void *)__get_free_pages(GFP_KERNEL, order);
- return alloc_bootmem_align(size, size);
+ return (void *) memblock_alloc(size, size);
}
static inline pud_t *vmem_pud_alloc(void)
@@ -61,17 +61,16 @@ pmd_t *vmem_pmd_alloc(void)
pte_t __ref *vmem_pte_alloc(void)
{
+ unsigned long size = PTRS_PER_PTE * sizeof(pte_t);
pte_t *pte;
if (slab_is_available())
pte = (pte_t *) page_table_alloc(&init_mm);
else
- pte = alloc_bootmem_align(PTRS_PER_PTE * sizeof(pte_t),
- PTRS_PER_PTE * sizeof(pte_t));
+ pte = (pte_t *) memblock_alloc(size, size);
if (!pte)
return NULL;
- clear_table((unsigned long *) pte, _PAGE_INVALID,
- PTRS_PER_PTE * sizeof(pte_t));
+ clear_table((unsigned long *) pte, _PAGE_INVALID, size);
return pte;
}
diff --git a/arch/s390/numa/mode_emu.c b/arch/s390/numa/mode_emu.c
index 37e0bb835516..cfd08384f0ab 100644
--- a/arch/s390/numa/mode_emu.c
+++ b/arch/s390/numa/mode_emu.c
@@ -21,6 +21,7 @@
#include <linux/kernel.h>
#include <linux/cpumask.h>
#include <linux/memblock.h>
+#include <linux/bootmem.h>
#include <linux/node.h>
#include <linux/memory.h>
#include <linux/slab.h>
@@ -307,13 +308,11 @@ fail:
/*
* Allocate and initialize core to node mapping
*/
-static void create_core_to_node_map(void)
+static void __ref create_core_to_node_map(void)
{
int i;
- emu_cores = kzalloc(sizeof(*emu_cores), GFP_KERNEL);
- if (emu_cores == NULL)
- panic("Could not allocate cores to node memory");
+ emu_cores = memblock_virt_alloc(sizeof(*emu_cores), 8);
for (i = 0; i < ARRAY_SIZE(emu_cores->to_node_id); i++)
emu_cores->to_node_id[i] = NODE_ID_FREE;
}
@@ -354,13 +353,13 @@ static struct toptree *toptree_from_topology(void)
phys = toptree_new(TOPTREE_ID_PHYS, 1);
- for_each_online_cpu(cpu) {
- top = &per_cpu(cpu_topology, cpu);
+ for_each_cpu(cpu, &cpus_with_topology) {
+ top = &cpu_topology[cpu];
node = toptree_get_child(phys, 0);
drawer = toptree_get_child(node, top->drawer_id);
book = toptree_get_child(drawer, top->book_id);
mc = toptree_get_child(book, top->socket_id);
- core = toptree_get_child(mc, top->core_id);
+ core = toptree_get_child(mc, smp_get_base_cpu(cpu));
if (!drawer || !book || !mc || !core)
panic("NUMA emulation could not allocate memory");
cpumask_set_cpu(cpu, &core->mask);
@@ -378,7 +377,7 @@ static void topology_add_core(struct toptree *core)
int cpu;
for_each_cpu(cpu, &core->mask) {
- top = &per_cpu(cpu_topology, cpu);
+ top = &cpu_topology[cpu];
cpumask_copy(&top->thread_mask, &core->mask);
cpumask_copy(&top->core_mask, &core_mc(core)->mask);
cpumask_copy(&top->book_mask, &core_book(core)->mask);
@@ -425,6 +424,27 @@ static void print_node_to_core_map(void)
}
}
+static void pin_all_possible_cpus(void)
+{
+ int core_id, node_id, cpu;
+ static int initialized;
+
+ if (initialized)
+ return;
+ print_node_to_core_map();
+ node_id = 0;
+ for_each_possible_cpu(cpu) {
+ core_id = smp_get_base_cpu(cpu);
+ if (emu_cores->to_node_id[core_id] != NODE_ID_FREE)
+ continue;
+ pin_core_to_node(core_id, node_id);
+ cpu_topology[cpu].node_id = node_id;
+ node_id = (node_id + 1) % emu_nodes;
+ }
+ print_node_to_core_map();
+ initialized = 1;
+}
+
/*
* Transfer physical topology into a NUMA topology and modify CPU masks
* according to the NUMA topology.
@@ -442,7 +462,7 @@ static void emu_update_cpu_topology(void)
toptree_free(phys);
toptree_to_topology(numa);
toptree_free(numa);
- print_node_to_core_map();
+ pin_all_possible_cpus();
}
/*
diff --git a/arch/s390/numa/toptree.c b/arch/s390/numa/toptree.c
index 902d350d859a..26f622b1cd11 100644
--- a/arch/s390/numa/toptree.c
+++ b/arch/s390/numa/toptree.c
@@ -7,6 +7,7 @@
*/
#include <linux/kernel.h>
+#include <linux/bootmem.h>
#include <linux/cpumask.h>
#include <linux/list.h>
#include <linux/list_sort.h>
@@ -25,10 +26,14 @@
* RETURNS:
* Pointer to the new tree node or NULL on error
*/
-struct toptree *toptree_alloc(int level, int id)
+struct toptree __ref *toptree_alloc(int level, int id)
{
- struct toptree *res = kzalloc(sizeof(struct toptree), GFP_KERNEL);
+ struct toptree *res;
+ if (slab_is_available())
+ res = kzalloc(sizeof(*res), GFP_KERNEL);
+ else
+ res = memblock_virt_alloc(sizeof(*res), 8);
if (!res)
return res;
@@ -65,7 +70,7 @@ static void toptree_remove(struct toptree *cand)
* cleanly using toptree_remove. Possible children are freed
* recursively. In the end @cand itself is freed.
*/
-void toptree_free(struct toptree *cand)
+void __ref toptree_free(struct toptree *cand)
{
struct toptree *child, *tmp;
@@ -73,7 +78,10 @@ void toptree_free(struct toptree *cand)
toptree_remove(cand);
toptree_for_each_child_safe(child, tmp, cand)
toptree_free(child);
- kfree(cand);
+ if (slab_is_available())
+ kfree(cand);
+ else
+ memblock_free_early((unsigned long)cand, sizeof(*cand));
}
/**
diff --git a/arch/s390/pci/pci.c b/arch/s390/pci/pci.c
index 15ffc19c8c0c..64e1734bebb7 100644
--- a/arch/s390/pci/pci.c
+++ b/arch/s390/pci/pci.c
@@ -722,6 +722,11 @@ struct dev_pm_ops pcibios_pm_ops = {
static int zpci_alloc_domain(struct zpci_dev *zdev)
{
+ if (zpci_unique_uid) {
+ zdev->domain = (u16) zdev->uid;
+ return 0;
+ }
+
spin_lock(&zpci_domain_lock);
zdev->domain = find_first_zero_bit(zpci_domain, ZPCI_NR_DEVICES);
if (zdev->domain == ZPCI_NR_DEVICES) {
@@ -735,6 +740,9 @@ static int zpci_alloc_domain(struct zpci_dev *zdev)
static void zpci_free_domain(struct zpci_dev *zdev)
{
+ if (zpci_unique_uid)
+ return;
+
spin_lock(&zpci_domain_lock);
clear_bit(zdev->domain, zpci_domain);
spin_unlock(&zpci_domain_lock);
diff --git a/arch/s390/pci/pci_clp.c b/arch/s390/pci/pci_clp.c
index 1a4512c8544a..e3ef63b36b5a 100644
--- a/arch/s390/pci/pci_clp.c
+++ b/arch/s390/pci/pci_clp.c
@@ -22,6 +22,8 @@
#include <asm/clp.h>
#include <uapi/asm/clp.h>
+bool zpci_unique_uid;
+
static inline void zpci_err_clp(unsigned int rsp, int rc)
{
struct {
@@ -315,6 +317,7 @@ static int clp_list_pci(struct clp_req_rsp_list_pci *rrb,
goto out;
}
+ zpci_unique_uid = rrb->response.uid_checking;
WARN_ON_ONCE(rrb->response.entry_size !=
sizeof(struct clp_fh_list_entry));
diff --git a/arch/s390/pci/pci_debug.c b/arch/s390/pci/pci_debug.c
index 38993b156924..c2f786f0ea06 100644
--- a/arch/s390/pci/pci_debug.c
+++ b/arch/s390/pci/pci_debug.c
@@ -69,7 +69,7 @@ static void pci_sw_counter_show(struct seq_file *m)
int i;
for (i = 0; i < ARRAY_SIZE(pci_sw_names); i++, counter++)
- seq_printf(m, "%26s:\t%llu\n", pci_sw_names[i],
+ seq_printf(m, "%26s:\t%lu\n", pci_sw_names[i],
atomic64_read(counter));
}
diff --git a/arch/s390/pci/pci_dma.c b/arch/s390/pci/pci_dma.c
index 6b2f72f523b9..1d7a9c71944a 100644
--- a/arch/s390/pci/pci_dma.c
+++ b/arch/s390/pci/pci_dma.c
@@ -181,14 +181,17 @@ static int __dma_purge_tlb(struct zpci_dev *zdev, dma_addr_t dma_addr,
/*
* With zdev->tlb_refresh == 0, rpcit is not required to establish new
* translations when previously invalid translation-table entries are
- * validated. With lazy unmap, it also is skipped for previously valid
+ * validated. With lazy unmap, rpcit is skipped for previously valid
* entries, but a global rpcit is then required before any address can
* be re-used, i.e. after each iommu bitmap wrap-around.
*/
- if (!zdev->tlb_refresh &&
- (!s390_iommu_strict ||
- ((flags & ZPCI_PTE_VALID_MASK) == ZPCI_PTE_VALID)))
- return 0;
+ if ((flags & ZPCI_PTE_VALID_MASK) == ZPCI_PTE_VALID) {
+ if (!zdev->tlb_refresh)
+ return 0;
+ } else {
+ if (!s390_iommu_strict)
+ return 0;
+ }
return zpci_refresh_trans((u64) zdev->fh << 32, dma_addr,
PAGE_ALIGN(size));
@@ -257,7 +260,7 @@ static dma_addr_t dma_alloc_address(struct device *dev, int size)
spin_lock_irqsave(&zdev->iommu_bitmap_lock, flags);
offset = __dma_alloc_iommu(dev, zdev->next_bit, size);
if (offset == -1) {
- if (!zdev->tlb_refresh && !s390_iommu_strict) {
+ if (!s390_iommu_strict) {
/* global flush before DMA addresses are reused */
if (zpci_refresh_global(zdev))
goto out_error;
@@ -292,7 +295,7 @@ static void dma_free_address(struct device *dev, dma_addr_t dma_addr, int size)
if (!zdev->iommu_bitmap)
goto out;
- if (zdev->tlb_refresh || s390_iommu_strict)
+ if (s390_iommu_strict)
bitmap_clear(zdev->iommu_bitmap, offset, size);
else
bitmap_set(zdev->lazy_bitmap, offset, size);
@@ -388,8 +391,6 @@ static void *s390_dma_alloc(struct device *dev, size_t size,
return NULL;
pa = page_to_phys(page);
- memset((void *) pa, 0, size);
-
map = s390_dma_map_pages(dev, page, 0, size, DMA_BIDIRECTIONAL, 0);
if (dma_mapping_error(dev, map)) {
free_pages(pa, get_order(size));
@@ -419,6 +420,7 @@ static int __s390_dma_map_sg(struct device *dev, struct scatterlist *sg,
size_t size, dma_addr_t *handle,
enum dma_data_direction dir)
{
+ unsigned long nr_pages = PAGE_ALIGN(size) >> PAGE_SHIFT;
struct zpci_dev *zdev = to_zpci(to_pci_dev(dev));
dma_addr_t dma_addr_base, dma_addr;
int flags = ZPCI_PTE_VALID;
@@ -426,8 +428,7 @@ static int __s390_dma_map_sg(struct device *dev, struct scatterlist *sg,
unsigned long pa = 0;
int ret;
- size = PAGE_ALIGN(size);
- dma_addr_base = dma_alloc_address(dev, size >> PAGE_SHIFT);
+ dma_addr_base = dma_alloc_address(dev, nr_pages);
if (dma_addr_base == DMA_ERROR_CODE)
return -ENOMEM;
@@ -436,26 +437,27 @@ static int __s390_dma_map_sg(struct device *dev, struct scatterlist *sg,
flags |= ZPCI_TABLE_PROTECTED;
for (s = sg; dma_addr < dma_addr_base + size; s = sg_next(s)) {
- pa = page_to_phys(sg_page(s)) + s->offset;
- ret = __dma_update_trans(zdev, pa, dma_addr, s->length, flags);
+ pa = page_to_phys(sg_page(s));
+ ret = __dma_update_trans(zdev, pa, dma_addr,
+ s->offset + s->length, flags);
if (ret)
goto unmap;
- dma_addr += s->length;
+ dma_addr += s->offset + s->length;
}
ret = __dma_purge_tlb(zdev, dma_addr_base, size, flags);
if (ret)
goto unmap;
*handle = dma_addr_base;
- atomic64_add(size >> PAGE_SHIFT, &zdev->mapped_pages);
+ atomic64_add(nr_pages, &zdev->mapped_pages);
return ret;
unmap:
dma_update_trans(zdev, 0, dma_addr_base, dma_addr - dma_addr_base,
ZPCI_PTE_INVALID);
- dma_free_address(dev, dma_addr_base, size >> PAGE_SHIFT);
+ dma_free_address(dev, dma_addr_base, nr_pages);
zpci_err("map error:\n");
zpci_err_dma(ret, pa);
return ret;
@@ -564,7 +566,7 @@ int zpci_dma_init_device(struct zpci_dev *zdev)
rc = -ENOMEM;
goto free_dma_table;
}
- if (!zdev->tlb_refresh && !s390_iommu_strict) {
+ if (!s390_iommu_strict) {
zdev->lazy_bitmap = vzalloc(zdev->iommu_pages / 8);
if (!zdev->lazy_bitmap) {
rc = -ENOMEM;
diff --git a/arch/s390/tools/Makefile b/arch/s390/tools/Makefile
index 6d9814c9df2b..4b5e1e499527 100644
--- a/arch/s390/tools/Makefile
+++ b/arch/s390/tools/Makefile
@@ -9,7 +9,5 @@ define filechk_facilities.h
$(obj)/gen_facilities
endef
-$(obj)/gen_facilities.o: $(srctree)/arch/s390/tools/gen_facilities.c
-
include/generated/facilities.h: $(obj)/gen_facilities FORCE
$(call filechk,facilities.h)
diff --git a/arch/s390/tools/gen_facilities.c b/arch/s390/tools/gen_facilities.c
index fe4e6c910dd7..8cc53b1e6d03 100644
--- a/arch/s390/tools/gen_facilities.c
+++ b/arch/s390/tools/gen_facilities.c
@@ -7,13 +7,83 @@
*
*/
-#define S390_GEN_FACILITIES_C
-
#include <strings.h>
#include <string.h>
#include <stdlib.h>
#include <stdio.h>
-#include <asm/facilities_src.h>
+
+struct facility_def {
+ char *name;
+ int *bits;
+};
+
+static struct facility_def facility_defs[] = {
+ {
+ /*
+ * FACILITIES_ALS contains the list of facilities that are
+ * required to run a kernel that is compiled e.g. with
+ * -march=<machine>.
+ */
+ .name = "FACILITIES_ALS",
+ .bits = (int[]){
+#ifdef CONFIG_HAVE_MARCH_Z900_FEATURES
+ 0, /* N3 instructions */
+ 1, /* z/Arch mode installed */
+#endif
+#ifdef CONFIG_HAVE_MARCH_Z990_FEATURES
+ 18, /* long displacement facility */
+#endif
+#ifdef CONFIG_HAVE_MARCH_Z9_109_FEATURES
+ 7, /* stfle */
+ 17, /* message security assist */
+ 21, /* extended-immediate facility */
+ 25, /* store clock fast */
+#endif
+#ifdef CONFIG_HAVE_MARCH_Z10_FEATURES
+ 27, /* mvcos */
+ 32, /* compare and swap and store */
+ 33, /* compare and swap and store 2 */
+ 34, /* general extension facility */
+ 35, /* execute extensions */
+#endif
+#ifdef CONFIG_HAVE_MARCH_Z196_FEATURES
+ 45, /* fast-BCR, etc. */
+#endif
+#ifdef CONFIG_HAVE_MARCH_ZEC12_FEATURES
+ 49, /* misc-instruction-extensions */
+ 52, /* interlocked facility 2 */
+#endif
+#ifdef CONFIG_HAVE_MARCH_Z13_FEATURES
+ 53, /* load-and-zero-rightmost-byte, etc. */
+#endif
+ -1 /* END */
+ }
+ },
+ {
+ .name = "FACILITIES_KVM",
+ .bits = (int[]){
+ 0, /* N3 instructions */
+ 1, /* z/Arch mode installed */
+ 2, /* z/Arch mode active */
+ 3, /* DAT-enhancement */
+ 4, /* idte segment table */
+ 5, /* idte region table */
+ 6, /* ASN-and-LX reuse */
+ 7, /* stfle */
+ 8, /* enhanced-DAT 1 */
+ 9, /* sense-running-status */
+ 10, /* conditional sske */
+ 13, /* ipte-range */
+ 14, /* nonquiescing key-setting */
+ 73, /* transactional execution */
+ 75, /* access-exception-fetch/store indication */
+ 76, /* msa extension 3 */
+ 77, /* msa extension 4 */
+ 78, /* enhanced-DAT 2 */
+ -1 /* END */
+ }
+ },
+};
static void print_facility_list(struct facility_def *def)
{
diff --git a/arch/sh/kernel/cpu/Makefile b/arch/sh/kernel/cpu/Makefile
index accc7ca722e1..252e9fee687f 100644
--- a/arch/sh/kernel/cpu/Makefile
+++ b/arch/sh/kernel/cpu/Makefile
@@ -1,5 +1,5 @@
#
-# Makefile for the Linux/SuperH CPU-specifc backends.
+# Makefile for the Linux/SuperH CPU-specific backends.
#
obj-$(CONFIG_CPU_SH2) = sh2/
diff --git a/arch/sh/kernel/cpu/irq/Makefile b/arch/sh/kernel/cpu/irq/Makefile
index f0c7025a67d1..3f8e79402d7d 100644
--- a/arch/sh/kernel/cpu/irq/Makefile
+++ b/arch/sh/kernel/cpu/irq/Makefile
@@ -1,5 +1,5 @@
#
-# Makefile for the Linux/SuperH CPU-specifc IRQ handlers.
+# Makefile for the Linux/SuperH CPU-specific IRQ handlers.
#
obj-$(CONFIG_SUPERH32) += imask.o
obj-$(CONFIG_CPU_SH5) += intc-sh5.o
diff --git a/arch/x86/boot/compressed/Makefile b/arch/x86/boot/compressed/Makefile
index 34d9e15857c3..44163e8c3868 100644
--- a/arch/x86/boot/compressed/Makefile
+++ b/arch/x86/boot/compressed/Makefile
@@ -25,7 +25,7 @@ KCOV_INSTRUMENT := n
targets := vmlinux vmlinux.bin vmlinux.bin.gz vmlinux.bin.bz2 vmlinux.bin.lzma \
vmlinux.bin.xz vmlinux.bin.lzo vmlinux.bin.lz4
-KBUILD_CFLAGS := -m$(BITS) -D__KERNEL__ $(LINUX_INCLUDE) -O2
+KBUILD_CFLAGS := -m$(BITS) -D__KERNEL__ -O2
KBUILD_CFLAGS += -fno-strict-aliasing $(call cc-option, -fPIE, -fPIC)
KBUILD_CFLAGS += -DDISABLE_BRANCH_PROFILING
cflags-$(CONFIG_X86_32) := -march=i386
diff --git a/arch/x86/include/asm/e820.h b/arch/x86/include/asm/e820.h
index 476b574de99e..ec23d8e1297c 100644
--- a/arch/x86/include/asm/e820.h
+++ b/arch/x86/include/asm/e820.h
@@ -1,13 +1,17 @@
#ifndef _ASM_X86_E820_H
#define _ASM_X86_E820_H
-#ifdef CONFIG_EFI
+/*
+ * E820_X_MAX is the maximum size of the extended E820 table. The extended
+ * table may contain up to 3 extra E820 entries per possible NUMA node, so we
+ * make room for 3 * MAX_NUMNODES possible entries, beyond the standard 128.
+ * Also note that E820_X_MAX *must* be defined before we include uapi/asm/e820.h.
+ */
#include <linux/numa.h>
#define E820_X_MAX (E820MAX + 3 * MAX_NUMNODES)
-#else /* ! CONFIG_EFI */
-#define E820_X_MAX E820MAX
-#endif
+
#include <uapi/asm/e820.h>
+
#ifndef __ASSEMBLY__
/* see comment in arch/x86/kernel/e820.c */
extern struct e820map *e820;
diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h
index bdde80731f49..7892530cbacf 100644
--- a/arch/x86/include/asm/kvm_host.h
+++ b/arch/x86/include/asm/kvm_host.h
@@ -191,6 +191,8 @@ enum {
#define PFERR_RSVD_BIT 3
#define PFERR_FETCH_BIT 4
#define PFERR_PK_BIT 5
+#define PFERR_GUEST_FINAL_BIT 32
+#define PFERR_GUEST_PAGE_BIT 33
#define PFERR_PRESENT_MASK (1U << PFERR_PRESENT_BIT)
#define PFERR_WRITE_MASK (1U << PFERR_WRITE_BIT)
@@ -198,6 +200,13 @@ enum {
#define PFERR_RSVD_MASK (1U << PFERR_RSVD_BIT)
#define PFERR_FETCH_MASK (1U << PFERR_FETCH_BIT)
#define PFERR_PK_MASK (1U << PFERR_PK_BIT)
+#define PFERR_GUEST_FINAL_MASK (1ULL << PFERR_GUEST_FINAL_BIT)
+#define PFERR_GUEST_PAGE_MASK (1ULL << PFERR_GUEST_PAGE_BIT)
+
+#define PFERR_NESTED_GUEST_PAGE (PFERR_GUEST_PAGE_MASK | \
+ PFERR_USER_MASK | \
+ PFERR_WRITE_MASK | \
+ PFERR_PRESENT_MASK)
/* apic attention bits */
#define KVM_APIC_CHECK_VAPIC 0
@@ -1062,6 +1071,7 @@ unsigned int kvm_mmu_calculate_mmu_pages(struct kvm *kvm);
void kvm_mmu_change_mmu_pages(struct kvm *kvm, unsigned int kvm_nr_mmu_pages);
int load_pdptrs(struct kvm_vcpu *vcpu, struct kvm_mmu *mmu, unsigned long cr3);
+bool pdptrs_changed(struct kvm_vcpu *vcpu);
int emulator_write_phys(struct kvm_vcpu *vcpu, gpa_t gpa,
const void *val, int bytes);
@@ -1124,7 +1134,8 @@ int kvm_set_msr(struct kvm_vcpu *vcpu, struct msr_data *msr);
struct x86_emulate_ctxt;
int kvm_fast_pio_out(struct kvm_vcpu *vcpu, int size, unsigned short port);
-void kvm_emulate_cpuid(struct kvm_vcpu *vcpu);
+int kvm_fast_pio_in(struct kvm_vcpu *vcpu, int size, unsigned short port);
+int kvm_emulate_cpuid(struct kvm_vcpu *vcpu);
int kvm_emulate_halt(struct kvm_vcpu *vcpu);
int kvm_vcpu_halt(struct kvm_vcpu *vcpu);
int kvm_emulate_wbinvd(struct kvm_vcpu *vcpu);
@@ -1203,7 +1214,7 @@ void kvm_vcpu_deactivate_apicv(struct kvm_vcpu *vcpu);
int kvm_emulate_hypercall(struct kvm_vcpu *vcpu);
-int kvm_mmu_page_fault(struct kvm_vcpu *vcpu, gva_t gva, u32 error_code,
+int kvm_mmu_page_fault(struct kvm_vcpu *vcpu, gva_t gva, u64 error_code,
void *insn, int insn_len);
void kvm_mmu_invlpg(struct kvm_vcpu *vcpu, gva_t gva);
void kvm_mmu_new_cr3(struct kvm_vcpu *vcpu);
@@ -1358,7 +1369,8 @@ void kvm_arch_async_page_ready(struct kvm_vcpu *vcpu,
bool kvm_arch_can_inject_async_page_present(struct kvm_vcpu *vcpu);
extern bool kvm_find_async_pf_gfn(struct kvm_vcpu *vcpu, gfn_t gfn);
-void kvm_complete_insn_gp(struct kvm_vcpu *vcpu, int err);
+int kvm_skip_emulated_instruction(struct kvm_vcpu *vcpu);
+int kvm_complete_insn_gp(struct kvm_vcpu *vcpu, int err);
int kvm_is_in_guest(void);
diff --git a/arch/x86/include/asm/vmx.h b/arch/x86/include/asm/vmx.h
index a002b07a7099..2b5b2d4b924e 100644
--- a/arch/x86/include/asm/vmx.h
+++ b/arch/x86/include/asm/vmx.h
@@ -25,6 +25,7 @@
#define VMX_H
+#include <linux/bitops.h>
#include <linux/types.h>
#include <uapi/asm/vmx.h>
@@ -60,6 +61,7 @@
*/
#define SECONDARY_EXEC_VIRTUALIZE_APIC_ACCESSES 0x00000001
#define SECONDARY_EXEC_ENABLE_EPT 0x00000002
+#define SECONDARY_EXEC_DESC 0x00000004
#define SECONDARY_EXEC_RDTSCP 0x00000008
#define SECONDARY_EXEC_VIRTUALIZE_X2APIC_MODE 0x00000010
#define SECONDARY_EXEC_ENABLE_VPID 0x00000020
@@ -110,6 +112,36 @@
#define VMX_MISC_SAVE_EFER_LMA 0x00000020
#define VMX_MISC_ACTIVITY_HLT 0x00000040
+static inline u32 vmx_basic_vmcs_revision_id(u64 vmx_basic)
+{
+ return vmx_basic & GENMASK_ULL(30, 0);
+}
+
+static inline u32 vmx_basic_vmcs_size(u64 vmx_basic)
+{
+ return (vmx_basic & GENMASK_ULL(44, 32)) >> 32;
+}
+
+static inline int vmx_misc_preemption_timer_rate(u64 vmx_misc)
+{
+ return vmx_misc & VMX_MISC_PREEMPTION_TIMER_RATE_MASK;
+}
+
+static inline int vmx_misc_cr3_count(u64 vmx_misc)
+{
+ return (vmx_misc & GENMASK_ULL(24, 16)) >> 16;
+}
+
+static inline int vmx_misc_max_msr(u64 vmx_misc)
+{
+ return (vmx_misc & GENMASK_ULL(27, 25)) >> 25;
+}
+
+static inline int vmx_misc_mseg_revid(u64 vmx_misc)
+{
+ return (vmx_misc & GENMASK_ULL(63, 32)) >> 32;
+}
+
/* VMCS Encodings */
enum vmcs_field {
VIRTUAL_PROCESSOR_ID = 0x00000000,
@@ -399,10 +431,11 @@ enum vmcs_field {
#define IDENTITY_PAGETABLE_PRIVATE_MEMSLOT (KVM_USER_MEM_SLOTS + 2)
#define VMX_NR_VPIDS (1 << 16)
+#define VMX_VPID_EXTENT_INDIVIDUAL_ADDR 0
#define VMX_VPID_EXTENT_SINGLE_CONTEXT 1
#define VMX_VPID_EXTENT_ALL_CONTEXT 2
+#define VMX_VPID_EXTENT_SINGLE_NON_GLOBAL 3
-#define VMX_EPT_EXTENT_INDIVIDUAL_ADDR 0
#define VMX_EPT_EXTENT_CONTEXT 1
#define VMX_EPT_EXTENT_GLOBAL 2
#define VMX_EPT_EXTENT_SHIFT 24
@@ -419,8 +452,10 @@ enum vmcs_field {
#define VMX_EPT_EXTENT_GLOBAL_BIT (1ull << 26)
#define VMX_VPID_INVVPID_BIT (1ull << 0) /* (32 - 32) */
+#define VMX_VPID_EXTENT_INDIVIDUAL_ADDR_BIT (1ull << 8) /* (40 - 32) */
#define VMX_VPID_EXTENT_SINGLE_CONTEXT_BIT (1ull << 9) /* (41 - 32) */
#define VMX_VPID_EXTENT_GLOBAL_CONTEXT_BIT (1ull << 10) /* (42 - 32) */
+#define VMX_VPID_EXTENT_SINGLE_NON_GLOBAL_BIT (1ull << 11) /* (43 - 32) */
#define VMX_EPT_DEFAULT_GAW 3
#define VMX_EPT_MAX_GAW 0x4
diff --git a/arch/x86/include/uapi/asm/vmx.h b/arch/x86/include/uapi/asm/vmx.h
index 37fee272618f..14458658e988 100644
--- a/arch/x86/include/uapi/asm/vmx.h
+++ b/arch/x86/include/uapi/asm/vmx.h
@@ -65,6 +65,8 @@
#define EXIT_REASON_TPR_BELOW_THRESHOLD 43
#define EXIT_REASON_APIC_ACCESS 44
#define EXIT_REASON_EOI_INDUCED 45
+#define EXIT_REASON_GDTR_IDTR 46
+#define EXIT_REASON_LDTR_TR 47
#define EXIT_REASON_EPT_VIOLATION 48
#define EXIT_REASON_EPT_MISCONFIG 49
#define EXIT_REASON_INVEPT 50
@@ -113,6 +115,8 @@
{ EXIT_REASON_MCE_DURING_VMENTRY, "MCE_DURING_VMENTRY" }, \
{ EXIT_REASON_TPR_BELOW_THRESHOLD, "TPR_BELOW_THRESHOLD" }, \
{ EXIT_REASON_APIC_ACCESS, "APIC_ACCESS" }, \
+ { EXIT_REASON_GDTR_IDTR, "GDTR_IDTR" }, \
+ { EXIT_REASON_LDTR_TR, "LDTR_TR" }, \
{ EXIT_REASON_EPT_VIOLATION, "EPT_VIOLATION" }, \
{ EXIT_REASON_EPT_MISCONFIG, "EPT_MISCONFIG" }, \
{ EXIT_REASON_INVEPT, "INVEPT" }, \
@@ -129,6 +133,7 @@
{ EXIT_REASON_XRSTORS, "XRSTORS" }
#define VMX_ABORT_SAVE_GUEST_MSR_FAIL 1
+#define VMX_ABORT_LOAD_HOST_PDPTE_FAIL 2
#define VMX_ABORT_LOAD_HOST_MSR_FAIL 4
#endif /* _UAPIVMX_H */
diff --git a/arch/x86/kernel/cpu/intel_cacheinfo.c b/arch/x86/kernel/cpu/intel_cacheinfo.c
index de6626c18e42..be6337156502 100644
--- a/arch/x86/kernel/cpu/intel_cacheinfo.c
+++ b/arch/x86/kernel/cpu/intel_cacheinfo.c
@@ -934,6 +934,8 @@ static int __populate_cache_leaves(unsigned int cpu)
ci_leaf_init(this_leaf++, &id4_regs);
__cache_cpumap_setup(cpu, idx, &id4_regs);
}
+ this_cpu_ci->cpu_map_populated = true;
+
return 0;
}
diff --git a/arch/x86/kernel/cpu/mcheck/mce.c b/arch/x86/kernel/cpu/mcheck/mce.c
index 132e1ec67da0..00ef43233e03 100644
--- a/arch/x86/kernel/cpu/mcheck/mce.c
+++ b/arch/x86/kernel/cpu/mcheck/mce.c
@@ -516,7 +516,7 @@ int mce_available(struct cpuinfo_x86 *c)
static void mce_schedule_work(void)
{
- if (!mce_gen_pool_empty() && keventd_up())
+ if (!mce_gen_pool_empty())
schedule_work(&mce_work);
}
diff --git a/arch/x86/kvm/cpuid.c b/arch/x86/kvm/cpuid.c
index 0aefb626fa8f..b2d3cf1ef54a 100644
--- a/arch/x86/kvm/cpuid.c
+++ b/arch/x86/kvm/cpuid.c
@@ -16,6 +16,7 @@
#include <linux/export.h>
#include <linux/vmalloc.h>
#include <linux/uaccess.h>
+#include <asm/processor.h>
#include <asm/user.h>
#include <asm/fpu/xstate.h>
#include "cpuid.h"
@@ -64,6 +65,11 @@ u64 kvm_supported_xcr0(void)
#define F(x) bit(X86_FEATURE_##x)
+/* These are scattered features in cpufeatures.h. */
+#define KVM_CPUID_BIT_AVX512_4VNNIW 2
+#define KVM_CPUID_BIT_AVX512_4FMAPS 3
+#define KF(x) bit(KVM_CPUID_BIT_##x)
+
int kvm_update_cpuid(struct kvm_vcpu *vcpu)
{
struct kvm_cpuid_entry2 *best;
@@ -80,6 +86,10 @@ int kvm_update_cpuid(struct kvm_vcpu *vcpu)
best->ecx |= F(OSXSAVE);
}
+ best->edx &= ~F(APIC);
+ if (vcpu->arch.apic_base & MSR_IA32_APICBASE_ENABLE)
+ best->edx |= F(APIC);
+
if (apic) {
if (best->ecx & F(TSC_DEADLINE_TIMER))
apic->lapic_timer.timer_mode_mask = 3 << 17;
@@ -374,6 +384,10 @@ static inline int __do_cpuid_ent(struct kvm_cpuid_entry2 *entry, u32 function,
/* cpuid 7.0.ecx*/
const u32 kvm_cpuid_7_0_ecx_x86_features = F(PKU) | 0 /*OSPKE*/;
+ /* cpuid 7.0.edx*/
+ const u32 kvm_cpuid_7_0_edx_x86_features =
+ KF(AVX512_4VNNIW) | KF(AVX512_4FMAPS);
+
/* all calls to cpuid_count() should be made on the same cpu */
get_cpu();
@@ -456,12 +470,14 @@ static inline int __do_cpuid_ent(struct kvm_cpuid_entry2 *entry, u32 function,
/* PKU is not yet implemented for shadow paging. */
if (!tdp_enabled)
entry->ecx &= ~F(PKU);
+ entry->edx &= kvm_cpuid_7_0_edx_x86_features;
+ entry->edx &= get_scattered_cpuid_leaf(7, 0, CPUID_EDX);
} else {
entry->ebx = 0;
entry->ecx = 0;
+ entry->edx = 0;
}
entry->eax = 0;
- entry->edx = 0;
break;
}
case 9:
@@ -861,17 +877,17 @@ void kvm_cpuid(struct kvm_vcpu *vcpu, u32 *eax, u32 *ebx, u32 *ecx, u32 *edx)
}
EXPORT_SYMBOL_GPL(kvm_cpuid);
-void kvm_emulate_cpuid(struct kvm_vcpu *vcpu)
+int kvm_emulate_cpuid(struct kvm_vcpu *vcpu)
{
- u32 function, eax, ebx, ecx, edx;
+ u32 eax, ebx, ecx, edx;
- function = eax = kvm_register_read(vcpu, VCPU_REGS_RAX);
+ eax = kvm_register_read(vcpu, VCPU_REGS_RAX);
ecx = kvm_register_read(vcpu, VCPU_REGS_RCX);
kvm_cpuid(vcpu, &eax, &ebx, &ecx, &edx);
kvm_register_write(vcpu, VCPU_REGS_RAX, eax);
kvm_register_write(vcpu, VCPU_REGS_RBX, ebx);
kvm_register_write(vcpu, VCPU_REGS_RCX, ecx);
kvm_register_write(vcpu, VCPU_REGS_RDX, edx);
- kvm_x86_ops->skip_emulated_instruction(vcpu);
+ return kvm_skip_emulated_instruction(vcpu);
}
EXPORT_SYMBOL_GPL(kvm_emulate_cpuid);
diff --git a/arch/x86/kvm/emulate.c b/arch/x86/kvm/emulate.c
index a3ce9d260d68..56628a44668b 100644
--- a/arch/x86/kvm/emulate.c
+++ b/arch/x86/kvm/emulate.c
@@ -158,9 +158,11 @@
#define Src2GS (OpGS << Src2Shift)
#define Src2Mask (OpMask << Src2Shift)
#define Mmx ((u64)1 << 40) /* MMX Vector instruction */
+#define AlignMask ((u64)7 << 41)
#define Aligned ((u64)1 << 41) /* Explicitly aligned (e.g. MOVDQA) */
-#define Unaligned ((u64)1 << 42) /* Explicitly unaligned (e.g. MOVDQU) */
-#define Avx ((u64)1 << 43) /* Advanced Vector Extensions */
+#define Unaligned ((u64)2 << 41) /* Explicitly unaligned (e.g. MOVDQU) */
+#define Avx ((u64)3 << 41) /* Advanced Vector Extensions */
+#define Aligned16 ((u64)4 << 41) /* Aligned to 16 byte boundary (e.g. FXSAVE) */
#define Fastop ((u64)1 << 44) /* Use opcode::u.fastop */
#define NoWrite ((u64)1 << 45) /* No writeback */
#define SrcWrite ((u64)1 << 46) /* Write back src operand */
@@ -446,6 +448,26 @@ FOP_END;
FOP_START(salc) "pushf; sbb %al, %al; popf \n\t" FOP_RET
FOP_END;
+/*
+ * XXX: inoutclob user must know where the argument is being expanded.
+ * Relying on CC_HAVE_ASM_GOTO would allow us to remove _fault.
+ */
+#define asm_safe(insn, inoutclob...) \
+({ \
+ int _fault = 0; \
+ \
+ asm volatile("1:" insn "\n" \
+ "2:\n" \
+ ".pushsection .fixup, \"ax\"\n" \
+ "3: movl $1, %[_fault]\n" \
+ " jmp 2b\n" \
+ ".popsection\n" \
+ _ASM_EXTABLE(1b, 3b) \
+ : [_fault] "+qm"(_fault) inoutclob ); \
+ \
+ _fault ? X86EMUL_UNHANDLEABLE : X86EMUL_CONTINUE; \
+})
+
static int emulator_check_intercept(struct x86_emulate_ctxt *ctxt,
enum x86_intercept intercept,
enum x86_intercept_stage stage)
@@ -632,21 +654,26 @@ static void set_segment_selector(struct x86_emulate_ctxt *ctxt, u16 selector,
* depending on whether they're AVX encoded or not.
*
* Also included is CMPXCHG16B which is not a vector instruction, yet it is
- * subject to the same check.
+ * subject to the same check. FXSAVE and FXRSTOR are checked here too as their
+ * 512 bytes of data must be aligned to a 16 byte boundary.
*/
-static bool insn_aligned(struct x86_emulate_ctxt *ctxt, unsigned size)
+static unsigned insn_alignment(struct x86_emulate_ctxt *ctxt, unsigned size)
{
- if (likely(size < 16))
- return false;
+ u64 alignment = ctxt->d & AlignMask;
- if (ctxt->d & Aligned)
- return true;
- else if (ctxt->d & Unaligned)
- return false;
- else if (ctxt->d & Avx)
- return false;
- else
- return true;
+ if (likely(size < 16))
+ return 1;
+
+ switch (alignment) {
+ case Unaligned:
+ case Avx:
+ return 1;
+ case Aligned16:
+ return 16;
+ case Aligned:
+ default:
+ return size;
+ }
}
static __always_inline int __linearize(struct x86_emulate_ctxt *ctxt,
@@ -704,7 +731,7 @@ static __always_inline int __linearize(struct x86_emulate_ctxt *ctxt,
}
break;
}
- if (insn_aligned(ctxt, size) && ((la & (size - 1)) != 0))
+ if (la & (insn_alignment(ctxt, size) - 1))
return emulate_gp(ctxt, 0);
return X86EMUL_CONTINUE;
bad:
@@ -3842,6 +3869,131 @@ static int em_movsxd(struct x86_emulate_ctxt *ctxt)
return X86EMUL_CONTINUE;
}
+static int check_fxsr(struct x86_emulate_ctxt *ctxt)
+{
+ u32 eax = 1, ebx, ecx = 0, edx;
+
+ ctxt->ops->get_cpuid(ctxt, &eax, &ebx, &ecx, &edx);
+ if (!(edx & FFL(FXSR)))
+ return emulate_ud(ctxt);
+
+ if (ctxt->ops->get_cr(ctxt, 0) & (X86_CR0_TS | X86_CR0_EM))
+ return emulate_nm(ctxt);
+
+ /*
+ * Don't emulate a case that should never be hit, instead of working
+ * around a lack of fxsave64/fxrstor64 on old compilers.
+ */
+ if (ctxt->mode >= X86EMUL_MODE_PROT64)
+ return X86EMUL_UNHANDLEABLE;
+
+ return X86EMUL_CONTINUE;
+}
+
+/*
+ * FXSAVE and FXRSTOR have 4 different formats depending on execution mode,
+ * 1) 16 bit mode
+ * 2) 32 bit mode
+ * - like (1), but FIP and FDP (foo) are only 16 bit. At least Intel CPUs
+ * preserve whole 32 bit values, though, so (1) and (2) are the same wrt.
+ * save and restore
+ * 3) 64-bit mode with REX.W prefix
+ * - like (2), but XMM 8-15 are being saved and restored
+ * 4) 64-bit mode without REX.W prefix
+ * - like (3), but FIP and FDP are 64 bit
+ *
+ * Emulation uses (3) for (1) and (2) and preserves XMM 8-15 to reach the
+ * desired result. (4) is not emulated.
+ *
+ * Note: Guest and host CPUID.(EAX=07H,ECX=0H):EBX[bit 13] (deprecate FPU CS
+ * and FPU DS) should match.
+ */
+static int em_fxsave(struct x86_emulate_ctxt *ctxt)
+{
+ struct fxregs_state fx_state;
+ size_t size;
+ int rc;
+
+ rc = check_fxsr(ctxt);
+ if (rc != X86EMUL_CONTINUE)
+ return rc;
+
+ ctxt->ops->get_fpu(ctxt);
+
+ rc = asm_safe("fxsave %[fx]", , [fx] "+m"(fx_state));
+
+ ctxt->ops->put_fpu(ctxt);
+
+ if (rc != X86EMUL_CONTINUE)
+ return rc;
+
+ if (ctxt->ops->get_cr(ctxt, 4) & X86_CR4_OSFXSR)
+ size = offsetof(struct fxregs_state, xmm_space[8 * 16/4]);
+ else
+ size = offsetof(struct fxregs_state, xmm_space[0]);
+
+ return segmented_write(ctxt, ctxt->memop.addr.mem, &fx_state, size);
+}
+
+static int fxrstor_fixup(struct x86_emulate_ctxt *ctxt,
+ struct fxregs_state *new)
+{
+ int rc = X86EMUL_CONTINUE;
+ struct fxregs_state old;
+
+ rc = asm_safe("fxsave %[fx]", , [fx] "+m"(old));
+ if (rc != X86EMUL_CONTINUE)
+ return rc;
+
+ /*
+ * 64 bit host will restore XMM 8-15, which is not correct on non-64
+ * bit guests. Load the current values in order to preserve 64 bit
+ * XMMs after fxrstor.
+ */
+#ifdef CONFIG_X86_64
+ /* XXX: accessing XMM 8-15 very awkwardly */
+ memcpy(&new->xmm_space[8 * 16/4], &old.xmm_space[8 * 16/4], 8 * 16);
+#endif
+
+ /*
+ * Hardware doesn't save and restore XMM 0-7 without CR4.OSFXSR, but
+ * does save and restore MXCSR.
+ */
+ if (!(ctxt->ops->get_cr(ctxt, 4) & X86_CR4_OSFXSR))
+ memcpy(new->xmm_space, old.xmm_space, 8 * 16);
+
+ return rc;
+}
+
+static int em_fxrstor(struct x86_emulate_ctxt *ctxt)
+{
+ struct fxregs_state fx_state;
+ int rc;
+
+ rc = check_fxsr(ctxt);
+ if (rc != X86EMUL_CONTINUE)
+ return rc;
+
+ rc = segmented_read(ctxt, ctxt->memop.addr.mem, &fx_state, 512);
+ if (rc != X86EMUL_CONTINUE)
+ return rc;
+
+ if (fx_state.mxcsr >> 16)
+ return emulate_gp(ctxt, 0);
+
+ ctxt->ops->get_fpu(ctxt);
+
+ if (ctxt->mode < X86EMUL_MODE_PROT64)
+ rc = fxrstor_fixup(ctxt, &fx_state);
+
+ if (rc == X86EMUL_CONTINUE)
+ rc = asm_safe("fxrstor %[fx]", : [fx] "m"(fx_state));
+
+ ctxt->ops->put_fpu(ctxt);
+
+ return rc;
+}
+
static bool valid_cr(int nr)
{
switch (nr) {
@@ -4194,7 +4346,9 @@ static const struct gprefix pfx_0f_ae_7 = {
};
static const struct group_dual group15 = { {
- N, N, N, N, N, N, N, GP(0, &pfx_0f_ae_7),
+ I(ModRM | Aligned16, em_fxsave),
+ I(ModRM | Aligned16, em_fxrstor),
+ N, N, N, N, N, GP(0, &pfx_0f_ae_7),
}, {
N, N, N, N, N, N, N, N,
} };
@@ -5066,21 +5220,13 @@ static bool string_insn_completed(struct x86_emulate_ctxt *ctxt)
static int flush_pending_x87_faults(struct x86_emulate_ctxt *ctxt)
{
- bool fault = false;
+ int rc;
ctxt->ops->get_fpu(ctxt);
- asm volatile("1: fwait \n\t"
- "2: \n\t"
- ".pushsection .fixup,\"ax\" \n\t"
- "3: \n\t"
- "movb $1, %[fault] \n\t"
- "jmp 2b \n\t"
- ".popsection \n\t"
- _ASM_EXTABLE(1b, 3b)
- : [fault]"+qm"(fault));
+ rc = asm_safe("fwait");
ctxt->ops->put_fpu(ctxt);
- if (unlikely(fault))
+ if (unlikely(rc != X86EMUL_CONTINUE))
return emulate_exception(ctxt, MF_VECTOR, 0, false);
return X86EMUL_CONTINUE;
diff --git a/arch/x86/kvm/hyperv.c b/arch/x86/kvm/hyperv.c
index 42b1c83741c8..99cde5220e07 100644
--- a/arch/x86/kvm/hyperv.c
+++ b/arch/x86/kvm/hyperv.c
@@ -291,7 +291,7 @@ static int synic_get_msr(struct kvm_vcpu_hv_synic *synic, u32 msr, u64 *pdata)
return ret;
}
-int synic_set_irq(struct kvm_vcpu_hv_synic *synic, u32 sint)
+static int synic_set_irq(struct kvm_vcpu_hv_synic *synic, u32 sint)
{
struct kvm_vcpu *vcpu = synic_to_vcpu(synic);
struct kvm_lapic_irq irq;
diff --git a/arch/x86/kvm/i8254.c b/arch/x86/kvm/i8254.c
index 16a7134eedac..a78b445ce411 100644
--- a/arch/x86/kvm/i8254.c
+++ b/arch/x86/kvm/i8254.c
@@ -212,7 +212,7 @@ static void kvm_pit_ack_irq(struct kvm_irq_ack_notifier *kian)
*/
smp_mb();
if (atomic_dec_if_positive(&ps->pending) > 0)
- kthread_queue_work(&pit->worker, &pit->expired);
+ kthread_queue_work(pit->worker, &pit->expired);
}
void __kvm_migrate_pit_timer(struct kvm_vcpu *vcpu)
@@ -272,7 +272,7 @@ static enum hrtimer_restart pit_timer_fn(struct hrtimer *data)
if (atomic_read(&ps->reinject))
atomic_inc(&ps->pending);
- kthread_queue_work(&pt->worker, &pt->expired);
+ kthread_queue_work(pt->worker, &pt->expired);
if (ps->is_periodic) {
hrtimer_add_expires_ns(&ps->timer, ps->period);
@@ -667,10 +667,8 @@ struct kvm_pit *kvm_create_pit(struct kvm *kvm, u32 flags)
pid_nr = pid_vnr(pid);
put_pid(pid);
- kthread_init_worker(&pit->worker);
- pit->worker_task = kthread_run(kthread_worker_fn, &pit->worker,
- "kvm-pit/%d", pid_nr);
- if (IS_ERR(pit->worker_task))
+ pit->worker = kthread_create_worker(0, "kvm-pit/%d", pid_nr);
+ if (IS_ERR(pit->worker))
goto fail_kthread;
kthread_init_work(&pit->expired, pit_do_work);
@@ -713,7 +711,7 @@ fail_register_speaker:
fail_register_pit:
mutex_unlock(&kvm->slots_lock);
kvm_pit_set_reinject(pit, false);
- kthread_stop(pit->worker_task);
+ kthread_destroy_worker(pit->worker);
fail_kthread:
kvm_free_irq_source_id(kvm, pit->irq_source_id);
fail_request:
@@ -730,8 +728,7 @@ void kvm_free_pit(struct kvm *kvm)
kvm_io_bus_unregister_dev(kvm, KVM_PIO_BUS, &pit->speaker_dev);
kvm_pit_set_reinject(pit, false);
hrtimer_cancel(&pit->pit_state.timer);
- kthread_flush_work(&pit->expired);
- kthread_stop(pit->worker_task);
+ kthread_destroy_worker(pit->worker);
kvm_free_irq_source_id(kvm, pit->irq_source_id);
kfree(pit);
}
diff --git a/arch/x86/kvm/i8254.h b/arch/x86/kvm/i8254.h
index 2f5af0798326..600bee9dcbbd 100644
--- a/arch/x86/kvm/i8254.h
+++ b/arch/x86/kvm/i8254.h
@@ -44,8 +44,7 @@ struct kvm_pit {
struct kvm_kpit_state pit_state;
int irq_source_id;
struct kvm_irq_mask_notifier mask_notifier;
- struct kthread_worker worker;
- struct task_struct *worker_task;
+ struct kthread_worker *worker;
struct kthread_work expired;
};
diff --git a/arch/x86/kvm/lapic.c b/arch/x86/kvm/lapic.c
index 6f69340f9fa3..34a66b2d47e6 100644
--- a/arch/x86/kvm/lapic.c
+++ b/arch/x86/kvm/lapic.c
@@ -342,9 +342,11 @@ void __kvm_apic_update_irr(u32 *pir, void *regs)
u32 i, pir_val;
for (i = 0; i <= 7; i++) {
- pir_val = xchg(&pir[i], 0);
- if (pir_val)
+ pir_val = READ_ONCE(pir[i]);
+ if (pir_val) {
+ pir_val = xchg(&pir[i], 0);
*((u32 *)(regs + APIC_IRR + i * 0x10)) |= pir_val;
+ }
}
}
EXPORT_SYMBOL_GPL(__kvm_apic_update_irr);
@@ -1090,7 +1092,7 @@ static void apic_send_ipi(struct kvm_lapic *apic)
static u32 apic_get_tmcct(struct kvm_lapic *apic)
{
- ktime_t remaining;
+ ktime_t remaining, now;
s64 ns;
u32 tmcct;
@@ -1101,7 +1103,8 @@ static u32 apic_get_tmcct(struct kvm_lapic *apic)
apic->lapic_timer.period == 0)
return 0;
- remaining = hrtimer_get_remaining(&apic->lapic_timer.timer);
+ now = ktime_get();
+ remaining = ktime_sub(apic->lapic_timer.target_expiration, now);
if (ktime_to_ns(remaining) < 0)
remaining = ktime_set(0, 0);
@@ -1332,7 +1335,7 @@ static void start_sw_tscdeadline(struct kvm_lapic *apic)
local_irq_save(flags);
- now = apic->lapic_timer.timer.base->get_time();
+ now = ktime_get();
guest_tsc = kvm_read_l1_tsc(vcpu, rdtsc());
if (likely(tscdeadline > guest_tsc)) {
ns = (tscdeadline - guest_tsc) * 1000000ULL;
@@ -1347,6 +1350,79 @@ static void start_sw_tscdeadline(struct kvm_lapic *apic)
local_irq_restore(flags);
}
+static void start_sw_period(struct kvm_lapic *apic)
+{
+ if (!apic->lapic_timer.period)
+ return;
+
+ if (apic_lvtt_oneshot(apic) &&
+ ktime_after(ktime_get(),
+ apic->lapic_timer.target_expiration)) {
+ apic_timer_expired(apic);
+ return;
+ }
+
+ hrtimer_start(&apic->lapic_timer.timer,
+ apic->lapic_timer.target_expiration,
+ HRTIMER_MODE_ABS_PINNED);
+}
+
+static bool set_target_expiration(struct kvm_lapic *apic)
+{
+ ktime_t now;
+ u64 tscl = rdtsc();
+
+ now = ktime_get();
+ apic->lapic_timer.period = (u64)kvm_lapic_get_reg(apic, APIC_TMICT)
+ * APIC_BUS_CYCLE_NS * apic->divide_count;
+
+ if (!apic->lapic_timer.period)
+ return false;
+
+ /*
+ * Do not allow the guest to program periodic timers with small
+ * interval, since the hrtimers are not throttled by the host
+ * scheduler.
+ */
+ if (apic_lvtt_period(apic)) {
+ s64 min_period = min_timer_period_us * 1000LL;
+
+ if (apic->lapic_timer.period < min_period) {
+ pr_info_ratelimited(
+ "kvm: vcpu %i: requested %lld ns "
+ "lapic timer period limited to %lld ns\n",
+ apic->vcpu->vcpu_id,
+ apic->lapic_timer.period, min_period);
+ apic->lapic_timer.period = min_period;
+ }
+ }
+
+ apic_debug("%s: bus cycle is %" PRId64 "ns, now 0x%016"
+ PRIx64 ", "
+ "timer initial count 0x%x, period %lldns, "
+ "expire @ 0x%016" PRIx64 ".\n", __func__,
+ APIC_BUS_CYCLE_NS, ktime_to_ns(now),
+ kvm_lapic_get_reg(apic, APIC_TMICT),
+ apic->lapic_timer.period,
+ ktime_to_ns(ktime_add_ns(now,
+ apic->lapic_timer.period)));
+
+ apic->lapic_timer.tscdeadline = kvm_read_l1_tsc(apic->vcpu, tscl) +
+ nsec_to_cycles(apic->vcpu, apic->lapic_timer.period);
+ apic->lapic_timer.target_expiration = ktime_add_ns(now, apic->lapic_timer.period);
+
+ return true;
+}
+
+static void advance_periodic_target_expiration(struct kvm_lapic *apic)
+{
+ apic->lapic_timer.tscdeadline +=
+ nsec_to_cycles(apic->vcpu, apic->lapic_timer.period);
+ apic->lapic_timer.target_expiration =
+ ktime_add_ns(apic->lapic_timer.target_expiration,
+ apic->lapic_timer.period);
+}
+
bool kvm_lapic_hv_timer_in_use(struct kvm_vcpu *vcpu)
{
if (!lapic_in_kernel(vcpu))
@@ -1356,52 +1432,59 @@ bool kvm_lapic_hv_timer_in_use(struct kvm_vcpu *vcpu)
}
EXPORT_SYMBOL_GPL(kvm_lapic_hv_timer_in_use);
-static void cancel_hv_tscdeadline(struct kvm_lapic *apic)
+static void cancel_hv_timer(struct kvm_lapic *apic)
{
kvm_x86_ops->cancel_hv_timer(apic->vcpu);
apic->lapic_timer.hv_timer_in_use = false;
}
-void kvm_lapic_expired_hv_timer(struct kvm_vcpu *vcpu)
-{
- struct kvm_lapic *apic = vcpu->arch.apic;
-
- WARN_ON(!apic->lapic_timer.hv_timer_in_use);
- WARN_ON(swait_active(&vcpu->wq));
- cancel_hv_tscdeadline(apic);
- apic_timer_expired(apic);
-}
-EXPORT_SYMBOL_GPL(kvm_lapic_expired_hv_timer);
-
-static bool start_hv_tscdeadline(struct kvm_lapic *apic)
+static bool start_hv_timer(struct kvm_lapic *apic)
{
u64 tscdeadline = apic->lapic_timer.tscdeadline;
- if (atomic_read(&apic->lapic_timer.pending) ||
+ if ((atomic_read(&apic->lapic_timer.pending) &&
+ !apic_lvtt_period(apic)) ||
kvm_x86_ops->set_hv_timer(apic->vcpu, tscdeadline)) {
if (apic->lapic_timer.hv_timer_in_use)
- cancel_hv_tscdeadline(apic);
+ cancel_hv_timer(apic);
} else {
apic->lapic_timer.hv_timer_in_use = true;
hrtimer_cancel(&apic->lapic_timer.timer);
/* In case the sw timer triggered in the window */
- if (atomic_read(&apic->lapic_timer.pending))
- cancel_hv_tscdeadline(apic);
+ if (atomic_read(&apic->lapic_timer.pending) &&
+ !apic_lvtt_period(apic))
+ cancel_hv_timer(apic);
}
trace_kvm_hv_timer_state(apic->vcpu->vcpu_id,
apic->lapic_timer.hv_timer_in_use);
return apic->lapic_timer.hv_timer_in_use;
}
+void kvm_lapic_expired_hv_timer(struct kvm_vcpu *vcpu)
+{
+ struct kvm_lapic *apic = vcpu->arch.apic;
+
+ WARN_ON(!apic->lapic_timer.hv_timer_in_use);
+ WARN_ON(swait_active(&vcpu->wq));
+ cancel_hv_timer(apic);
+ apic_timer_expired(apic);
+
+ if (apic_lvtt_period(apic) && apic->lapic_timer.period) {
+ advance_periodic_target_expiration(apic);
+ if (!start_hv_timer(apic))
+ start_sw_period(apic);
+ }
+}
+EXPORT_SYMBOL_GPL(kvm_lapic_expired_hv_timer);
+
void kvm_lapic_switch_to_hv_timer(struct kvm_vcpu *vcpu)
{
struct kvm_lapic *apic = vcpu->arch.apic;
WARN_ON(apic->lapic_timer.hv_timer_in_use);
- if (apic_lvtt_tscdeadline(apic))
- start_hv_tscdeadline(apic);
+ start_hv_timer(apic);
}
EXPORT_SYMBOL_GPL(kvm_lapic_switch_to_hv_timer);
@@ -1413,62 +1496,28 @@ void kvm_lapic_switch_to_sw_timer(struct kvm_vcpu *vcpu)
if (!apic->lapic_timer.hv_timer_in_use)
return;
- cancel_hv_tscdeadline(apic);
+ cancel_hv_timer(apic);
if (atomic_read(&apic->lapic_timer.pending))
return;
- start_sw_tscdeadline(apic);
+ if (apic_lvtt_period(apic) || apic_lvtt_oneshot(apic))
+ start_sw_period(apic);
+ else if (apic_lvtt_tscdeadline(apic))
+ start_sw_tscdeadline(apic);
}
EXPORT_SYMBOL_GPL(kvm_lapic_switch_to_sw_timer);
static void start_apic_timer(struct kvm_lapic *apic)
{
- ktime_t now;
-
atomic_set(&apic->lapic_timer.pending, 0);
if (apic_lvtt_period(apic) || apic_lvtt_oneshot(apic)) {
- /* lapic timer in oneshot or periodic mode */
- now = apic->lapic_timer.timer.base->get_time();
- apic->lapic_timer.period = (u64)kvm_lapic_get_reg(apic, APIC_TMICT)
- * APIC_BUS_CYCLE_NS * apic->divide_count;
-
- if (!apic->lapic_timer.period)
- return;
- /*
- * Do not allow the guest to program periodic timers with small
- * interval, since the hrtimers are not throttled by the host
- * scheduler.
- */
- if (apic_lvtt_period(apic)) {
- s64 min_period = min_timer_period_us * 1000LL;
-
- if (apic->lapic_timer.period < min_period) {
- pr_info_ratelimited(
- "kvm: vcpu %i: requested %lld ns "
- "lapic timer period limited to %lld ns\n",
- apic->vcpu->vcpu_id,
- apic->lapic_timer.period, min_period);
- apic->lapic_timer.period = min_period;
- }
- }
-
- hrtimer_start(&apic->lapic_timer.timer,
- ktime_add_ns(now, apic->lapic_timer.period),
- HRTIMER_MODE_ABS_PINNED);
-
- apic_debug("%s: bus cycle is %" PRId64 "ns, now 0x%016"
- PRIx64 ", "
- "timer initial count 0x%x, period %lldns, "
- "expire @ 0x%016" PRIx64 ".\n", __func__,
- APIC_BUS_CYCLE_NS, ktime_to_ns(now),
- kvm_lapic_get_reg(apic, APIC_TMICT),
- apic->lapic_timer.period,
- ktime_to_ns(ktime_add_ns(now,
- apic->lapic_timer.period)));
+ if (set_target_expiration(apic) &&
+ !(kvm_x86_ops->set_hv_timer && start_hv_timer(apic)))
+ start_sw_period(apic);
} else if (apic_lvtt_tscdeadline(apic)) {
- if (!(kvm_x86_ops->set_hv_timer && start_hv_tscdeadline(apic)))
+ if (!(kvm_x86_ops->set_hv_timer && start_hv_timer(apic)))
start_sw_tscdeadline(apic);
}
}
@@ -1701,13 +1750,22 @@ void kvm_free_lapic(struct kvm_vcpu *vcpu)
* LAPIC interface
*----------------------------------------------------------------------
*/
+u64 kvm_get_lapic_target_expiration_tsc(struct kvm_vcpu *vcpu)
+{
+ struct kvm_lapic *apic = vcpu->arch.apic;
+
+ if (!lapic_in_kernel(vcpu))
+ return 0;
+
+ return apic->lapic_timer.tscdeadline;
+}
u64 kvm_get_lapic_tscdeadline_msr(struct kvm_vcpu *vcpu)
{
struct kvm_lapic *apic = vcpu->arch.apic;
- if (!lapic_in_kernel(vcpu) || apic_lvtt_oneshot(apic) ||
- apic_lvtt_period(apic))
+ if (!lapic_in_kernel(vcpu) ||
+ !apic_lvtt_tscdeadline(apic))
return 0;
return apic->lapic_timer.tscdeadline;
@@ -1748,14 +1806,17 @@ void kvm_lapic_set_base(struct kvm_vcpu *vcpu, u64 value)
u64 old_value = vcpu->arch.apic_base;
struct kvm_lapic *apic = vcpu->arch.apic;
- if (!apic) {
+ if (!apic)
value |= MSR_IA32_APICBASE_BSP;
- vcpu->arch.apic_base = value;
- return;
- }
vcpu->arch.apic_base = value;
+ if ((old_value ^ value) & MSR_IA32_APICBASE_ENABLE)
+ kvm_update_cpuid(vcpu);
+
+ if (!apic)
+ return;
+
/* update jump label if enable bit changes */
if ((old_value ^ value) & MSR_IA32_APICBASE_ENABLE) {
if (value & MSR_IA32_APICBASE_ENABLE) {
@@ -1909,6 +1970,7 @@ static enum hrtimer_restart apic_timer_fn(struct hrtimer *data)
apic_timer_expired(apic);
if (lapic_is_periodic(apic)) {
+ advance_periodic_target_expiration(apic);
hrtimer_add_expires_ns(&ktimer->timer, ktimer->period);
return HRTIMER_RESTART;
} else
@@ -1993,6 +2055,10 @@ void kvm_inject_apic_timer_irqs(struct kvm_vcpu *vcpu)
kvm_apic_local_deliver(apic, APIC_LVTT);
if (apic_lvtt_tscdeadline(apic))
apic->lapic_timer.tscdeadline = 0;
+ if (apic_lvtt_oneshot(apic)) {
+ apic->lapic_timer.tscdeadline = 0;
+ apic->lapic_timer.target_expiration = ktime_set(0, 0);
+ }
atomic_set(&apic->lapic_timer.pending, 0);
}
}
diff --git a/arch/x86/kvm/lapic.h b/arch/x86/kvm/lapic.h
index f60d01c29d51..e0c80233b3e1 100644
--- a/arch/x86/kvm/lapic.h
+++ b/arch/x86/kvm/lapic.h
@@ -15,6 +15,7 @@
struct kvm_timer {
struct hrtimer timer;
s64 period; /* unit: ns */
+ ktime_t target_expiration;
u32 timer_mode;
u32 timer_mode_mask;
u64 tscdeadline;
@@ -85,6 +86,7 @@ int kvm_apic_get_state(struct kvm_vcpu *vcpu, struct kvm_lapic_state *s);
int kvm_apic_set_state(struct kvm_vcpu *vcpu, struct kvm_lapic_state *s);
int kvm_lapic_find_highest_irr(struct kvm_vcpu *vcpu);
+u64 kvm_get_lapic_target_expiration_tsc(struct kvm_vcpu *vcpu);
u64 kvm_get_lapic_tscdeadline_msr(struct kvm_vcpu *vcpu);
void kvm_set_lapic_tscdeadline_msr(struct kvm_vcpu *vcpu, u64 data);
diff --git a/arch/x86/kvm/mmu.c b/arch/x86/kvm/mmu.c
index 87c5880ba3b7..7012de4a1fed 100644
--- a/arch/x86/kvm/mmu.c
+++ b/arch/x86/kvm/mmu.c
@@ -1660,17 +1660,9 @@ int kvm_age_hva(struct kvm *kvm, unsigned long start, unsigned long end)
* This has some overhead, but not as much as the cost of swapping
* out actively used pages or breaking up actively used hugepages.
*/
- if (!shadow_accessed_mask) {
- /*
- * We are holding the kvm->mmu_lock, and we are blowing up
- * shadow PTEs. MMU notifier consumers need to be kept at bay.
- * This is correct as long as we don't decouple the mmu_lock
- * protected regions (like invalidate_range_start|end does).
- */
- kvm->mmu_notifier_seq++;
+ if (!shadow_accessed_mask)
return kvm_handle_hva_range(kvm, start, end, 0,
kvm_unmap_rmapp);
- }
return kvm_handle_hva_range(kvm, start, end, 0, kvm_age_rmapp);
}
@@ -4509,7 +4501,7 @@ static void make_mmu_pages_available(struct kvm_vcpu *vcpu)
kvm_mmu_commit_zap_page(vcpu->kvm, &invalid_list);
}
-int kvm_mmu_page_fault(struct kvm_vcpu *vcpu, gva_t cr2, u32 error_code,
+int kvm_mmu_page_fault(struct kvm_vcpu *vcpu, gva_t cr2, u64 error_code,
void *insn, int insn_len)
{
int r, emulation_type = EMULTYPE_RETRY;
@@ -4528,12 +4520,28 @@ int kvm_mmu_page_fault(struct kvm_vcpu *vcpu, gva_t cr2, u32 error_code,
return r;
}
- r = vcpu->arch.mmu.page_fault(vcpu, cr2, error_code, false);
+ r = vcpu->arch.mmu.page_fault(vcpu, cr2, lower_32_bits(error_code),
+ false);
if (r < 0)
return r;
if (!r)
return 1;
+ /*
+ * Before emulating the instruction, check if the error code
+ * was due to a RO violation while translating the guest page.
+ * This can occur when using nested virtualization with nested
+ * paging in both guests. If true, we simply unprotect the page
+ * and resume the guest.
+ *
+ * Note: AMD only (since it supports the PFERR_GUEST_PAGE_MASK used
+ * in PFERR_NEXT_GUEST_PAGE)
+ */
+ if (error_code == PFERR_NESTED_GUEST_PAGE) {
+ kvm_mmu_unprotect_page(vcpu->kvm, gpa_to_gfn(cr2));
+ return 1;
+ }
+
if (mmio_info_in_cache(vcpu, cr2, direct))
emulation_type = 0;
emulate:
@@ -4967,7 +4975,7 @@ void kvm_mmu_invalidate_mmio_sptes(struct kvm *kvm, struct kvm_memslots *slots)
* zap all shadow pages.
*/
if (unlikely((slots->generation & MMIO_GEN_MASK) == 0)) {
- printk_ratelimited(KERN_DEBUG "kvm: zapping shadow pages for mmio generation wraparound\n");
+ kvm_debug_ratelimited("kvm: zapping shadow pages for mmio generation wraparound\n");
kvm_mmu_invalidate_zap_all_pages(kvm);
}
}
diff --git a/arch/x86/kvm/svm.c b/arch/x86/kvm/svm.c
index 8ca1eca5038d..08a4d3ab3455 100644
--- a/arch/x86/kvm/svm.c
+++ b/arch/x86/kvm/svm.c
@@ -2074,7 +2074,7 @@ static void svm_set_dr7(struct kvm_vcpu *vcpu, unsigned long value)
static int pf_interception(struct vcpu_svm *svm)
{
u64 fault_address = svm->vmcb->control.exit_info_2;
- u32 error_code;
+ u64 error_code;
int r = 1;
switch (svm->apf_reason) {
@@ -2270,7 +2270,7 @@ static int io_interception(struct vcpu_svm *svm)
++svm->vcpu.stat.io_exits;
string = (io_info & SVM_IOIO_STR_MASK) != 0;
in = (io_info & SVM_IOIO_TYPE_MASK) != 0;
- if (string || in)
+ if (string)
return emulate_instruction(vcpu, 0) == EMULATE_DONE;
port = io_info >> 16;
@@ -2278,7 +2278,8 @@ static int io_interception(struct vcpu_svm *svm)
svm->next_rip = svm->vmcb->control.exit_info_2;
skip_emulated_instruction(&svm->vcpu);
- return kvm_fast_pio_out(vcpu, size, port);
+ return in ? kvm_fast_pio_in(vcpu, size, port)
+ : kvm_fast_pio_out(vcpu, size, port);
}
static int nmi_interception(struct vcpu_svm *svm)
@@ -3150,8 +3151,7 @@ static int skinit_interception(struct vcpu_svm *svm)
static int wbinvd_interception(struct vcpu_svm *svm)
{
- kvm_emulate_wbinvd(&svm->vcpu);
- return 1;
+ return kvm_emulate_wbinvd(&svm->vcpu);
}
static int xsetbv_interception(struct vcpu_svm *svm)
@@ -3238,8 +3238,7 @@ static int task_switch_interception(struct vcpu_svm *svm)
static int cpuid_interception(struct vcpu_svm *svm)
{
svm->next_rip = kvm_rip_read(&svm->vcpu) + 2;
- kvm_emulate_cpuid(&svm->vcpu);
- return 1;
+ return kvm_emulate_cpuid(&svm->vcpu);
}
static int iret_interception(struct vcpu_svm *svm)
@@ -3275,9 +3274,7 @@ static int rdpmc_interception(struct vcpu_svm *svm)
return emulate_on_interception(svm);
err = kvm_rdpmc(&svm->vcpu);
- kvm_complete_insn_gp(&svm->vcpu, err);
-
- return 1;
+ return kvm_complete_insn_gp(&svm->vcpu, err);
}
static bool check_selective_cr0_intercepted(struct vcpu_svm *svm,
@@ -3374,9 +3371,7 @@ static int cr_interception(struct vcpu_svm *svm)
}
kvm_register_write(&svm->vcpu, reg, val);
}
- kvm_complete_insn_gp(&svm->vcpu, err);
-
- return 1;
+ return kvm_complete_insn_gp(&svm->vcpu, err);
}
static int dr_interception(struct vcpu_svm *svm)
diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c
index 3980da515fd0..aae43c6f2472 100644
--- a/arch/x86/kvm/vmx.c
+++ b/arch/x86/kvm/vmx.c
@@ -132,6 +132,22 @@ module_param_named(preemption_timer, enable_preemption_timer, bool, S_IRUGO);
#define VMX_MISC_EMULATED_PREEMPTION_TIMER_RATE 5
+#define VMX_VPID_EXTENT_SUPPORTED_MASK \
+ (VMX_VPID_EXTENT_INDIVIDUAL_ADDR_BIT | \
+ VMX_VPID_EXTENT_SINGLE_CONTEXT_BIT | \
+ VMX_VPID_EXTENT_GLOBAL_CONTEXT_BIT | \
+ VMX_VPID_EXTENT_SINGLE_NON_GLOBAL_BIT)
+
+/*
+ * Hyper-V requires all of these, so mark them as supported even though
+ * they are just treated the same as all-context.
+ */
+#define VMX_VPID_EXTENT_SUPPORTED_MASK \
+ (VMX_VPID_EXTENT_INDIVIDUAL_ADDR_BIT | \
+ VMX_VPID_EXTENT_SINGLE_CONTEXT_BIT | \
+ VMX_VPID_EXTENT_GLOBAL_CONTEXT_BIT | \
+ VMX_VPID_EXTENT_SINGLE_NON_GLOBAL_BIT)
+
/*
* These 2 parameters are used to config the controls for Pause-Loop Exiting:
* ple_gap: upper bound on the amount of time between two successive
@@ -446,23 +462,31 @@ struct nested_vmx {
u16 vpid02;
u16 last_vpid;
+ /*
+ * We only store the "true" versions of the VMX capability MSRs. We
+ * generate the "non-true" versions by setting the must-be-1 bits
+ * according to the SDM.
+ */
u32 nested_vmx_procbased_ctls_low;
u32 nested_vmx_procbased_ctls_high;
- u32 nested_vmx_true_procbased_ctls_low;
u32 nested_vmx_secondary_ctls_low;
u32 nested_vmx_secondary_ctls_high;
u32 nested_vmx_pinbased_ctls_low;
u32 nested_vmx_pinbased_ctls_high;
u32 nested_vmx_exit_ctls_low;
u32 nested_vmx_exit_ctls_high;
- u32 nested_vmx_true_exit_ctls_low;
u32 nested_vmx_entry_ctls_low;
u32 nested_vmx_entry_ctls_high;
- u32 nested_vmx_true_entry_ctls_low;
u32 nested_vmx_misc_low;
u32 nested_vmx_misc_high;
u32 nested_vmx_ept_caps;
u32 nested_vmx_vpid_caps;
+ u64 nested_vmx_basic;
+ u64 nested_vmx_cr0_fixed0;
+ u64 nested_vmx_cr0_fixed1;
+ u64 nested_vmx_cr4_fixed0;
+ u64 nested_vmx_cr4_fixed1;
+ u64 nested_vmx_vmcs_enum;
};
#define POSTED_INTR_ON 0
@@ -520,6 +544,12 @@ static inline void pi_set_sn(struct pi_desc *pi_desc)
(unsigned long *)&pi_desc->control);
}
+static inline void pi_clear_on(struct pi_desc *pi_desc)
+{
+ clear_bit(POSTED_INTR_ON,
+ (unsigned long *)&pi_desc->control);
+}
+
static inline int pi_test_on(struct pi_desc *pi_desc)
{
return test_bit(POSTED_INTR_ON,
@@ -920,16 +950,32 @@ static DEFINE_PER_CPU(struct desc_ptr, host_gdt);
static DEFINE_PER_CPU(struct list_head, blocked_vcpu_on_cpu);
static DEFINE_PER_CPU(spinlock_t, blocked_vcpu_on_cpu_lock);
-static unsigned long *vmx_io_bitmap_a;
-static unsigned long *vmx_io_bitmap_b;
-static unsigned long *vmx_msr_bitmap_legacy;
-static unsigned long *vmx_msr_bitmap_longmode;
-static unsigned long *vmx_msr_bitmap_legacy_x2apic;
-static unsigned long *vmx_msr_bitmap_longmode_x2apic;
-static unsigned long *vmx_msr_bitmap_legacy_x2apic_apicv_inactive;
-static unsigned long *vmx_msr_bitmap_longmode_x2apic_apicv_inactive;
-static unsigned long *vmx_vmread_bitmap;
-static unsigned long *vmx_vmwrite_bitmap;
+enum {
+ VMX_IO_BITMAP_A,
+ VMX_IO_BITMAP_B,
+ VMX_MSR_BITMAP_LEGACY,
+ VMX_MSR_BITMAP_LONGMODE,
+ VMX_MSR_BITMAP_LEGACY_X2APIC_APICV,
+ VMX_MSR_BITMAP_LONGMODE_X2APIC_APICV,
+ VMX_MSR_BITMAP_LEGACY_X2APIC,
+ VMX_MSR_BITMAP_LONGMODE_X2APIC,
+ VMX_VMREAD_BITMAP,
+ VMX_VMWRITE_BITMAP,
+ VMX_BITMAP_NR
+};
+
+static unsigned long *vmx_bitmap[VMX_BITMAP_NR];
+
+#define vmx_io_bitmap_a (vmx_bitmap[VMX_IO_BITMAP_A])
+#define vmx_io_bitmap_b (vmx_bitmap[VMX_IO_BITMAP_B])
+#define vmx_msr_bitmap_legacy (vmx_bitmap[VMX_MSR_BITMAP_LEGACY])
+#define vmx_msr_bitmap_longmode (vmx_bitmap[VMX_MSR_BITMAP_LONGMODE])
+#define vmx_msr_bitmap_legacy_x2apic_apicv (vmx_bitmap[VMX_MSR_BITMAP_LEGACY_X2APIC_APICV])
+#define vmx_msr_bitmap_longmode_x2apic_apicv (vmx_bitmap[VMX_MSR_BITMAP_LONGMODE_X2APIC_APICV])
+#define vmx_msr_bitmap_legacy_x2apic (vmx_bitmap[VMX_MSR_BITMAP_LEGACY_X2APIC])
+#define vmx_msr_bitmap_longmode_x2apic (vmx_bitmap[VMX_MSR_BITMAP_LONGMODE_X2APIC])
+#define vmx_vmread_bitmap (vmx_bitmap[VMX_VMREAD_BITMAP])
+#define vmx_vmwrite_bitmap (vmx_bitmap[VMX_VMWRITE_BITMAP])
static bool cpu_has_load_ia32_efer;
static bool cpu_has_load_perf_global_ctrl;
@@ -2523,14 +2569,14 @@ static void vmx_set_msr_bitmap(struct kvm_vcpu *vcpu)
SECONDARY_EXEC_VIRTUALIZE_X2APIC_MODE)) {
if (enable_apicv && kvm_vcpu_apicv_active(vcpu)) {
if (is_long_mode(vcpu))
- msr_bitmap = vmx_msr_bitmap_longmode_x2apic;
+ msr_bitmap = vmx_msr_bitmap_longmode_x2apic_apicv;
else
- msr_bitmap = vmx_msr_bitmap_legacy_x2apic;
+ msr_bitmap = vmx_msr_bitmap_legacy_x2apic_apicv;
} else {
if (is_long_mode(vcpu))
- msr_bitmap = vmx_msr_bitmap_longmode_x2apic_apicv_inactive;
+ msr_bitmap = vmx_msr_bitmap_longmode_x2apic;
else
- msr_bitmap = vmx_msr_bitmap_legacy_x2apic_apicv_inactive;
+ msr_bitmap = vmx_msr_bitmap_legacy_x2apic;
}
} else {
if (is_long_mode(vcpu))
@@ -2706,9 +2752,7 @@ static void nested_vmx_setup_ctls_msrs(struct vcpu_vmx *vmx)
vmx->nested.nested_vmx_exit_ctls_high |= VM_EXIT_CLEAR_BNDCFGS;
/* We support free control of debug control saving. */
- vmx->nested.nested_vmx_true_exit_ctls_low =
- vmx->nested.nested_vmx_exit_ctls_low &
- ~VM_EXIT_SAVE_DEBUG_CONTROLS;
+ vmx->nested.nested_vmx_exit_ctls_low &= ~VM_EXIT_SAVE_DEBUG_CONTROLS;
/* entry controls */
rdmsr(MSR_IA32_VMX_ENTRY_CTLS,
@@ -2727,9 +2771,7 @@ static void nested_vmx_setup_ctls_msrs(struct vcpu_vmx *vmx)
vmx->nested.nested_vmx_entry_ctls_high |= VM_ENTRY_LOAD_BNDCFGS;
/* We support free control of debug control loading. */
- vmx->nested.nested_vmx_true_entry_ctls_low =
- vmx->nested.nested_vmx_entry_ctls_low &
- ~VM_ENTRY_LOAD_DEBUG_CONTROLS;
+ vmx->nested.nested_vmx_entry_ctls_low &= ~VM_ENTRY_LOAD_DEBUG_CONTROLS;
/* cpu-based controls */
rdmsr(MSR_IA32_VMX_PROCBASED_CTLS,
@@ -2762,8 +2804,7 @@ static void nested_vmx_setup_ctls_msrs(struct vcpu_vmx *vmx)
CPU_BASED_USE_MSR_BITMAPS;
/* We support free control of CR3 access interception. */
- vmx->nested.nested_vmx_true_procbased_ctls_low =
- vmx->nested.nested_vmx_procbased_ctls_low &
+ vmx->nested.nested_vmx_procbased_ctls_low &=
~(CPU_BASED_CR3_LOAD_EXITING | CPU_BASED_CR3_STORE_EXITING);
/* secondary cpu-based controls */
@@ -2774,6 +2815,7 @@ static void nested_vmx_setup_ctls_msrs(struct vcpu_vmx *vmx)
vmx->nested.nested_vmx_secondary_ctls_high &=
SECONDARY_EXEC_VIRTUALIZE_APIC_ACCESSES |
SECONDARY_EXEC_RDTSCP |
+ SECONDARY_EXEC_DESC |
SECONDARY_EXEC_VIRTUALIZE_X2APIC_MODE |
SECONDARY_EXEC_ENABLE_VPID |
SECONDARY_EXEC_APIC_REGISTER_VIRT |
@@ -2805,8 +2847,7 @@ static void nested_vmx_setup_ctls_msrs(struct vcpu_vmx *vmx)
*/
if (enable_vpid)
vmx->nested.nested_vmx_vpid_caps = VMX_VPID_INVVPID_BIT |
- VMX_VPID_EXTENT_SINGLE_CONTEXT_BIT |
- VMX_VPID_EXTENT_GLOBAL_CONTEXT_BIT;
+ VMX_VPID_EXTENT_SUPPORTED_MASK;
else
vmx->nested.nested_vmx_vpid_caps = 0;
@@ -2823,14 +2864,52 @@ static void nested_vmx_setup_ctls_msrs(struct vcpu_vmx *vmx)
VMX_MISC_EMULATED_PREEMPTION_TIMER_RATE |
VMX_MISC_ACTIVITY_HLT;
vmx->nested.nested_vmx_misc_high = 0;
+
+ /*
+ * This MSR reports some information about VMX support. We
+ * should return information about the VMX we emulate for the
+ * guest, and the VMCS structure we give it - not about the
+ * VMX support of the underlying hardware.
+ */
+ vmx->nested.nested_vmx_basic =
+ VMCS12_REVISION |
+ VMX_BASIC_TRUE_CTLS |
+ ((u64)VMCS12_SIZE << VMX_BASIC_VMCS_SIZE_SHIFT) |
+ (VMX_BASIC_MEM_TYPE_WB << VMX_BASIC_MEM_TYPE_SHIFT);
+
+ if (cpu_has_vmx_basic_inout())
+ vmx->nested.nested_vmx_basic |= VMX_BASIC_INOUT;
+
+ /*
+ * These MSRs specify bits which the guest must keep fixed on
+ * while L1 is in VMXON mode (in L1's root mode, or running an L2).
+ * We picked the standard core2 setting.
+ */
+#define VMXON_CR0_ALWAYSON (X86_CR0_PE | X86_CR0_PG | X86_CR0_NE)
+#define VMXON_CR4_ALWAYSON X86_CR4_VMXE
+ vmx->nested.nested_vmx_cr0_fixed0 = VMXON_CR0_ALWAYSON;
+ vmx->nested.nested_vmx_cr4_fixed0 = VMXON_CR4_ALWAYSON;
+
+ /* These MSRs specify bits which the guest must keep fixed off. */
+ rdmsrl(MSR_IA32_VMX_CR0_FIXED1, vmx->nested.nested_vmx_cr0_fixed1);
+ rdmsrl(MSR_IA32_VMX_CR4_FIXED1, vmx->nested.nested_vmx_cr4_fixed1);
+
+ /* highest index: VMX_PREEMPTION_TIMER_VALUE */
+ vmx->nested.nested_vmx_vmcs_enum = 0x2e;
+}
+
+/*
+ * if fixed0[i] == 1: val[i] must be 1
+ * if fixed1[i] == 0: val[i] must be 0
+ */
+static inline bool fixed_bits_valid(u64 val, u64 fixed0, u64 fixed1)
+{
+ return ((val & fixed1) | fixed0) == val;
}
static inline bool vmx_control_verify(u32 control, u32 low, u32 high)
{
- /*
- * Bits 0 in high must be 0, and bits 1 in low must be 1.
- */
- return ((control & high) | low) == control;
+ return fixed_bits_valid(control, low, high);
}
static inline u64 vmx_control_msr(u32 low, u32 high)
@@ -2838,87 +2917,285 @@ static inline u64 vmx_control_msr(u32 low, u32 high)
return low | ((u64)high << 32);
}
-/* Returns 0 on success, non-0 otherwise. */
-static int vmx_get_vmx_msr(struct kvm_vcpu *vcpu, u32 msr_index, u64 *pdata)
+static bool is_bitwise_subset(u64 superset, u64 subset, u64 mask)
+{
+ superset &= mask;
+ subset &= mask;
+
+ return (superset | subset) == superset;
+}
+
+static int vmx_restore_vmx_basic(struct vcpu_vmx *vmx, u64 data)
+{
+ const u64 feature_and_reserved =
+ /* feature (except bit 48; see below) */
+ BIT_ULL(49) | BIT_ULL(54) | BIT_ULL(55) |
+ /* reserved */
+ BIT_ULL(31) | GENMASK_ULL(47, 45) | GENMASK_ULL(63, 56);
+ u64 vmx_basic = vmx->nested.nested_vmx_basic;
+
+ if (!is_bitwise_subset(vmx_basic, data, feature_and_reserved))
+ return -EINVAL;
+
+ /*
+ * KVM does not emulate a version of VMX that constrains physical
+ * addresses of VMX structures (e.g. VMCS) to 32-bits.
+ */
+ if (data & BIT_ULL(48))
+ return -EINVAL;
+
+ if (vmx_basic_vmcs_revision_id(vmx_basic) !=
+ vmx_basic_vmcs_revision_id(data))
+ return -EINVAL;
+
+ if (vmx_basic_vmcs_size(vmx_basic) > vmx_basic_vmcs_size(data))
+ return -EINVAL;
+
+ vmx->nested.nested_vmx_basic = data;
+ return 0;
+}
+
+static int
+vmx_restore_control_msr(struct vcpu_vmx *vmx, u32 msr_index, u64 data)
+{
+ u64 supported;
+ u32 *lowp, *highp;
+
+ switch (msr_index) {
+ case MSR_IA32_VMX_TRUE_PINBASED_CTLS:
+ lowp = &vmx->nested.nested_vmx_pinbased_ctls_low;
+ highp = &vmx->nested.nested_vmx_pinbased_ctls_high;
+ break;
+ case MSR_IA32_VMX_TRUE_PROCBASED_CTLS:
+ lowp = &vmx->nested.nested_vmx_procbased_ctls_low;
+ highp = &vmx->nested.nested_vmx_procbased_ctls_high;
+ break;
+ case MSR_IA32_VMX_TRUE_EXIT_CTLS:
+ lowp = &vmx->nested.nested_vmx_exit_ctls_low;
+ highp = &vmx->nested.nested_vmx_exit_ctls_high;
+ break;
+ case MSR_IA32_VMX_TRUE_ENTRY_CTLS:
+ lowp = &vmx->nested.nested_vmx_entry_ctls_low;
+ highp = &vmx->nested.nested_vmx_entry_ctls_high;
+ break;
+ case MSR_IA32_VMX_PROCBASED_CTLS2:
+ lowp = &vmx->nested.nested_vmx_secondary_ctls_low;
+ highp = &vmx->nested.nested_vmx_secondary_ctls_high;
+ break;
+ default:
+ BUG();
+ }
+
+ supported = vmx_control_msr(*lowp, *highp);
+
+ /* Check must-be-1 bits are still 1. */
+ if (!is_bitwise_subset(data, supported, GENMASK_ULL(31, 0)))
+ return -EINVAL;
+
+ /* Check must-be-0 bits are still 0. */
+ if (!is_bitwise_subset(supported, data, GENMASK_ULL(63, 32)))
+ return -EINVAL;
+
+ *lowp = data;
+ *highp = data >> 32;
+ return 0;
+}
+
+static int vmx_restore_vmx_misc(struct vcpu_vmx *vmx, u64 data)
+{
+ const u64 feature_and_reserved_bits =
+ /* feature */
+ BIT_ULL(5) | GENMASK_ULL(8, 6) | BIT_ULL(14) | BIT_ULL(15) |
+ BIT_ULL(28) | BIT_ULL(29) | BIT_ULL(30) |
+ /* reserved */
+ GENMASK_ULL(13, 9) | BIT_ULL(31);
+ u64 vmx_misc;
+
+ vmx_misc = vmx_control_msr(vmx->nested.nested_vmx_misc_low,
+ vmx->nested.nested_vmx_misc_high);
+
+ if (!is_bitwise_subset(vmx_misc, data, feature_and_reserved_bits))
+ return -EINVAL;
+
+ if ((vmx->nested.nested_vmx_pinbased_ctls_high &
+ PIN_BASED_VMX_PREEMPTION_TIMER) &&
+ vmx_misc_preemption_timer_rate(data) !=
+ vmx_misc_preemption_timer_rate(vmx_misc))
+ return -EINVAL;
+
+ if (vmx_misc_cr3_count(data) > vmx_misc_cr3_count(vmx_misc))
+ return -EINVAL;
+
+ if (vmx_misc_max_msr(data) > vmx_misc_max_msr(vmx_misc))
+ return -EINVAL;
+
+ if (vmx_misc_mseg_revid(data) != vmx_misc_mseg_revid(vmx_misc))
+ return -EINVAL;
+
+ vmx->nested.nested_vmx_misc_low = data;
+ vmx->nested.nested_vmx_misc_high = data >> 32;
+ return 0;
+}
+
+static int vmx_restore_vmx_ept_vpid_cap(struct vcpu_vmx *vmx, u64 data)
+{
+ u64 vmx_ept_vpid_cap;
+
+ vmx_ept_vpid_cap = vmx_control_msr(vmx->nested.nested_vmx_ept_caps,
+ vmx->nested.nested_vmx_vpid_caps);
+
+ /* Every bit is either reserved or a feature bit. */
+ if (!is_bitwise_subset(vmx_ept_vpid_cap, data, -1ULL))
+ return -EINVAL;
+
+ vmx->nested.nested_vmx_ept_caps = data;
+ vmx->nested.nested_vmx_vpid_caps = data >> 32;
+ return 0;
+}
+
+static int vmx_restore_fixed0_msr(struct vcpu_vmx *vmx, u32 msr_index, u64 data)
+{
+ u64 *msr;
+
+ switch (msr_index) {
+ case MSR_IA32_VMX_CR0_FIXED0:
+ msr = &vmx->nested.nested_vmx_cr0_fixed0;
+ break;
+ case MSR_IA32_VMX_CR4_FIXED0:
+ msr = &vmx->nested.nested_vmx_cr4_fixed0;
+ break;
+ default:
+ BUG();
+ }
+
+ /*
+ * 1 bits (which indicates bits which "must-be-1" during VMX operation)
+ * must be 1 in the restored value.
+ */
+ if (!is_bitwise_subset(data, *msr, -1ULL))
+ return -EINVAL;
+
+ *msr = data;
+ return 0;
+}
+
+/*
+ * Called when userspace is restoring VMX MSRs.
+ *
+ * Returns 0 on success, non-0 otherwise.
+ */
+static int vmx_set_vmx_msr(struct kvm_vcpu *vcpu, u32 msr_index, u64 data)
{
struct vcpu_vmx *vmx = to_vmx(vcpu);
switch (msr_index) {
case MSR_IA32_VMX_BASIC:
+ return vmx_restore_vmx_basic(vmx, data);
+ case MSR_IA32_VMX_PINBASED_CTLS:
+ case MSR_IA32_VMX_PROCBASED_CTLS:
+ case MSR_IA32_VMX_EXIT_CTLS:
+ case MSR_IA32_VMX_ENTRY_CTLS:
+ /*
+ * The "non-true" VMX capability MSRs are generated from the
+ * "true" MSRs, so we do not support restoring them directly.
+ *
+ * If userspace wants to emulate VMX_BASIC[55]=0, userspace
+ * should restore the "true" MSRs with the must-be-1 bits
+ * set according to the SDM Vol 3. A.2 "RESERVED CONTROLS AND
+ * DEFAULT SETTINGS".
+ */
+ return -EINVAL;
+ case MSR_IA32_VMX_TRUE_PINBASED_CTLS:
+ case MSR_IA32_VMX_TRUE_PROCBASED_CTLS:
+ case MSR_IA32_VMX_TRUE_EXIT_CTLS:
+ case MSR_IA32_VMX_TRUE_ENTRY_CTLS:
+ case MSR_IA32_VMX_PROCBASED_CTLS2:
+ return vmx_restore_control_msr(vmx, msr_index, data);
+ case MSR_IA32_VMX_MISC:
+ return vmx_restore_vmx_misc(vmx, data);
+ case MSR_IA32_VMX_CR0_FIXED0:
+ case MSR_IA32_VMX_CR4_FIXED0:
+ return vmx_restore_fixed0_msr(vmx, msr_index, data);
+ case MSR_IA32_VMX_CR0_FIXED1:
+ case MSR_IA32_VMX_CR4_FIXED1:
+ /*
+ * These MSRs are generated based on the vCPU's CPUID, so we
+ * do not support restoring them directly.
+ */
+ return -EINVAL;
+ case MSR_IA32_VMX_EPT_VPID_CAP:
+ return vmx_restore_vmx_ept_vpid_cap(vmx, data);
+ case MSR_IA32_VMX_VMCS_ENUM:
+ vmx->nested.nested_vmx_vmcs_enum = data;
+ return 0;
+ default:
/*
- * This MSR reports some information about VMX support. We
- * should return information about the VMX we emulate for the
- * guest, and the VMCS structure we give it - not about the
- * VMX support of the underlying hardware.
+ * The rest of the VMX capability MSRs do not support restore.
*/
- *pdata = VMCS12_REVISION | VMX_BASIC_TRUE_CTLS |
- ((u64)VMCS12_SIZE << VMX_BASIC_VMCS_SIZE_SHIFT) |
- (VMX_BASIC_MEM_TYPE_WB << VMX_BASIC_MEM_TYPE_SHIFT);
- if (cpu_has_vmx_basic_inout())
- *pdata |= VMX_BASIC_INOUT;
+ return -EINVAL;
+ }
+}
+
+/* Returns 0 on success, non-0 otherwise. */
+static int vmx_get_vmx_msr(struct kvm_vcpu *vcpu, u32 msr_index, u64 *pdata)
+{
+ struct vcpu_vmx *vmx = to_vmx(vcpu);
+
+ switch (msr_index) {
+ case MSR_IA32_VMX_BASIC:
+ *pdata = vmx->nested.nested_vmx_basic;
break;
case MSR_IA32_VMX_TRUE_PINBASED_CTLS:
case MSR_IA32_VMX_PINBASED_CTLS:
*pdata = vmx_control_msr(
vmx->nested.nested_vmx_pinbased_ctls_low,
vmx->nested.nested_vmx_pinbased_ctls_high);
+ if (msr_index == MSR_IA32_VMX_PINBASED_CTLS)
+ *pdata |= PIN_BASED_ALWAYSON_WITHOUT_TRUE_MSR;
break;
case MSR_IA32_VMX_TRUE_PROCBASED_CTLS:
- *pdata = vmx_control_msr(
- vmx->nested.nested_vmx_true_procbased_ctls_low,
- vmx->nested.nested_vmx_procbased_ctls_high);
- break;
case MSR_IA32_VMX_PROCBASED_CTLS:
*pdata = vmx_control_msr(
vmx->nested.nested_vmx_procbased_ctls_low,
vmx->nested.nested_vmx_procbased_ctls_high);
+ if (msr_index == MSR_IA32_VMX_PROCBASED_CTLS)
+ *pdata |= CPU_BASED_ALWAYSON_WITHOUT_TRUE_MSR;
break;
case MSR_IA32_VMX_TRUE_EXIT_CTLS:
- *pdata = vmx_control_msr(
- vmx->nested.nested_vmx_true_exit_ctls_low,
- vmx->nested.nested_vmx_exit_ctls_high);
- break;
case MSR_IA32_VMX_EXIT_CTLS:
*pdata = vmx_control_msr(
vmx->nested.nested_vmx_exit_ctls_low,
vmx->nested.nested_vmx_exit_ctls_high);
+ if (msr_index == MSR_IA32_VMX_EXIT_CTLS)
+ *pdata |= VM_EXIT_ALWAYSON_WITHOUT_TRUE_MSR;
break;
case MSR_IA32_VMX_TRUE_ENTRY_CTLS:
- *pdata = vmx_control_msr(
- vmx->nested.nested_vmx_true_entry_ctls_low,
- vmx->nested.nested_vmx_entry_ctls_high);
- break;
case MSR_IA32_VMX_ENTRY_CTLS:
*pdata = vmx_control_msr(
vmx->nested.nested_vmx_entry_ctls_low,
vmx->nested.nested_vmx_entry_ctls_high);
+ if (msr_index == MSR_IA32_VMX_ENTRY_CTLS)
+ *pdata |= VM_ENTRY_ALWAYSON_WITHOUT_TRUE_MSR;
break;
case MSR_IA32_VMX_MISC:
*pdata = vmx_control_msr(
vmx->nested.nested_vmx_misc_low,
vmx->nested.nested_vmx_misc_high);
break;
- /*
- * These MSRs specify bits which the guest must keep fixed (on or off)
- * while L1 is in VMXON mode (in L1's root mode, or running an L2).
- * We picked the standard core2 setting.
- */
-#define VMXON_CR0_ALWAYSON (X86_CR0_PE | X86_CR0_PG | X86_CR0_NE)
-#define VMXON_CR4_ALWAYSON X86_CR4_VMXE
case MSR_IA32_VMX_CR0_FIXED0:
- *pdata = VMXON_CR0_ALWAYSON;
+ *pdata = vmx->nested.nested_vmx_cr0_fixed0;
break;
case MSR_IA32_VMX_CR0_FIXED1:
- *pdata = -1ULL;
+ *pdata = vmx->nested.nested_vmx_cr0_fixed1;
break;
case MSR_IA32_VMX_CR4_FIXED0:
- *pdata = VMXON_CR4_ALWAYSON;
+ *pdata = vmx->nested.nested_vmx_cr4_fixed0;
break;
case MSR_IA32_VMX_CR4_FIXED1:
- *pdata = -1ULL;
+ *pdata = vmx->nested.nested_vmx_cr4_fixed1;
break;
case MSR_IA32_VMX_VMCS_ENUM:
- *pdata = 0x2e; /* highest index: VMX_PREEMPTION_TIMER_VALUE */
+ *pdata = vmx->nested.nested_vmx_vmcs_enum;
break;
case MSR_IA32_VMX_PROCBASED_CTLS2:
*pdata = vmx_control_msr(
@@ -3101,7 +3378,11 @@ static int vmx_set_msr(struct kvm_vcpu *vcpu, struct msr_data *msr_info)
vmx_leave_nested(vcpu);
break;
case MSR_IA32_VMX_BASIC ... MSR_IA32_VMX_VMFUNC:
- return 1; /* they are read-only */
+ if (!msr_info->host_initiated)
+ return 1; /* they are read-only */
+ if (!nested_vmx_allowed(vcpu))
+ return 1;
+ return vmx_set_vmx_msr(vcpu, msr_index, data);
case MSR_IA32_XSS:
if (!vmx_xsaves_supported())
return 1;
@@ -3863,6 +4144,40 @@ static void ept_save_pdptrs(struct kvm_vcpu *vcpu)
(unsigned long *)&vcpu->arch.regs_dirty);
}
+static bool nested_guest_cr0_valid(struct kvm_vcpu *vcpu, unsigned long val)
+{
+ u64 fixed0 = to_vmx(vcpu)->nested.nested_vmx_cr0_fixed0;
+ u64 fixed1 = to_vmx(vcpu)->nested.nested_vmx_cr0_fixed1;
+ struct vmcs12 *vmcs12 = get_vmcs12(vcpu);
+
+ if (to_vmx(vcpu)->nested.nested_vmx_secondary_ctls_high &
+ SECONDARY_EXEC_UNRESTRICTED_GUEST &&
+ nested_cpu_has2(vmcs12, SECONDARY_EXEC_UNRESTRICTED_GUEST))
+ fixed0 &= ~(X86_CR0_PE | X86_CR0_PG);
+
+ return fixed_bits_valid(val, fixed0, fixed1);
+}
+
+static bool nested_host_cr0_valid(struct kvm_vcpu *vcpu, unsigned long val)
+{
+ u64 fixed0 = to_vmx(vcpu)->nested.nested_vmx_cr0_fixed0;
+ u64 fixed1 = to_vmx(vcpu)->nested.nested_vmx_cr0_fixed1;
+
+ return fixed_bits_valid(val, fixed0, fixed1);
+}
+
+static bool nested_cr4_valid(struct kvm_vcpu *vcpu, unsigned long val)
+{
+ u64 fixed0 = to_vmx(vcpu)->nested.nested_vmx_cr4_fixed0;
+ u64 fixed1 = to_vmx(vcpu)->nested.nested_vmx_cr4_fixed1;
+
+ return fixed_bits_valid(val, fixed0, fixed1);
+}
+
+/* No difference in the restrictions on guest and host CR4 in VMX operation. */
+#define nested_guest_cr4_valid nested_cr4_valid
+#define nested_host_cr4_valid nested_cr4_valid
+
static int vmx_set_cr4(struct kvm_vcpu *vcpu, unsigned long cr4);
static void ept_update_paging_mode_cr0(unsigned long *hw_cr0,
@@ -3991,8 +4306,8 @@ static int vmx_set_cr4(struct kvm_vcpu *vcpu, unsigned long cr4)
if (!nested_vmx_allowed(vcpu))
return 1;
}
- if (to_vmx(vcpu)->nested.vmxon &&
- ((cr4 & VMXON_CR4_ALWAYSON) != VMXON_CR4_ALWAYSON))
+
+ if (to_vmx(vcpu)->nested.vmxon && !nested_cr4_valid(vcpu, cr4))
return 1;
vcpu->arch.cr4 = cr4;
@@ -4569,41 +4884,6 @@ static void __vmx_disable_intercept_for_msr(unsigned long *msr_bitmap,
}
}
-static void __vmx_enable_intercept_for_msr(unsigned long *msr_bitmap,
- u32 msr, int type)
-{
- int f = sizeof(unsigned long);
-
- if (!cpu_has_vmx_msr_bitmap())
- return;
-
- /*
- * See Intel PRM Vol. 3, 20.6.9 (MSR-Bitmap Address). Early manuals
- * have the write-low and read-high bitmap offsets the wrong way round.
- * We can control MSRs 0x00000000-0x00001fff and 0xc0000000-0xc0001fff.
- */
- if (msr <= 0x1fff) {
- if (type & MSR_TYPE_R)
- /* read-low */
- __set_bit(msr, msr_bitmap + 0x000 / f);
-
- if (type & MSR_TYPE_W)
- /* write-low */
- __set_bit(msr, msr_bitmap + 0x800 / f);
-
- } else if ((msr >= 0xc0000000) && (msr <= 0xc0001fff)) {
- msr &= 0x1fff;
- if (type & MSR_TYPE_R)
- /* read-high */
- __set_bit(msr, msr_bitmap + 0x400 / f);
-
- if (type & MSR_TYPE_W)
- /* write-high */
- __set_bit(msr, msr_bitmap + 0xc00 / f);
-
- }
-}
-
/*
* If a msr is allowed by L0, we should check whether it is allowed by L1.
* The corresponding bit will be cleared unless both of L0 and L1 allow it.
@@ -4659,48 +4939,18 @@ static void vmx_disable_intercept_for_msr(u32 msr, bool longmode_only)
msr, MSR_TYPE_R | MSR_TYPE_W);
}
-static void vmx_enable_intercept_msr_read_x2apic(u32 msr, bool apicv_active)
-{
- if (apicv_active) {
- __vmx_enable_intercept_for_msr(vmx_msr_bitmap_legacy_x2apic,
- msr, MSR_TYPE_R);
- __vmx_enable_intercept_for_msr(vmx_msr_bitmap_longmode_x2apic,
- msr, MSR_TYPE_R);
- } else {
- __vmx_enable_intercept_for_msr(vmx_msr_bitmap_legacy_x2apic_apicv_inactive,
- msr, MSR_TYPE_R);
- __vmx_enable_intercept_for_msr(vmx_msr_bitmap_longmode_x2apic_apicv_inactive,
- msr, MSR_TYPE_R);
- }
-}
-
-static void vmx_disable_intercept_msr_read_x2apic(u32 msr, bool apicv_active)
+static void vmx_disable_intercept_msr_x2apic(u32 msr, int type, bool apicv_active)
{
if (apicv_active) {
- __vmx_disable_intercept_for_msr(vmx_msr_bitmap_legacy_x2apic,
- msr, MSR_TYPE_R);
- __vmx_disable_intercept_for_msr(vmx_msr_bitmap_longmode_x2apic,
- msr, MSR_TYPE_R);
+ __vmx_disable_intercept_for_msr(vmx_msr_bitmap_legacy_x2apic_apicv,
+ msr, type);
+ __vmx_disable_intercept_for_msr(vmx_msr_bitmap_longmode_x2apic_apicv,
+ msr, type);
} else {
- __vmx_disable_intercept_for_msr(vmx_msr_bitmap_legacy_x2apic_apicv_inactive,
- msr, MSR_TYPE_R);
- __vmx_disable_intercept_for_msr(vmx_msr_bitmap_longmode_x2apic_apicv_inactive,
- msr, MSR_TYPE_R);
- }
-}
-
-static void vmx_disable_intercept_msr_write_x2apic(u32 msr, bool apicv_active)
-{
- if (apicv_active) {
__vmx_disable_intercept_for_msr(vmx_msr_bitmap_legacy_x2apic,
- msr, MSR_TYPE_W);
+ msr, type);
__vmx_disable_intercept_for_msr(vmx_msr_bitmap_longmode_x2apic,
- msr, MSR_TYPE_W);
- } else {
- __vmx_disable_intercept_for_msr(vmx_msr_bitmap_legacy_x2apic_apicv_inactive,
- msr, MSR_TYPE_W);
- __vmx_disable_intercept_for_msr(vmx_msr_bitmap_longmode_x2apic_apicv_inactive,
- msr, MSR_TYPE_W);
+ msr, type);
}
}
@@ -4822,9 +5072,15 @@ static void vmx_sync_pir_to_irr(struct kvm_vcpu *vcpu)
{
struct vcpu_vmx *vmx = to_vmx(vcpu);
- if (!pi_test_and_clear_on(&vmx->pi_desc))
+ if (!pi_test_on(&vmx->pi_desc))
return;
+ pi_clear_on(&vmx->pi_desc);
+ /*
+ * IOMMU can write to PIR.ON, so the barrier matters even on UP.
+ * But on x86 this is just a compiler barrier anyway.
+ */
+ smp_mb__after_atomic();
kvm_apic_update_irr(vcpu, vmx->pi_desc.pir);
}
@@ -5583,7 +5839,7 @@ static int handle_triple_fault(struct kvm_vcpu *vcpu)
static int handle_io(struct kvm_vcpu *vcpu)
{
unsigned long exit_qualification;
- int size, in, string;
+ int size, in, string, ret;
unsigned port;
exit_qualification = vmcs_readl(EXIT_QUALIFICATION);
@@ -5597,9 +5853,14 @@ static int handle_io(struct kvm_vcpu *vcpu)
port = exit_qualification >> 16;
size = (exit_qualification & 7) + 1;
- skip_emulated_instruction(vcpu);
- return kvm_fast_pio_out(vcpu, size, port);
+ ret = kvm_skip_emulated_instruction(vcpu);
+
+ /*
+ * TODO: we might be squashing a KVM_GUESTDBG_SINGLESTEP-triggered
+ * KVM_EXIT_DEBUG here.
+ */
+ return kvm_fast_pio_out(vcpu, size, port) && ret;
}
static void
@@ -5613,18 +5874,6 @@ vmx_patch_hypercall(struct kvm_vcpu *vcpu, unsigned char *hypercall)
hypercall[2] = 0xc1;
}
-static bool nested_cr0_valid(struct kvm_vcpu *vcpu, unsigned long val)
-{
- unsigned long always_on = VMXON_CR0_ALWAYSON;
- struct vmcs12 *vmcs12 = get_vmcs12(vcpu);
-
- if (to_vmx(vcpu)->nested.nested_vmx_secondary_ctls_high &
- SECONDARY_EXEC_UNRESTRICTED_GUEST &&
- nested_cpu_has2(vmcs12, SECONDARY_EXEC_UNRESTRICTED_GUEST))
- always_on &= ~(X86_CR0_PE | X86_CR0_PG);
- return (val & always_on) == always_on;
-}
-
/* called to set cr0 as appropriate for a mov-to-cr0 exit. */
static int handle_set_cr0(struct kvm_vcpu *vcpu, unsigned long val)
{
@@ -5643,7 +5892,7 @@ static int handle_set_cr0(struct kvm_vcpu *vcpu, unsigned long val)
val = (val & ~vmcs12->cr0_guest_host_mask) |
(vmcs12->guest_cr0 & vmcs12->cr0_guest_host_mask);
- if (!nested_cr0_valid(vcpu, val))
+ if (!nested_guest_cr0_valid(vcpu, val))
return 1;
if (kvm_set_cr0(vcpu, val))
@@ -5652,8 +5901,9 @@ static int handle_set_cr0(struct kvm_vcpu *vcpu, unsigned long val)
return 0;
} else {
if (to_vmx(vcpu)->nested.vmxon &&
- ((val & VMXON_CR0_ALWAYSON) != VMXON_CR0_ALWAYSON))
+ !nested_host_cr0_valid(vcpu, val))
return 1;
+
return kvm_set_cr0(vcpu, val);
}
}
@@ -5697,6 +5947,7 @@ static int handle_cr(struct kvm_vcpu *vcpu)
int cr;
int reg;
int err;
+ int ret;
exit_qualification = vmcs_readl(EXIT_QUALIFICATION);
cr = exit_qualification & 15;
@@ -5708,25 +5959,27 @@ static int handle_cr(struct kvm_vcpu *vcpu)
switch (cr) {
case 0:
err = handle_set_cr0(vcpu, val);
- kvm_complete_insn_gp(vcpu, err);
- return 1;
+ return kvm_complete_insn_gp(vcpu, err);
case 3:
err = kvm_set_cr3(vcpu, val);
- kvm_complete_insn_gp(vcpu, err);
- return 1;
+ return kvm_complete_insn_gp(vcpu, err);
case 4:
err = handle_set_cr4(vcpu, val);
- kvm_complete_insn_gp(vcpu, err);
- return 1;
+ return kvm_complete_insn_gp(vcpu, err);
case 8: {
u8 cr8_prev = kvm_get_cr8(vcpu);
u8 cr8 = (u8)val;
err = kvm_set_cr8(vcpu, cr8);
- kvm_complete_insn_gp(vcpu, err);
+ ret = kvm_complete_insn_gp(vcpu, err);
if (lapic_in_kernel(vcpu))
- return 1;
+ return ret;
if (cr8_prev <= cr8)
- return 1;
+ return ret;
+ /*
+ * TODO: we might be squashing a
+ * KVM_GUESTDBG_SINGLESTEP-triggered
+ * KVM_EXIT_DEBUG here.
+ */
vcpu->run->exit_reason = KVM_EXIT_SET_TPR;
return 0;
}
@@ -5735,23 +5988,20 @@ static int handle_cr(struct kvm_vcpu *vcpu)
case 2: /* clts */
handle_clts(vcpu);
trace_kvm_cr_write(0, kvm_read_cr0(vcpu));
- skip_emulated_instruction(vcpu);
vmx_fpu_activate(vcpu);
- return 1;
+ return kvm_skip_emulated_instruction(vcpu);
case 1: /*mov from cr*/
switch (cr) {
case 3:
val = kvm_read_cr3(vcpu);
kvm_register_write(vcpu, reg, val);
trace_kvm_cr_read(cr, val);
- skip_emulated_instruction(vcpu);
- return 1;
+ return kvm_skip_emulated_instruction(vcpu);
case 8:
val = kvm_get_cr8(vcpu);
kvm_register_write(vcpu, reg, val);
trace_kvm_cr_read(cr, val);
- skip_emulated_instruction(vcpu);
- return 1;
+ return kvm_skip_emulated_instruction(vcpu);
}
break;
case 3: /* lmsw */
@@ -5759,8 +6009,7 @@ static int handle_cr(struct kvm_vcpu *vcpu)
trace_kvm_cr_write(0, (kvm_read_cr0(vcpu) & ~0xful) | val);
kvm_lmsw(vcpu, val);
- skip_emulated_instruction(vcpu);
- return 1;
+ return kvm_skip_emulated_instruction(vcpu);
default:
break;
}
@@ -5831,8 +6080,7 @@ static int handle_dr(struct kvm_vcpu *vcpu)
if (kvm_set_dr(vcpu, dr, kvm_register_readl(vcpu, reg)))
return 1;
- skip_emulated_instruction(vcpu);
- return 1;
+ return kvm_skip_emulated_instruction(vcpu);
}
static u64 vmx_get_dr6(struct kvm_vcpu *vcpu)
@@ -5864,8 +6112,7 @@ static void vmx_set_dr7(struct kvm_vcpu *vcpu, unsigned long val)
static int handle_cpuid(struct kvm_vcpu *vcpu)
{
- kvm_emulate_cpuid(vcpu);
- return 1;
+ return kvm_emulate_cpuid(vcpu);
}
static int handle_rdmsr(struct kvm_vcpu *vcpu)
@@ -5886,8 +6133,7 @@ static int handle_rdmsr(struct kvm_vcpu *vcpu)
/* FIXME: handling of bits 32:63 of rax, rdx */
vcpu->arch.regs[VCPU_REGS_RAX] = msr_info.data & -1u;
vcpu->arch.regs[VCPU_REGS_RDX] = (msr_info.data >> 32) & -1u;
- skip_emulated_instruction(vcpu);
- return 1;
+ return kvm_skip_emulated_instruction(vcpu);
}
static int handle_wrmsr(struct kvm_vcpu *vcpu)
@@ -5907,8 +6153,7 @@ static int handle_wrmsr(struct kvm_vcpu *vcpu)
}
trace_kvm_msr_write(ecx, data);
- skip_emulated_instruction(vcpu);
- return 1;
+ return kvm_skip_emulated_instruction(vcpu);
}
static int handle_tpr_below_threshold(struct kvm_vcpu *vcpu)
@@ -5952,8 +6197,7 @@ static int handle_invlpg(struct kvm_vcpu *vcpu)
unsigned long exit_qualification = vmcs_readl(EXIT_QUALIFICATION);
kvm_mmu_invlpg(vcpu, exit_qualification);
- skip_emulated_instruction(vcpu);
- return 1;
+ return kvm_skip_emulated_instruction(vcpu);
}
static int handle_rdpmc(struct kvm_vcpu *vcpu)
@@ -5961,15 +6205,12 @@ static int handle_rdpmc(struct kvm_vcpu *vcpu)
int err;
err = kvm_rdpmc(vcpu);
- kvm_complete_insn_gp(vcpu, err);
-
- return 1;
+ return kvm_complete_insn_gp(vcpu, err);
}
static int handle_wbinvd(struct kvm_vcpu *vcpu)
{
- kvm_emulate_wbinvd(vcpu);
- return 1;
+ return kvm_emulate_wbinvd(vcpu);
}
static int handle_xsetbv(struct kvm_vcpu *vcpu)
@@ -5978,20 +6219,20 @@ static int handle_xsetbv(struct kvm_vcpu *vcpu)
u32 index = kvm_register_read(vcpu, VCPU_REGS_RCX);
if (kvm_set_xcr(vcpu, index, new_bv) == 0)
- skip_emulated_instruction(vcpu);
+ return kvm_skip_emulated_instruction(vcpu);
return 1;
}
static int handle_xsaves(struct kvm_vcpu *vcpu)
{
- skip_emulated_instruction(vcpu);
+ kvm_skip_emulated_instruction(vcpu);
WARN(1, "this should never happen\n");
return 1;
}
static int handle_xrstors(struct kvm_vcpu *vcpu)
{
- skip_emulated_instruction(vcpu);
+ kvm_skip_emulated_instruction(vcpu);
WARN(1, "this should never happen\n");
return 1;
}
@@ -6012,8 +6253,7 @@ static int handle_apic_access(struct kvm_vcpu *vcpu)
if ((access_type == TYPE_LINEAR_APIC_INST_WRITE) &&
(offset == APIC_EOI)) {
kvm_lapic_set_eoi(vcpu);
- skip_emulated_instruction(vcpu);
- return 1;
+ return kvm_skip_emulated_instruction(vcpu);
}
}
return emulate_instruction(vcpu, 0) == EMULATE_DONE;
@@ -6161,9 +6401,8 @@ static int handle_ept_misconfig(struct kvm_vcpu *vcpu)
gpa = vmcs_read64(GUEST_PHYSICAL_ADDRESS);
if (!kvm_io_bus_write(vcpu, KVM_FAST_MMIO_BUS, gpa, 0, NULL)) {
- skip_emulated_instruction(vcpu);
trace_kvm_fast_mmio(gpa);
- return 1;
+ return kvm_skip_emulated_instruction(vcpu);
}
ret = handle_mmio_page_fault(vcpu, gpa, true);
@@ -6348,50 +6587,13 @@ static __init int hardware_setup(void)
for (i = 0; i < ARRAY_SIZE(vmx_msr_index); ++i)
kvm_define_shared_msr(i, vmx_msr_index[i]);
- vmx_io_bitmap_a = (unsigned long *)__get_free_page(GFP_KERNEL);
- if (!vmx_io_bitmap_a)
- return r;
+ for (i = 0; i < VMX_BITMAP_NR; i++) {
+ vmx_bitmap[i] = (unsigned long *)__get_free_page(GFP_KERNEL);
+ if (!vmx_bitmap[i])
+ goto out;
+ }
vmx_io_bitmap_b = (unsigned long *)__get_free_page(GFP_KERNEL);
- if (!vmx_io_bitmap_b)
- goto out;
-
- vmx_msr_bitmap_legacy = (unsigned long *)__get_free_page(GFP_KERNEL);
- if (!vmx_msr_bitmap_legacy)
- goto out1;
-
- vmx_msr_bitmap_legacy_x2apic =
- (unsigned long *)__get_free_page(GFP_KERNEL);
- if (!vmx_msr_bitmap_legacy_x2apic)
- goto out2;
-
- vmx_msr_bitmap_legacy_x2apic_apicv_inactive =
- (unsigned long *)__get_free_page(GFP_KERNEL);
- if (!vmx_msr_bitmap_legacy_x2apic_apicv_inactive)
- goto out3;
-
- vmx_msr_bitmap_longmode = (unsigned long *)__get_free_page(GFP_KERNEL);
- if (!vmx_msr_bitmap_longmode)
- goto out4;
-
- vmx_msr_bitmap_longmode_x2apic =
- (unsigned long *)__get_free_page(GFP_KERNEL);
- if (!vmx_msr_bitmap_longmode_x2apic)
- goto out5;
-
- vmx_msr_bitmap_longmode_x2apic_apicv_inactive =
- (unsigned long *)__get_free_page(GFP_KERNEL);
- if (!vmx_msr_bitmap_longmode_x2apic_apicv_inactive)
- goto out6;
-
- vmx_vmread_bitmap = (unsigned long *)__get_free_page(GFP_KERNEL);
- if (!vmx_vmread_bitmap)
- goto out7;
-
- vmx_vmwrite_bitmap = (unsigned long *)__get_free_page(GFP_KERNEL);
- if (!vmx_vmwrite_bitmap)
- goto out8;
-
memset(vmx_vmread_bitmap, 0xff, PAGE_SIZE);
memset(vmx_vmwrite_bitmap, 0xff, PAGE_SIZE);
@@ -6409,7 +6611,7 @@ static __init int hardware_setup(void)
if (setup_vmcs_config(&vmcs_config) < 0) {
r = -EIO;
- goto out9;
+ goto out;
}
if (boot_cpu_has(X86_FEATURE_NX))
@@ -6472,39 +6674,34 @@ static __init int hardware_setup(void)
vmx_disable_intercept_for_msr(MSR_IA32_SYSENTER_EIP, false);
vmx_disable_intercept_for_msr(MSR_IA32_BNDCFGS, true);
- memcpy(vmx_msr_bitmap_legacy_x2apic,
+ memcpy(vmx_msr_bitmap_legacy_x2apic_apicv,
vmx_msr_bitmap_legacy, PAGE_SIZE);
- memcpy(vmx_msr_bitmap_longmode_x2apic,
+ memcpy(vmx_msr_bitmap_longmode_x2apic_apicv,
vmx_msr_bitmap_longmode, PAGE_SIZE);
- memcpy(vmx_msr_bitmap_legacy_x2apic_apicv_inactive,
+ memcpy(vmx_msr_bitmap_legacy_x2apic,
vmx_msr_bitmap_legacy, PAGE_SIZE);
- memcpy(vmx_msr_bitmap_longmode_x2apic_apicv_inactive,
+ memcpy(vmx_msr_bitmap_longmode_x2apic,
vmx_msr_bitmap_longmode, PAGE_SIZE);
set_bit(0, vmx_vpid_bitmap); /* 0 is reserved for host */
+ for (msr = 0x800; msr <= 0x8ff; msr++) {
+ if (msr == 0x839 /* TMCCT */)
+ continue;
+ vmx_disable_intercept_msr_x2apic(msr, MSR_TYPE_R, true);
+ }
+
/*
- * enable_apicv && kvm_vcpu_apicv_active()
+ * TPR reads and writes can be virtualized even if virtual interrupt
+ * delivery is not in use.
*/
- for (msr = 0x800; msr <= 0x8ff; msr++)
- vmx_disable_intercept_msr_read_x2apic(msr, true);
+ vmx_disable_intercept_msr_x2apic(0x808, MSR_TYPE_W, true);
+ vmx_disable_intercept_msr_x2apic(0x808, MSR_TYPE_R | MSR_TYPE_W, false);
- /* TMCCT */
- vmx_enable_intercept_msr_read_x2apic(0x839, true);
- /* TPR */
- vmx_disable_intercept_msr_write_x2apic(0x808, true);
/* EOI */
- vmx_disable_intercept_msr_write_x2apic(0x80b, true);
+ vmx_disable_intercept_msr_x2apic(0x80b, MSR_TYPE_W, true);
/* SELF-IPI */
- vmx_disable_intercept_msr_write_x2apic(0x83f, true);
-
- /*
- * (enable_apicv && !kvm_vcpu_apicv_active()) ||
- * !enable_apicv
- */
- /* TPR */
- vmx_disable_intercept_msr_read_x2apic(0x808, false);
- vmx_disable_intercept_msr_write_x2apic(0x808, false);
+ vmx_disable_intercept_msr_x2apic(0x83f, MSR_TYPE_W, true);
if (enable_ept) {
kvm_mmu_set_mask_ptes(VMX_EPT_READABLE_MASK,
@@ -6551,42 +6748,19 @@ static __init int hardware_setup(void)
return alloc_kvm_area();
-out9:
- free_page((unsigned long)vmx_vmwrite_bitmap);
-out8:
- free_page((unsigned long)vmx_vmread_bitmap);
-out7:
- free_page((unsigned long)vmx_msr_bitmap_longmode_x2apic_apicv_inactive);
-out6:
- free_page((unsigned long)vmx_msr_bitmap_longmode_x2apic);
-out5:
- free_page((unsigned long)vmx_msr_bitmap_longmode);
-out4:
- free_page((unsigned long)vmx_msr_bitmap_legacy_x2apic_apicv_inactive);
-out3:
- free_page((unsigned long)vmx_msr_bitmap_legacy_x2apic);
-out2:
- free_page((unsigned long)vmx_msr_bitmap_legacy);
-out1:
- free_page((unsigned long)vmx_io_bitmap_b);
out:
- free_page((unsigned long)vmx_io_bitmap_a);
+ for (i = 0; i < VMX_BITMAP_NR; i++)
+ free_page((unsigned long)vmx_bitmap[i]);
return r;
}
static __exit void hardware_unsetup(void)
{
- free_page((unsigned long)vmx_msr_bitmap_legacy_x2apic);
- free_page((unsigned long)vmx_msr_bitmap_legacy_x2apic_apicv_inactive);
- free_page((unsigned long)vmx_msr_bitmap_longmode_x2apic);
- free_page((unsigned long)vmx_msr_bitmap_longmode_x2apic_apicv_inactive);
- free_page((unsigned long)vmx_msr_bitmap_legacy);
- free_page((unsigned long)vmx_msr_bitmap_longmode);
- free_page((unsigned long)vmx_io_bitmap_b);
- free_page((unsigned long)vmx_io_bitmap_a);
- free_page((unsigned long)vmx_vmwrite_bitmap);
- free_page((unsigned long)vmx_vmread_bitmap);
+ int i;
+
+ for (i = 0; i < VMX_BITMAP_NR; i++)
+ free_page((unsigned long)vmx_bitmap[i]);
free_kvm_area();
}
@@ -6600,16 +6774,13 @@ static int handle_pause(struct kvm_vcpu *vcpu)
if (ple_gap)
grow_ple_window(vcpu);
- skip_emulated_instruction(vcpu);
kvm_vcpu_on_spin(vcpu);
-
- return 1;
+ return kvm_skip_emulated_instruction(vcpu);
}
static int handle_nop(struct kvm_vcpu *vcpu)
{
- skip_emulated_instruction(vcpu);
- return 1;
+ return kvm_skip_emulated_instruction(vcpu);
}
static int handle_mwait(struct kvm_vcpu *vcpu)
@@ -6916,8 +7087,7 @@ static int nested_vmx_check_vmptr(struct kvm_vcpu *vcpu, int exit_reason,
*/
if (!PAGE_ALIGNED(vmptr) || (vmptr >> maxphyaddr)) {
nested_vmx_failInvalid(vcpu);
- skip_emulated_instruction(vcpu);
- return 1;
+ return kvm_skip_emulated_instruction(vcpu);
}
page = nested_get_page(vcpu, vmptr);
@@ -6925,8 +7095,7 @@ static int nested_vmx_check_vmptr(struct kvm_vcpu *vcpu, int exit_reason,
*(u32 *)kmap(page) != VMCS12_REVISION) {
nested_vmx_failInvalid(vcpu);
kunmap(page);
- skip_emulated_instruction(vcpu);
- return 1;
+ return kvm_skip_emulated_instruction(vcpu);
}
kunmap(page);
vmx->nested.vmxon_ptr = vmptr;
@@ -6935,30 +7104,26 @@ static int nested_vmx_check_vmptr(struct kvm_vcpu *vcpu, int exit_reason,
if (!PAGE_ALIGNED(vmptr) || (vmptr >> maxphyaddr)) {
nested_vmx_failValid(vcpu,
VMXERR_VMCLEAR_INVALID_ADDRESS);
- skip_emulated_instruction(vcpu);
- return 1;
+ return kvm_skip_emulated_instruction(vcpu);
}
if (vmptr == vmx->nested.vmxon_ptr) {
nested_vmx_failValid(vcpu,
VMXERR_VMCLEAR_VMXON_POINTER);
- skip_emulated_instruction(vcpu);
- return 1;
+ return kvm_skip_emulated_instruction(vcpu);
}
break;
case EXIT_REASON_VMPTRLD:
if (!PAGE_ALIGNED(vmptr) || (vmptr >> maxphyaddr)) {
nested_vmx_failValid(vcpu,
VMXERR_VMPTRLD_INVALID_ADDRESS);
- skip_emulated_instruction(vcpu);
- return 1;
+ return kvm_skip_emulated_instruction(vcpu);
}
if (vmptr == vmx->nested.vmxon_ptr) {
nested_vmx_failValid(vcpu,
VMXERR_VMCLEAR_VMXON_POINTER);
- skip_emulated_instruction(vcpu);
- return 1;
+ return kvm_skip_emulated_instruction(vcpu);
}
break;
default:
@@ -7014,8 +7179,7 @@ static int handle_vmon(struct kvm_vcpu *vcpu)
if (vmx->nested.vmxon) {
nested_vmx_failValid(vcpu, VMXERR_VMXON_IN_VMX_ROOT_OPERATION);
- skip_emulated_instruction(vcpu);
- return 1;
+ return kvm_skip_emulated_instruction(vcpu);
}
if ((vmx->msr_ia32_feature_control & VMXON_NEEDED_FEATURES)
@@ -7055,9 +7219,8 @@ static int handle_vmon(struct kvm_vcpu *vcpu)
vmx->nested.vmxon = true;
- skip_emulated_instruction(vcpu);
nested_vmx_succeed(vcpu);
- return 1;
+ return kvm_skip_emulated_instruction(vcpu);
out_shadow_vmcs:
kfree(vmx->nested.cached_vmcs12);
@@ -7176,9 +7339,8 @@ static int handle_vmoff(struct kvm_vcpu *vcpu)
if (!nested_vmx_check_permission(vcpu))
return 1;
free_nested(to_vmx(vcpu));
- skip_emulated_instruction(vcpu);
nested_vmx_succeed(vcpu);
- return 1;
+ return kvm_skip_emulated_instruction(vcpu);
}
/* Emulate the VMCLEAR instruction */
@@ -7217,9 +7379,8 @@ static int handle_vmclear(struct kvm_vcpu *vcpu)
nested_free_vmcs02(vmx, vmptr);
- skip_emulated_instruction(vcpu);
nested_vmx_succeed(vcpu);
- return 1;
+ return kvm_skip_emulated_instruction(vcpu);
}
static int nested_vmx_run(struct kvm_vcpu *vcpu, bool launch);
@@ -7417,7 +7578,6 @@ static int nested_vmx_check_vmcs12(struct kvm_vcpu *vcpu)
struct vcpu_vmx *vmx = to_vmx(vcpu);
if (vmx->nested.current_vmptr == -1ull) {
nested_vmx_failInvalid(vcpu);
- skip_emulated_instruction(vcpu);
return 0;
}
return 1;
@@ -7431,17 +7591,18 @@ static int handle_vmread(struct kvm_vcpu *vcpu)
u32 vmx_instruction_info = vmcs_read32(VMX_INSTRUCTION_INFO);
gva_t gva = 0;
- if (!nested_vmx_check_permission(vcpu) ||
- !nested_vmx_check_vmcs12(vcpu))
+ if (!nested_vmx_check_permission(vcpu))
return 1;
+ if (!nested_vmx_check_vmcs12(vcpu))
+ return kvm_skip_emulated_instruction(vcpu);
+
/* Decode instruction info and find the field to read */
field = kvm_register_readl(vcpu, (((vmx_instruction_info) >> 28) & 0xf));
/* Read the field, zero-extended to a u64 field_value */
if (vmcs12_read_any(vcpu, field, &field_value) < 0) {
nested_vmx_failValid(vcpu, VMXERR_UNSUPPORTED_VMCS_COMPONENT);
- skip_emulated_instruction(vcpu);
- return 1;
+ return kvm_skip_emulated_instruction(vcpu);
}
/*
* Now copy part of this value to register or memory, as requested.
@@ -7461,8 +7622,7 @@ static int handle_vmread(struct kvm_vcpu *vcpu)
}
nested_vmx_succeed(vcpu);
- skip_emulated_instruction(vcpu);
- return 1;
+ return kvm_skip_emulated_instruction(vcpu);
}
@@ -7481,10 +7641,12 @@ static int handle_vmwrite(struct kvm_vcpu *vcpu)
u64 field_value = 0;
struct x86_exception e;
- if (!nested_vmx_check_permission(vcpu) ||
- !nested_vmx_check_vmcs12(vcpu))
+ if (!nested_vmx_check_permission(vcpu))
return 1;
+ if (!nested_vmx_check_vmcs12(vcpu))
+ return kvm_skip_emulated_instruction(vcpu);
+
if (vmx_instruction_info & (1u << 10))
field_value = kvm_register_readl(vcpu,
(((vmx_instruction_info) >> 3) & 0xf));
@@ -7504,19 +7666,16 @@ static int handle_vmwrite(struct kvm_vcpu *vcpu)
if (vmcs_field_readonly(field)) {
nested_vmx_failValid(vcpu,
VMXERR_VMWRITE_READ_ONLY_VMCS_COMPONENT);
- skip_emulated_instruction(vcpu);
- return 1;
+ return kvm_skip_emulated_instruction(vcpu);
}
if (vmcs12_write_any(vcpu, field, field_value) < 0) {
nested_vmx_failValid(vcpu, VMXERR_UNSUPPORTED_VMCS_COMPONENT);
- skip_emulated_instruction(vcpu);
- return 1;
+ return kvm_skip_emulated_instruction(vcpu);
}
nested_vmx_succeed(vcpu);
- skip_emulated_instruction(vcpu);
- return 1;
+ return kvm_skip_emulated_instruction(vcpu);
}
/* Emulate the VMPTRLD instruction */
@@ -7537,8 +7696,7 @@ static int handle_vmptrld(struct kvm_vcpu *vcpu)
page = nested_get_page(vcpu, vmptr);
if (page == NULL) {
nested_vmx_failInvalid(vcpu);
- skip_emulated_instruction(vcpu);
- return 1;
+ return kvm_skip_emulated_instruction(vcpu);
}
new_vmcs12 = kmap(page);
if (new_vmcs12->revision_id != VMCS12_REVISION) {
@@ -7546,8 +7704,7 @@ static int handle_vmptrld(struct kvm_vcpu *vcpu)
nested_release_page_clean(page);
nested_vmx_failValid(vcpu,
VMXERR_VMPTRLD_INCORRECT_VMCS_REVISION_ID);
- skip_emulated_instruction(vcpu);
- return 1;
+ return kvm_skip_emulated_instruction(vcpu);
}
nested_release_vmcs12(vmx);
@@ -7571,8 +7728,7 @@ static int handle_vmptrld(struct kvm_vcpu *vcpu)
}
nested_vmx_succeed(vcpu);
- skip_emulated_instruction(vcpu);
- return 1;
+ return kvm_skip_emulated_instruction(vcpu);
}
/* Emulate the VMPTRST instruction */
@@ -7597,8 +7753,7 @@ static int handle_vmptrst(struct kvm_vcpu *vcpu)
return 1;
}
nested_vmx_succeed(vcpu);
- skip_emulated_instruction(vcpu);
- return 1;
+ return kvm_skip_emulated_instruction(vcpu);
}
/* Emulate the INVEPT instruction */
@@ -7636,8 +7791,7 @@ static int handle_invept(struct kvm_vcpu *vcpu)
if (type >= 32 || !(types & (1 << type))) {
nested_vmx_failValid(vcpu,
VMXERR_INVALID_OPERAND_TO_INVEPT_INVVPID);
- skip_emulated_instruction(vcpu);
- return 1;
+ return kvm_skip_emulated_instruction(vcpu);
}
/* According to the Intel VMX instruction reference, the memory
@@ -7668,8 +7822,7 @@ static int handle_invept(struct kvm_vcpu *vcpu)
break;
}
- skip_emulated_instruction(vcpu);
- return 1;
+ return kvm_skip_emulated_instruction(vcpu);
}
static int handle_invvpid(struct kvm_vcpu *vcpu)
@@ -7694,13 +7847,13 @@ static int handle_invvpid(struct kvm_vcpu *vcpu)
vmx_instruction_info = vmcs_read32(VMX_INSTRUCTION_INFO);
type = kvm_register_readl(vcpu, (vmx_instruction_info >> 28) & 0xf);
- types = (vmx->nested.nested_vmx_vpid_caps >> 8) & 0x7;
+ types = (vmx->nested.nested_vmx_vpid_caps &
+ VMX_VPID_EXTENT_SUPPORTED_MASK) >> 8;
if (type >= 32 || !(types & (1 << type))) {
nested_vmx_failValid(vcpu,
VMXERR_INVALID_OPERAND_TO_INVEPT_INVVPID);
- skip_emulated_instruction(vcpu);
- return 1;
+ return kvm_skip_emulated_instruction(vcpu);
}
/* according to the intel vmx instruction reference, the memory
@@ -7716,23 +7869,26 @@ static int handle_invvpid(struct kvm_vcpu *vcpu)
}
switch (type) {
+ case VMX_VPID_EXTENT_INDIVIDUAL_ADDR:
case VMX_VPID_EXTENT_SINGLE_CONTEXT:
- /*
- * Old versions of KVM use the single-context version so we
- * have to support it; just treat it the same as all-context.
- */
+ case VMX_VPID_EXTENT_SINGLE_NON_GLOBAL:
+ if (!vpid) {
+ nested_vmx_failValid(vcpu,
+ VMXERR_INVALID_OPERAND_TO_INVEPT_INVVPID);
+ return kvm_skip_emulated_instruction(vcpu);
+ }
+ break;
case VMX_VPID_EXTENT_ALL_CONTEXT:
- __vmx_flush_tlb(vcpu, to_vmx(vcpu)->nested.vpid02);
- nested_vmx_succeed(vcpu);
break;
default:
- /* Trap individual address invalidation invvpid calls */
- BUG_ON(1);
- break;
+ WARN_ON_ONCE(1);
+ return kvm_skip_emulated_instruction(vcpu);
}
- skip_emulated_instruction(vcpu);
- return 1;
+ __vmx_flush_tlb(vcpu, vmx->nested.vpid02);
+ nested_vmx_succeed(vcpu);
+
+ return kvm_skip_emulated_instruction(vcpu);
}
static int handle_pml_full(struct kvm_vcpu *vcpu)
@@ -8071,6 +8227,8 @@ static bool nested_vmx_exit_handled(struct kvm_vcpu *vcpu)
return nested_cpu_has(vmcs12, CPU_BASED_MOV_DR_EXITING);
case EXIT_REASON_IO_INSTRUCTION:
return nested_vmx_exit_handled_io(vcpu, vmcs12);
+ case EXIT_REASON_GDTR_IDTR: case EXIT_REASON_LDTR_TR:
+ return nested_cpu_has2(vmcs12, SECONDARY_EXEC_DESC);
case EXIT_REASON_MSR_READ:
case EXIT_REASON_MSR_WRITE:
return nested_vmx_exit_handled_msr(vcpu, vmcs12, exit_reason);
@@ -8620,11 +8778,6 @@ static void vmx_handle_external_intr(struct kvm_vcpu *vcpu)
u32 exit_intr_info = vmcs_read32(VM_EXIT_INTR_INFO);
register void *__sp asm(_ASM_SP);
- /*
- * If external interrupt exists, IF bit is set in rflags/eflags on the
- * interrupt stack frame, and interrupt will be enabled on a return
- * from interrupt handler.
- */
if ((exit_intr_info & (INTR_INFO_VALID_MASK | INTR_INFO_INTR_TYPE_MASK))
== (INTR_INFO_VALID_MASK | INTR_TYPE_EXT_INTR)) {
unsigned int vector;
@@ -8809,7 +8962,7 @@ static void atomic_switch_perf_msrs(struct vcpu_vmx *vmx)
msrs[i].host);
}
-void vmx_arm_hv_timer(struct kvm_vcpu *vcpu)
+static void vmx_arm_hv_timer(struct kvm_vcpu *vcpu)
{
struct vcpu_vmx *vmx = to_vmx(vcpu);
u64 tscl;
@@ -9279,6 +9432,50 @@ static void vmcs_set_secondary_exec_control(u32 new_ctl)
(new_ctl & ~mask) | (cur_ctl & mask));
}
+/*
+ * Generate MSR_IA32_VMX_CR{0,4}_FIXED1 according to CPUID. Only set bits
+ * (indicating "allowed-1") if they are supported in the guest's CPUID.
+ */
+static void nested_vmx_cr_fixed1_bits_update(struct kvm_vcpu *vcpu)
+{
+ struct vcpu_vmx *vmx = to_vmx(vcpu);
+ struct kvm_cpuid_entry2 *entry;
+
+ vmx->nested.nested_vmx_cr0_fixed1 = 0xffffffff;
+ vmx->nested.nested_vmx_cr4_fixed1 = X86_CR4_PCE;
+
+#define cr4_fixed1_update(_cr4_mask, _reg, _cpuid_mask) do { \
+ if (entry && (entry->_reg & (_cpuid_mask))) \
+ vmx->nested.nested_vmx_cr4_fixed1 |= (_cr4_mask); \
+} while (0)
+
+ entry = kvm_find_cpuid_entry(vcpu, 0x1, 0);
+ cr4_fixed1_update(X86_CR4_VME, edx, bit(X86_FEATURE_VME));
+ cr4_fixed1_update(X86_CR4_PVI, edx, bit(X86_FEATURE_VME));
+ cr4_fixed1_update(X86_CR4_TSD, edx, bit(X86_FEATURE_TSC));
+ cr4_fixed1_update(X86_CR4_DE, edx, bit(X86_FEATURE_DE));
+ cr4_fixed1_update(X86_CR4_PSE, edx, bit(X86_FEATURE_PSE));
+ cr4_fixed1_update(X86_CR4_PAE, edx, bit(X86_FEATURE_PAE));
+ cr4_fixed1_update(X86_CR4_MCE, edx, bit(X86_FEATURE_MCE));
+ cr4_fixed1_update(X86_CR4_PGE, edx, bit(X86_FEATURE_PGE));
+ cr4_fixed1_update(X86_CR4_OSFXSR, edx, bit(X86_FEATURE_FXSR));
+ cr4_fixed1_update(X86_CR4_OSXMMEXCPT, edx, bit(X86_FEATURE_XMM));
+ cr4_fixed1_update(X86_CR4_VMXE, ecx, bit(X86_FEATURE_VMX));
+ cr4_fixed1_update(X86_CR4_SMXE, ecx, bit(X86_FEATURE_SMX));
+ cr4_fixed1_update(X86_CR4_PCIDE, ecx, bit(X86_FEATURE_PCID));
+ cr4_fixed1_update(X86_CR4_OSXSAVE, ecx, bit(X86_FEATURE_XSAVE));
+
+ entry = kvm_find_cpuid_entry(vcpu, 0x7, 0);
+ cr4_fixed1_update(X86_CR4_FSGSBASE, ebx, bit(X86_FEATURE_FSGSBASE));
+ cr4_fixed1_update(X86_CR4_SMEP, ebx, bit(X86_FEATURE_SMEP));
+ cr4_fixed1_update(X86_CR4_SMAP, ebx, bit(X86_FEATURE_SMAP));
+ cr4_fixed1_update(X86_CR4_PKE, ecx, bit(X86_FEATURE_PKU));
+ /* TODO: Use X86_CR4_UMIP and X86_FEATURE_UMIP macros */
+ cr4_fixed1_update(bit(11), ecx, bit(2));
+
+#undef cr4_fixed1_update
+}
+
static void vmx_cpuid_update(struct kvm_vcpu *vcpu)
{
struct kvm_cpuid_entry2 *best;
@@ -9320,6 +9517,9 @@ static void vmx_cpuid_update(struct kvm_vcpu *vcpu)
else
to_vmx(vcpu)->msr_ia32_feature_control_valid_bits &=
~FEATURE_CONTROL_VMXON_ENABLED_OUTSIDE_SMX;
+
+ if (nested_vmx_allowed(vcpu))
+ nested_vmx_cr_fixed1_bits_update(vcpu);
}
static void vmx_set_supported_cpuid(u32 func, struct kvm_cpuid_entry2 *entry)
@@ -9774,6 +9974,49 @@ static int nested_vmx_store_msr(struct kvm_vcpu *vcpu, u64 gpa, u32 count)
return 0;
}
+static bool nested_cr3_valid(struct kvm_vcpu *vcpu, unsigned long val)
+{
+ unsigned long invalid_mask;
+
+ invalid_mask = (~0ULL) << cpuid_maxphyaddr(vcpu);
+ return (val & invalid_mask) == 0;
+}
+
+/*
+ * Load guest's/host's cr3 at nested entry/exit. nested_ept is true if we are
+ * emulating VM entry into a guest with EPT enabled.
+ * Returns 0 on success, 1 on failure. Invalid state exit qualification code
+ * is assigned to entry_failure_code on failure.
+ */
+static int nested_vmx_load_cr3(struct kvm_vcpu *vcpu, unsigned long cr3, bool nested_ept,
+ unsigned long *entry_failure_code)
+{
+ if (cr3 != kvm_read_cr3(vcpu) || (!nested_ept && pdptrs_changed(vcpu))) {
+ if (!nested_cr3_valid(vcpu, cr3)) {
+ *entry_failure_code = ENTRY_FAIL_DEFAULT;
+ return 1;
+ }
+
+ /*
+ * If PAE paging and EPT are both on, CR3 is not used by the CPU and
+ * must not be dereferenced.
+ */
+ if (!is_long_mode(vcpu) && is_pae(vcpu) && is_paging(vcpu) &&
+ !nested_ept) {
+ if (!load_pdptrs(vcpu, vcpu->arch.walk_mmu, cr3)) {
+ *entry_failure_code = ENTRY_FAIL_PDPTE;
+ return 1;
+ }
+ }
+
+ vcpu->arch.cr3 = cr3;
+ __set_bit(VCPU_EXREG_CR3, (ulong *)&vcpu->arch.regs_avail);
+ }
+
+ kvm_mmu_reset_context(vcpu);
+ return 0;
+}
+
/*
* prepare_vmcs02 is called when the L1 guest hypervisor runs its nested
* L2 guest. L1 has a vmcs for L2 (vmcs12), and this function "merges" it
@@ -9782,11 +10025,15 @@ static int nested_vmx_store_msr(struct kvm_vcpu *vcpu, u64 gpa, u32 count)
* needs. In addition to modifying the active vmcs (which is vmcs02), this
* function also has additional necessary side-effects, like setting various
* vcpu->arch fields.
+ * Returns 0 on success, 1 on failure. Invalid state exit qualification code
+ * is assigned to entry_failure_code on failure.
*/
-static void prepare_vmcs02(struct kvm_vcpu *vcpu, struct vmcs12 *vmcs12)
+static int prepare_vmcs02(struct kvm_vcpu *vcpu, struct vmcs12 *vmcs12,
+ unsigned long *entry_failure_code)
{
struct vcpu_vmx *vmx = to_vmx(vcpu);
u32 exec_control;
+ bool nested_ept_enabled = false;
vmcs_write16(GUEST_ES_SELECTOR, vmcs12->guest_es_selector);
vmcs_write16(GUEST_CS_SELECTOR, vmcs12->guest_cs_selector);
@@ -9951,6 +10198,7 @@ static void prepare_vmcs02(struct kvm_vcpu *vcpu, struct vmcs12 *vmcs12)
vmcs12->guest_intr_status);
}
+ nested_ept_enabled = (exec_control & SECONDARY_EXEC_ENABLE_EPT) != 0;
vmcs_write32(SECONDARY_VM_EXEC_CONTROL, exec_control);
}
@@ -9964,6 +10212,15 @@ static void prepare_vmcs02(struct kvm_vcpu *vcpu, struct vmcs12 *vmcs12)
vmx_set_constant_host_state(vmx);
/*
+ * Set the MSR load/store lists to match L0's settings.
+ */
+ vmcs_write32(VM_EXIT_MSR_STORE_COUNT, 0);
+ vmcs_write32(VM_EXIT_MSR_LOAD_COUNT, vmx->msr_autoload.nr);
+ vmcs_write64(VM_EXIT_MSR_LOAD_ADDR, __pa(vmx->msr_autoload.host));
+ vmcs_write32(VM_ENTRY_MSR_LOAD_COUNT, vmx->msr_autoload.nr);
+ vmcs_write64(VM_ENTRY_MSR_LOAD_ADDR, __pa(vmx->msr_autoload.guest));
+
+ /*
* HOST_RSP is normally set correctly in vmx_vcpu_run() just before
* entry, but only if the current (host) sp changed from the value
* we wrote last (vmx->host_rsp). This cache is no longer relevant
@@ -10069,15 +10326,6 @@ static void prepare_vmcs02(struct kvm_vcpu *vcpu, struct vmcs12 *vmcs12)
nested_ept_init_mmu_context(vcpu);
}
- if (vmcs12->vm_entry_controls & VM_ENTRY_LOAD_IA32_EFER)
- vcpu->arch.efer = vmcs12->guest_ia32_efer;
- else if (vmcs12->vm_entry_controls & VM_ENTRY_IA32E_MODE)
- vcpu->arch.efer |= (EFER_LMA | EFER_LME);
- else
- vcpu->arch.efer &= ~(EFER_LMA | EFER_LME);
- /* Note: modifies VM_ENTRY/EXIT_CONTROLS and GUEST/HOST_IA32_EFER */
- vmx_set_efer(vcpu, vcpu->arch.efer);
-
/*
* This sets GUEST_CR0 to vmcs12->guest_cr0, with possibly a modified
* TS bit (for lazy fpu) and bits which we consider mandatory enabled.
@@ -10092,8 +10340,20 @@ static void prepare_vmcs02(struct kvm_vcpu *vcpu, struct vmcs12 *vmcs12)
vmx_set_cr4(vcpu, vmcs12->guest_cr4);
vmcs_writel(CR4_READ_SHADOW, nested_read_cr4(vmcs12));
- /* shadow page tables on either EPT or shadow page tables */
- kvm_set_cr3(vcpu, vmcs12->guest_cr3);
+ if (vmcs12->vm_entry_controls & VM_ENTRY_LOAD_IA32_EFER)
+ vcpu->arch.efer = vmcs12->guest_ia32_efer;
+ else if (vmcs12->vm_entry_controls & VM_ENTRY_IA32E_MODE)
+ vcpu->arch.efer |= (EFER_LMA | EFER_LME);
+ else
+ vcpu->arch.efer &= ~(EFER_LMA | EFER_LME);
+ /* Note: modifies VM_ENTRY/EXIT_CONTROLS and GUEST/HOST_IA32_EFER */
+ vmx_set_efer(vcpu, vcpu->arch.efer);
+
+ /* Shadow page tables on either EPT or shadow page tables. */
+ if (nested_vmx_load_cr3(vcpu, vmcs12->guest_cr3, nested_ept_enabled,
+ entry_failure_code))
+ return 1;
+
kvm_mmu_reset_context(vcpu);
if (!enable_ept)
@@ -10111,6 +10371,7 @@ static void prepare_vmcs02(struct kvm_vcpu *vcpu, struct vmcs12 *vmcs12)
kvm_register_write(vcpu, VCPU_REGS_RSP, vmcs12->guest_rsp);
kvm_register_write(vcpu, VCPU_REGS_RIP, vmcs12->guest_rip);
+ return 0;
}
/*
@@ -10125,12 +10386,14 @@ static int nested_vmx_run(struct kvm_vcpu *vcpu, bool launch)
struct loaded_vmcs *vmcs02;
bool ia32e;
u32 msr_entry_idx;
+ unsigned long exit_qualification;
- if (!nested_vmx_check_permission(vcpu) ||
- !nested_vmx_check_vmcs12(vcpu))
+ if (!nested_vmx_check_permission(vcpu))
return 1;
- skip_emulated_instruction(vcpu);
+ if (!nested_vmx_check_vmcs12(vcpu))
+ goto out;
+
vmcs12 = get_vmcs12(vcpu);
if (enable_shadow_vmcs)
@@ -10150,37 +10413,37 @@ static int nested_vmx_run(struct kvm_vcpu *vcpu, bool launch)
nested_vmx_failValid(vcpu,
launch ? VMXERR_VMLAUNCH_NONCLEAR_VMCS
: VMXERR_VMRESUME_NONLAUNCHED_VMCS);
- return 1;
+ goto out;
}
if (vmcs12->guest_activity_state != GUEST_ACTIVITY_ACTIVE &&
vmcs12->guest_activity_state != GUEST_ACTIVITY_HLT) {
nested_vmx_failValid(vcpu, VMXERR_ENTRY_INVALID_CONTROL_FIELD);
- return 1;
+ goto out;
}
if (!nested_get_vmcs12_pages(vcpu, vmcs12)) {
nested_vmx_failValid(vcpu, VMXERR_ENTRY_INVALID_CONTROL_FIELD);
- return 1;
+ goto out;
}
if (nested_vmx_check_msr_bitmap_controls(vcpu, vmcs12)) {
nested_vmx_failValid(vcpu, VMXERR_ENTRY_INVALID_CONTROL_FIELD);
- return 1;
+ goto out;
}
if (nested_vmx_check_apicv_controls(vcpu, vmcs12)) {
nested_vmx_failValid(vcpu, VMXERR_ENTRY_INVALID_CONTROL_FIELD);
- return 1;
+ goto out;
}
if (nested_vmx_check_msr_switch_controls(vcpu, vmcs12)) {
nested_vmx_failValid(vcpu, VMXERR_ENTRY_INVALID_CONTROL_FIELD);
- return 1;
+ goto out;
}
if (!vmx_control_verify(vmcs12->cpu_based_vm_exec_control,
- vmx->nested.nested_vmx_true_procbased_ctls_low,
+ vmx->nested.nested_vmx_procbased_ctls_low,
vmx->nested.nested_vmx_procbased_ctls_high) ||
!vmx_control_verify(vmcs12->secondary_vm_exec_control,
vmx->nested.nested_vmx_secondary_ctls_low,
@@ -10189,33 +10452,34 @@ static int nested_vmx_run(struct kvm_vcpu *vcpu, bool launch)
vmx->nested.nested_vmx_pinbased_ctls_low,
vmx->nested.nested_vmx_pinbased_ctls_high) ||
!vmx_control_verify(vmcs12->vm_exit_controls,
- vmx->nested.nested_vmx_true_exit_ctls_low,
+ vmx->nested.nested_vmx_exit_ctls_low,
vmx->nested.nested_vmx_exit_ctls_high) ||
!vmx_control_verify(vmcs12->vm_entry_controls,
- vmx->nested.nested_vmx_true_entry_ctls_low,
+ vmx->nested.nested_vmx_entry_ctls_low,
vmx->nested.nested_vmx_entry_ctls_high))
{
nested_vmx_failValid(vcpu, VMXERR_ENTRY_INVALID_CONTROL_FIELD);
- return 1;
+ goto out;
}
- if (((vmcs12->host_cr0 & VMXON_CR0_ALWAYSON) != VMXON_CR0_ALWAYSON) ||
- ((vmcs12->host_cr4 & VMXON_CR4_ALWAYSON) != VMXON_CR4_ALWAYSON)) {
+ if (!nested_host_cr0_valid(vcpu, vmcs12->host_cr0) ||
+ !nested_host_cr4_valid(vcpu, vmcs12->host_cr4) ||
+ !nested_cr3_valid(vcpu, vmcs12->host_cr3)) {
nested_vmx_failValid(vcpu,
VMXERR_ENTRY_INVALID_HOST_STATE_FIELD);
- return 1;
+ goto out;
}
- if (!nested_cr0_valid(vcpu, vmcs12->guest_cr0) ||
- ((vmcs12->guest_cr4 & VMXON_CR4_ALWAYSON) != VMXON_CR4_ALWAYSON)) {
+ if (!nested_guest_cr0_valid(vcpu, vmcs12->guest_cr0) ||
+ !nested_guest_cr4_valid(vcpu, vmcs12->guest_cr4)) {
nested_vmx_entry_failure(vcpu, vmcs12,
EXIT_REASON_INVALID_STATE, ENTRY_FAIL_DEFAULT);
- return 1;
+ goto out;
}
if (vmcs12->vmcs_link_pointer != -1ull) {
nested_vmx_entry_failure(vcpu, vmcs12,
EXIT_REASON_INVALID_STATE, ENTRY_FAIL_VMCS_LINK_PTR);
- return 1;
+ goto out;
}
/*
@@ -10235,7 +10499,7 @@ static int nested_vmx_run(struct kvm_vcpu *vcpu, bool launch)
ia32e != !!(vmcs12->guest_ia32_efer & EFER_LME))) {
nested_vmx_entry_failure(vcpu, vmcs12,
EXIT_REASON_INVALID_STATE, ENTRY_FAIL_DEFAULT);
- return 1;
+ goto out;
}
}
@@ -10253,7 +10517,7 @@ static int nested_vmx_run(struct kvm_vcpu *vcpu, bool launch)
ia32e != !!(vmcs12->host_ia32_efer & EFER_LME)) {
nested_vmx_entry_failure(vcpu, vmcs12,
EXIT_REASON_INVALID_STATE, ENTRY_FAIL_DEFAULT);
- return 1;
+ goto out;
}
}
@@ -10266,6 +10530,12 @@ static int nested_vmx_run(struct kvm_vcpu *vcpu, bool launch)
if (!vmcs02)
return -ENOMEM;
+ /*
+ * After this point, the trap flag no longer triggers a singlestep trap
+ * on the vm entry instructions. Don't call
+ * kvm_skip_emulated_instruction.
+ */
+ skip_emulated_instruction(vcpu);
enter_guest_mode(vcpu);
if (!(vmcs12->vm_entry_controls & VM_ENTRY_LOAD_DEBUG_CONTROLS))
@@ -10280,7 +10550,13 @@ static int nested_vmx_run(struct kvm_vcpu *vcpu, bool launch)
vmx_segment_cache_clear(vmx);
- prepare_vmcs02(vcpu, vmcs12);
+ if (prepare_vmcs02(vcpu, vmcs12, &exit_qualification)) {
+ leave_guest_mode(vcpu);
+ vmx_load_vmcs01(vcpu);
+ nested_vmx_entry_failure(vcpu, vmcs12,
+ EXIT_REASON_INVALID_STATE, exit_qualification);
+ return 1;
+ }
msr_entry_idx = nested_vmx_load_msr(vcpu,
vmcs12->vm_entry_msr_load_addr,
@@ -10307,6 +10583,9 @@ static int nested_vmx_run(struct kvm_vcpu *vcpu, bool launch)
* the success flag) when L2 exits (see nested_vmx_vmexit()).
*/
return 1;
+
+out:
+ return kvm_skip_emulated_instruction(vcpu);
}
/*
@@ -10612,6 +10891,7 @@ static void load_vmcs12_host_state(struct kvm_vcpu *vcpu,
struct vmcs12 *vmcs12)
{
struct kvm_segment seg;
+ unsigned long entry_failure_code;
if (vmcs12->vm_exit_controls & VM_EXIT_LOAD_IA32_EFER)
vcpu->arch.efer = vmcs12->host_ia32_efer;
@@ -10649,8 +10929,12 @@ static void load_vmcs12_host_state(struct kvm_vcpu *vcpu,
nested_ept_uninit_mmu_context(vcpu);
- kvm_set_cr3(vcpu, vmcs12->host_cr3);
- kvm_mmu_reset_context(vcpu);
+ /*
+ * Only PDPTE load can fail as the value of cr3 was checked on entry and
+ * couldn't have changed.
+ */
+ if (nested_vmx_load_cr3(vcpu, vmcs12->host_cr3, false, &entry_failure_code))
+ nested_vmx_abort(vcpu, VMX_ABORT_LOAD_HOST_PDPTE_FAIL);
if (!enable_ept)
vcpu->arch.walk_mmu->inject_page_fault = kvm_inject_page_fault;
@@ -10751,6 +11035,7 @@ static void nested_vmx_vmexit(struct kvm_vcpu *vcpu, u32 exit_reason,
{
struct vcpu_vmx *vmx = to_vmx(vcpu);
struct vmcs12 *vmcs12 = get_vmcs12(vcpu);
+ u32 vm_inst_error = 0;
/* trying to cancel vmlaunch/vmresume is a bug */
WARN_ON_ONCE(vmx->nested.nested_run_pending);
@@ -10763,6 +11048,9 @@ static void nested_vmx_vmexit(struct kvm_vcpu *vcpu, u32 exit_reason,
vmcs12->vm_exit_msr_store_count))
nested_vmx_abort(vcpu, VMX_ABORT_SAVE_GUEST_MSR_FAIL);
+ if (unlikely(vmx->fail))
+ vm_inst_error = vmcs_read32(VM_INSTRUCTION_ERROR);
+
vmx_load_vmcs01(vcpu);
if ((exit_reason == EXIT_REASON_EXTERNAL_INTERRUPT)
@@ -10791,6 +11079,8 @@ static void nested_vmx_vmexit(struct kvm_vcpu *vcpu, u32 exit_reason,
load_vmcs12_host_state(vcpu, vmcs12);
/* Update any VMCS fields that might have changed while L2 ran */
+ vmcs_write32(VM_EXIT_MSR_LOAD_COUNT, vmx->msr_autoload.nr);
+ vmcs_write32(VM_ENTRY_MSR_LOAD_COUNT, vmx->msr_autoload.nr);
vmcs_write64(TSC_OFFSET, vcpu->arch.tsc_offset);
if (vmx->hv_deadline_tsc == -1)
vmcs_clear_bits(PIN_BASED_VM_EXEC_CONTROL,
@@ -10839,7 +11129,7 @@ static void nested_vmx_vmexit(struct kvm_vcpu *vcpu, u32 exit_reason,
*/
if (unlikely(vmx->fail)) {
vmx->fail = 0;
- nested_vmx_failValid(vcpu, vmcs_read32(VM_INSTRUCTION_ERROR));
+ nested_vmx_failValid(vcpu, vm_inst_error);
} else
nested_vmx_succeed(vcpu);
if (enable_shadow_vmcs)
diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
index 2912684ff902..1f0d2383f5ee 100644
--- a/arch/x86/kvm/x86.c
+++ b/arch/x86/kvm/x86.c
@@ -434,12 +434,14 @@ void kvm_requeue_exception(struct kvm_vcpu *vcpu, unsigned nr)
}
EXPORT_SYMBOL_GPL(kvm_requeue_exception);
-void kvm_complete_insn_gp(struct kvm_vcpu *vcpu, int err)
+int kvm_complete_insn_gp(struct kvm_vcpu *vcpu, int err)
{
if (err)
kvm_inject_gp(vcpu, 0);
else
- kvm_x86_ops->skip_emulated_instruction(vcpu);
+ return kvm_skip_emulated_instruction(vcpu);
+
+ return 1;
}
EXPORT_SYMBOL_GPL(kvm_complete_insn_gp);
@@ -573,7 +575,7 @@ out:
}
EXPORT_SYMBOL_GPL(load_pdptrs);
-static bool pdptrs_changed(struct kvm_vcpu *vcpu)
+bool pdptrs_changed(struct kvm_vcpu *vcpu)
{
u64 pdpte[ARRAY_SIZE(vcpu->arch.walk_mmu->pdptrs)];
bool changed = true;
@@ -599,6 +601,7 @@ out:
return changed;
}
+EXPORT_SYMBOL_GPL(pdptrs_changed);
int kvm_set_cr0(struct kvm_vcpu *vcpu, unsigned long cr0)
{
@@ -2178,7 +2181,6 @@ int kvm_set_msr_common(struct kvm_vcpu *vcpu, struct msr_data *msr_info)
break;
case MSR_KVM_SYSTEM_TIME_NEW:
case MSR_KVM_SYSTEM_TIME: {
- u64 gpa_offset;
struct kvm_arch *ka = &vcpu->kvm->arch;
kvmclock_reset(vcpu);
@@ -2200,8 +2202,6 @@ int kvm_set_msr_common(struct kvm_vcpu *vcpu, struct msr_data *msr_info)
if (!(data & 1))
break;
- gpa_offset = data & ~(PAGE_MASK | 1);
-
if (kvm_gfn_to_hva_cache_init(vcpu->kvm,
&vcpu->arch.pv_time, data & ~1ULL,
sizeof(struct pvclock_vcpu_time_info)))
@@ -2296,7 +2296,7 @@ int kvm_set_msr_common(struct kvm_vcpu *vcpu, struct msr_data *msr_info)
if (kvm_pmu_is_valid_msr(vcpu, msr))
return kvm_pmu_set_msr(vcpu, msr_info);
if (!ignore_msrs) {
- vcpu_unimpl(vcpu, "unhandled wrmsr: 0x%x data 0x%llx\n",
+ vcpu_debug_ratelimited(vcpu, "unhandled wrmsr: 0x%x data 0x%llx\n",
msr, data);
return 1;
} else {
@@ -2508,7 +2508,8 @@ int kvm_get_msr_common(struct kvm_vcpu *vcpu, struct msr_data *msr_info)
if (kvm_pmu_is_valid_msr(vcpu, msr_info->index))
return kvm_pmu_get_msr(vcpu, msr_info->index, &msr_info->data);
if (!ignore_msrs) {
- vcpu_unimpl(vcpu, "unhandled rdmsr: 0x%x\n", msr_info->index);
+ vcpu_debug_ratelimited(vcpu, "unhandled rdmsr: 0x%x\n",
+ msr_info->index);
return 1;
} else {
vcpu_unimpl(vcpu, "ignored rdmsr: 0x%x\n", msr_info->index);
@@ -2812,7 +2813,7 @@ void kvm_arch_vcpu_load(struct kvm_vcpu *vcpu, int cpu)
}
if (kvm_lapic_hv_timer_in_use(vcpu) &&
kvm_x86_ops->set_hv_timer(vcpu,
- kvm_get_lapic_tscdeadline_msr(vcpu)))
+ kvm_get_lapic_target_expiration_tsc(vcpu)))
kvm_lapic_switch_to_sw_timer(vcpu);
/*
* On a host with synchronized TSC, there is no need to update
@@ -4832,7 +4833,7 @@ static void emulator_invlpg(struct x86_emulate_ctxt *ctxt, ulong address)
kvm_mmu_invlpg(emul_to_vcpu(ctxt), address);
}
-int kvm_emulate_wbinvd_noskip(struct kvm_vcpu *vcpu)
+static int kvm_emulate_wbinvd_noskip(struct kvm_vcpu *vcpu)
{
if (!need_emulate_wbinvd(vcpu))
return X86EMUL_CONTINUE;
@@ -4852,8 +4853,8 @@ int kvm_emulate_wbinvd_noskip(struct kvm_vcpu *vcpu)
int kvm_emulate_wbinvd(struct kvm_vcpu *vcpu)
{
- kvm_x86_ops->skip_emulated_instruction(vcpu);
- return kvm_emulate_wbinvd_noskip(vcpu);
+ kvm_emulate_wbinvd_noskip(vcpu);
+ return kvm_skip_emulated_instruction(vcpu);
}
EXPORT_SYMBOL_GPL(kvm_emulate_wbinvd);
@@ -5451,7 +5452,6 @@ static void kvm_vcpu_check_singlestep(struct kvm_vcpu *vcpu, unsigned long rflag
kvm_run->exit_reason = KVM_EXIT_DEBUG;
*r = EMULATE_USER_EXIT;
} else {
- vcpu->arch.emulate_ctxt.eflags &= ~X86_EFLAGS_TF;
/*
* "Certain debug exceptions may clear bit 0-3. The
* remaining contents of the DR6 register are never
@@ -5464,6 +5464,17 @@ static void kvm_vcpu_check_singlestep(struct kvm_vcpu *vcpu, unsigned long rflag
}
}
+int kvm_skip_emulated_instruction(struct kvm_vcpu *vcpu)
+{
+ unsigned long rflags = kvm_x86_ops->get_rflags(vcpu);
+ int r = EMULATE_DONE;
+
+ kvm_x86_ops->skip_emulated_instruction(vcpu);
+ kvm_vcpu_check_singlestep(vcpu, rflags, &r);
+ return r == EMULATE_DONE;
+}
+EXPORT_SYMBOL_GPL(kvm_skip_emulated_instruction);
+
static bool kvm_vcpu_check_breakpoint(struct kvm_vcpu *vcpu, int *r)
{
if (unlikely(vcpu->guest_debug & KVM_GUESTDBG_USE_HW_BP) &&
@@ -5649,6 +5660,49 @@ int kvm_fast_pio_out(struct kvm_vcpu *vcpu, int size, unsigned short port)
}
EXPORT_SYMBOL_GPL(kvm_fast_pio_out);
+static int complete_fast_pio_in(struct kvm_vcpu *vcpu)
+{
+ unsigned long val;
+
+ /* We should only ever be called with arch.pio.count equal to 1 */
+ BUG_ON(vcpu->arch.pio.count != 1);
+
+ /* For size less than 4 we merge, else we zero extend */
+ val = (vcpu->arch.pio.size < 4) ? kvm_register_read(vcpu, VCPU_REGS_RAX)
+ : 0;
+
+ /*
+ * Since vcpu->arch.pio.count == 1 let emulator_pio_in_emulated perform
+ * the copy and tracing
+ */
+ emulator_pio_in_emulated(&vcpu->arch.emulate_ctxt, vcpu->arch.pio.size,
+ vcpu->arch.pio.port, &val, 1);
+ kvm_register_write(vcpu, VCPU_REGS_RAX, val);
+
+ return 1;
+}
+
+int kvm_fast_pio_in(struct kvm_vcpu *vcpu, int size, unsigned short port)
+{
+ unsigned long val;
+ int ret;
+
+ /* For size less than 4 we merge, else we zero extend */
+ val = (size < 4) ? kvm_register_read(vcpu, VCPU_REGS_RAX) : 0;
+
+ ret = emulator_pio_in_emulated(&vcpu->arch.emulate_ctxt, size, port,
+ &val, 1);
+ if (ret) {
+ kvm_register_write(vcpu, VCPU_REGS_RAX, val);
+ return ret;
+ }
+
+ vcpu->arch.complete_userspace_io = complete_fast_pio_in;
+
+ return 0;
+}
+EXPORT_SYMBOL_GPL(kvm_fast_pio_in);
+
static int kvmclock_cpu_down_prep(unsigned int cpu)
{
__this_cpu_write(cpu_tsc_khz, 0);
@@ -5998,8 +6052,12 @@ EXPORT_SYMBOL_GPL(kvm_vcpu_halt);
int kvm_emulate_halt(struct kvm_vcpu *vcpu)
{
- kvm_x86_ops->skip_emulated_instruction(vcpu);
- return kvm_vcpu_halt(vcpu);
+ int ret = kvm_skip_emulated_instruction(vcpu);
+ /*
+ * TODO: we might be squashing a GUESTDBG_SINGLESTEP-triggered
+ * KVM_EXIT_DEBUG here.
+ */
+ return kvm_vcpu_halt(vcpu) && ret;
}
EXPORT_SYMBOL_GPL(kvm_emulate_halt);
@@ -6030,9 +6088,9 @@ void kvm_vcpu_deactivate_apicv(struct kvm_vcpu *vcpu)
int kvm_emulate_hypercall(struct kvm_vcpu *vcpu)
{
unsigned long nr, a0, a1, a2, a3, ret;
- int op_64_bit, r = 1;
+ int op_64_bit, r;
- kvm_x86_ops->skip_emulated_instruction(vcpu);
+ r = kvm_skip_emulated_instruction(vcpu);
if (kvm_hv_hypercall_enabled(vcpu->kvm))
return kvm_hv_hypercall(vcpu);
diff --git a/arch/x86/pci/xen.c b/arch/x86/pci/xen.c
index bedfab98077a..e1fb269c87af 100644
--- a/arch/x86/pci/xen.c
+++ b/arch/x86/pci/xen.c
@@ -264,8 +264,8 @@ static int xen_hvm_setup_msi_irqs(struct pci_dev *dev, int nvec, int type)
return 0;
error:
- dev_err(&dev->dev,
- "Xen PCI frontend has not registered MSI/MSI-X support!\n");
+ dev_err(&dev->dev, "Failed to create MSI%s! ret=%d!\n",
+ type == PCI_CAP_ID_MSI ? "" : "-X", irq);
return irq;
}
diff --git a/arch/x86/platform/ce4100/ce4100.c b/arch/x86/platform/ce4100/ce4100.c
index b27bccd4390f..821cb41f00e6 100644
--- a/arch/x86/platform/ce4100/ce4100.c
+++ b/arch/x86/platform/ce4100/ce4100.c
@@ -89,7 +89,7 @@ static void ce4100_mem_serial_out(struct uart_port *p, int offset, int value)
}
static void ce4100_serial_fixup(int port, struct uart_port *up,
- unsigned short *capabilites)
+ u32 *capabilites)
{
#ifdef CONFIG_EARLY_PRINTK
/*
diff --git a/arch/x86/xen/pci-swiotlb-xen.c b/arch/x86/xen/pci-swiotlb-xen.c
index 0e98e5d241d0..a9fafb5c8738 100644
--- a/arch/x86/xen/pci-swiotlb-xen.c
+++ b/arch/x86/xen/pci-swiotlb-xen.c
@@ -19,7 +19,6 @@
int xen_swiotlb __read_mostly;
static struct dma_map_ops xen_swiotlb_dma_ops = {
- .mapping_error = xen_swiotlb_dma_mapping_error,
.alloc = xen_swiotlb_alloc_coherent,
.free = xen_swiotlb_free_coherent,
.sync_single_for_cpu = xen_swiotlb_sync_single_for_cpu,
diff --git a/arch/x86/xen/setup.c b/arch/x86/xen/setup.c
index f8960fca0827..8c394e30e5fe 100644
--- a/arch/x86/xen/setup.c
+++ b/arch/x86/xen/setup.c
@@ -41,7 +41,7 @@ struct xen_memory_region xen_extra_mem[XEN_EXTRA_MEM_MAX_REGIONS] __initdata;
unsigned long xen_released_pages;
/* E820 map used during setting up memory. */
-static struct e820entry xen_e820_map[E820MAX] __initdata;
+static struct e820entry xen_e820_map[E820_X_MAX] __initdata;
static u32 xen_e820_map_entries __initdata;
/*
@@ -750,7 +750,7 @@ char * __init xen_memory_setup(void)
max_pfn = min(max_pfn, xen_start_info->nr_pages);
mem_end = PFN_PHYS(max_pfn);
- memmap.nr_entries = E820MAX;
+ memmap.nr_entries = ARRAY_SIZE(xen_e820_map);
set_xen_guest_handle(memmap.buffer, xen_e820_map);
op = xen_initial_domain() ?
@@ -923,7 +923,7 @@ char * __init xen_auto_xlated_memory_setup(void)
int i;
int rc;
- memmap.nr_entries = E820MAX;
+ memmap.nr_entries = ARRAY_SIZE(xen_e820_map);
set_xen_guest_handle(memmap.buffer, xen_e820_map);
rc = HYPERVISOR_memory_op(XENMEM_memory_map, &memmap);
OpenPOWER on IntegriCloud