summaryrefslogtreecommitdiffstats
path: root/arch
diff options
context:
space:
mode:
Diffstat (limited to 'arch')
-rw-r--r--arch/Kconfig3
-rw-r--r--arch/arm/vdso/Makefile3
-rw-r--r--arch/arm64/include/asm/elf.h2
-rw-r--r--arch/arm64/include/asm/pgtable.h1
-rw-r--r--arch/arm64/include/asm/processor.h14
-rw-r--r--arch/arm64/include/asm/stacktrace.h78
-rw-r--r--arch/arm64/kernel/entry.S22
-rw-r--r--arch/arm64/kernel/fpsimd.c29
-rw-r--r--arch/arm64/kernel/perf_callchain.c7
-rw-r--r--arch/arm64/kernel/process.c36
-rw-r--r--arch/arm64/kernel/return_address.c9
-rw-r--r--arch/arm64/kernel/stacktrace.c59
-rw-r--r--arch/arm64/kernel/time.c7
-rw-r--r--arch/arm64/kernel/traps.c13
-rw-r--r--arch/arm64/kernel/vdso/Makefile13
-rw-r--r--arch/arm64/kernel/vdso32/Makefile18
-rw-r--r--arch/hexagon/include/asm/pgalloc.h34
-rw-r--r--arch/parisc/include/asm/kprobes.h4
-rw-r--r--arch/parisc/kernel/pacache.S3
-rw-r--r--arch/powerpc/Kconfig1
-rw-r--r--arch/powerpc/include/asm/hvcall.h11
-rw-r--r--arch/powerpc/include/asm/pmc.h5
-rw-r--r--arch/powerpc/include/uapi/asm/kvm_para.h2
-rw-r--r--arch/powerpc/kernel/Makefile3
-rw-r--r--arch/powerpc/kernel/dma-common.c17
-rw-r--r--arch/powerpc/kernel/exceptions-64s.S2
-rw-r--r--arch/powerpc/kernel/signal_32.c3
-rw-r--r--arch/powerpc/kernel/signal_64.c5
-rw-r--r--arch/powerpc/kvm/book3s_hv.c13
-rw-r--r--arch/powerpc/kvm/book3s_xive.c4
-rw-r--r--arch/powerpc/kvm/book3s_xive_native.c4
-rw-r--r--arch/powerpc/mm/book3s64/hash_utils.c9
-rw-r--r--arch/powerpc/mm/mem.c2
-rw-r--r--arch/powerpc/platforms/pseries/papr_scm.c44
-rw-r--r--arch/powerpc/sysdev/xive/common.c7
-rw-r--r--arch/riscv/Makefile2
-rw-r--r--arch/riscv/boot/dts/sifive/fu540-c000.dtsi15
-rw-r--r--arch/riscv/boot/dts/sifive/hifive-unleashed-a00.dts9
-rw-r--r--arch/riscv/include/asm/Kbuild1
-rw-r--r--arch/riscv/include/uapi/asm/unistd.h1
-rw-r--r--arch/s390/Kconfig1
-rw-r--r--arch/s390/kvm/interrupt.c23
-rw-r--r--arch/s390/mm/init.c7
-rw-r--r--arch/sparc/vdso/Makefile3
-rw-r--r--arch/x86/Kconfig1
-rw-r--r--arch/x86/boot/compressed/eboot.c10
-rw-r--r--arch/x86/boot/compressed/misc.c1
-rw-r--r--arch/x86/boot/compressed/misc.h1
-rw-r--r--arch/x86/boot/compressed/pgtable_64.c1
-rw-r--r--arch/x86/entry/calling.h6
-rw-r--r--arch/x86/entry/entry_32.S61
-rw-r--r--arch/x86/entry/entry_64.S155
-rw-r--r--arch/x86/entry/thunk_64.S5
-rw-r--r--arch/x86/entry/vdso/Makefile5
-rw-r--r--arch/x86/hyperv/hv_init.c13
-rw-r--r--arch/x86/include/asm/apic.h2
-rw-r--r--arch/x86/include/asm/kvm_host.h41
-rw-r--r--arch/x86/include/asm/kvm_para.h2
-rw-r--r--arch/x86/include/asm/paravirt.h23
-rw-r--r--arch/x86/include/asm/paravirt_types.h2
-rw-r--r--arch/x86/include/asm/traps.h4
-rw-r--r--arch/x86/include/uapi/asm/kvm.h9
-rw-r--r--arch/x86/kernel/apic/apic.c2
-rw-r--r--arch/x86/kernel/asm-offsets.c1
-rw-r--r--arch/x86/kernel/e820.c4
-rw-r--r--arch/x86/kernel/head_64.S8
-rw-r--r--arch/x86/kernel/kvm.c9
-rw-r--r--arch/x86/kernel/mpparse.c10
-rw-r--r--arch/x86/kernel/paravirt.c2
-rw-r--r--arch/x86/kernel/process_64.c12
-rw-r--r--arch/x86/kernel/ptrace.c14
-rw-r--r--arch/x86/kernel/traps.c6
-rw-r--r--arch/x86/kvm/cpuid.c12
-rw-r--r--arch/x86/kvm/emulate.c44
-rw-r--r--arch/x86/kvm/hyperv.c20
-rw-r--r--arch/x86/kvm/ioapic.c15
-rw-r--r--arch/x86/kvm/lapic.c202
-rw-r--r--arch/x86/kvm/lapic.h1
-rw-r--r--arch/x86/kvm/mmu.c8
-rw-r--r--arch/x86/kvm/pmu.c27
-rw-r--r--arch/x86/kvm/svm.c55
-rw-r--r--arch/x86/kvm/vmx/nested.c17
-rw-r--r--arch/x86/kvm/vmx/pmu_intel.c11
-rw-r--r--arch/x86/kvm/vmx/vmenter.S6
-rw-r--r--arch/x86/kvm/vmx/vmx.c19
-rw-r--r--arch/x86/kvm/x86.c36
-rw-r--r--arch/x86/kvm/x86.h2
-rw-r--r--arch/x86/lib/copy_user_64.S2
-rw-r--r--arch/x86/lib/getuser.S20
-rw-r--r--arch/x86/lib/putuser.S29
-rw-r--r--arch/x86/lib/usercopy_64.c2
-rw-r--r--arch/x86/math-emu/fpu_emu.h2
-rw-r--r--arch/x86/math-emu/reg_constant.c2
-rw-r--r--arch/x86/mm/fault.c30
-rw-r--r--arch/x86/mm/mem_encrypt.c32
-rw-r--r--arch/x86/xen/enlighten_pv.c3
-rw-r--r--arch/x86/xen/mmu_pv.c12
-rw-r--r--arch/x86/xen/xen-asm.S16
-rw-r--r--arch/x86/xen/xen-ops.h3
99 files changed, 939 insertions, 648 deletions
diff --git a/arch/Kconfig b/arch/Kconfig
index ac0fba400ded..a7b57dd42c26 100644
--- a/arch/Kconfig
+++ b/arch/Kconfig
@@ -796,6 +796,9 @@ config ARCH_NO_COHERENT_DMA_MMAP
config ARCH_NO_PREEMPT
bool
+config ARCH_SUPPORTS_RT
+ bool
+
config CPU_NO_EFFICIENT_FFS
def_bool n
diff --git a/arch/arm/vdso/Makefile b/arch/arm/vdso/Makefile
index ca85df247775..87b7769214e0 100644
--- a/arch/arm/vdso/Makefile
+++ b/arch/arm/vdso/Makefile
@@ -13,8 +13,7 @@ ccflags-y += -DDISABLE_BRANCH_PROFILING
ldflags-$(CONFIG_CPU_ENDIAN_BE8) := --be8
ldflags-y := -Bsymbolic --no-undefined -soname=linux-vdso.so.1 \
-z max-page-size=4096 -nostdlib -shared $(ldflags-y) \
- $(call ld-option, --hash-style=sysv) \
- $(call ld-option, --build-id) \
+ --hash-style=sysv --build-id \
-T
obj-$(CONFIG_VDSO) += vdso.o
diff --git a/arch/arm64/include/asm/elf.h b/arch/arm64/include/asm/elf.h
index 3c7037c6ba9b..b618017205a3 100644
--- a/arch/arm64/include/asm/elf.h
+++ b/arch/arm64/include/asm/elf.h
@@ -202,7 +202,7 @@ typedef compat_elf_greg_t compat_elf_gregset_t[COMPAT_ELF_NGREG];
({ \
set_thread_flag(TIF_32BIT); \
})
-#ifdef CONFIG_GENERIC_COMPAT_VDSO
+#ifdef CONFIG_COMPAT_VDSO
#define COMPAT_ARCH_DLINFO \
do { \
/* \
diff --git a/arch/arm64/include/asm/pgtable.h b/arch/arm64/include/asm/pgtable.h
index 87a4b2ddc1a1..3f5461f7b560 100644
--- a/arch/arm64/include/asm/pgtable.h
+++ b/arch/arm64/include/asm/pgtable.h
@@ -301,7 +301,6 @@ static inline int pte_same(pte_t pte_a, pte_t pte_b)
/*
* Huge pte definitions.
*/
-#define pte_huge(pte) (!(pte_val(pte) & PTE_TABLE_BIT))
#define pte_mkhuge(pte) (__pte(pte_val(pte) & ~PTE_TABLE_BIT))
/*
diff --git a/arch/arm64/include/asm/processor.h b/arch/arm64/include/asm/processor.h
index fd5b1a4efc70..844e2964b0f5 100644
--- a/arch/arm64/include/asm/processor.h
+++ b/arch/arm64/include/asm/processor.h
@@ -193,6 +193,16 @@ static inline void start_thread_common(struct pt_regs *regs, unsigned long pc)
regs->pmr_save = GIC_PRIO_IRQON;
}
+static inline void set_ssbs_bit(struct pt_regs *regs)
+{
+ regs->pstate |= PSR_SSBS_BIT;
+}
+
+static inline void set_compat_ssbs_bit(struct pt_regs *regs)
+{
+ regs->pstate |= PSR_AA32_SSBS_BIT;
+}
+
static inline void start_thread(struct pt_regs *regs, unsigned long pc,
unsigned long sp)
{
@@ -200,7 +210,7 @@ static inline void start_thread(struct pt_regs *regs, unsigned long pc,
regs->pstate = PSR_MODE_EL0t;
if (arm64_get_ssbd_state() != ARM64_SSBD_FORCE_ENABLE)
- regs->pstate |= PSR_SSBS_BIT;
+ set_ssbs_bit(regs);
regs->sp = sp;
}
@@ -219,7 +229,7 @@ static inline void compat_start_thread(struct pt_regs *regs, unsigned long pc,
#endif
if (arm64_get_ssbd_state() != ARM64_SSBD_FORCE_ENABLE)
- regs->pstate |= PSR_AA32_SSBS_BIT;
+ set_compat_ssbs_bit(regs);
regs->compat_sp = sp;
}
diff --git a/arch/arm64/include/asm/stacktrace.h b/arch/arm64/include/asm/stacktrace.h
index df45af931459..4d9b1f48dc39 100644
--- a/arch/arm64/include/asm/stacktrace.h
+++ b/arch/arm64/include/asm/stacktrace.h
@@ -8,19 +8,12 @@
#include <linux/percpu.h>
#include <linux/sched.h>
#include <linux/sched/task_stack.h>
+#include <linux/types.h>
#include <asm/memory.h>
#include <asm/ptrace.h>
#include <asm/sdei.h>
-struct stackframe {
- unsigned long fp;
- unsigned long pc;
-#ifdef CONFIG_FUNCTION_GRAPH_TRACER
- int graph;
-#endif
-};
-
enum stack_type {
STACK_TYPE_UNKNOWN,
STACK_TYPE_TASK,
@@ -28,6 +21,7 @@ enum stack_type {
STACK_TYPE_OVERFLOW,
STACK_TYPE_SDEI_NORMAL,
STACK_TYPE_SDEI_CRITICAL,
+ __NR_STACK_TYPES
};
struct stack_info {
@@ -36,6 +30,37 @@ struct stack_info {
enum stack_type type;
};
+/*
+ * A snapshot of a frame record or fp/lr register values, along with some
+ * accounting information necessary for robust unwinding.
+ *
+ * @fp: The fp value in the frame record (or the real fp)
+ * @pc: The fp value in the frame record (or the real lr)
+ *
+ * @stacks_done: Stacks which have been entirely unwound, for which it is no
+ * longer valid to unwind to.
+ *
+ * @prev_fp: The fp that pointed to this frame record, or a synthetic value
+ * of 0. This is used to ensure that within a stack, each
+ * subsequent frame record is at an increasing address.
+ * @prev_type: The type of stack this frame record was on, or a synthetic
+ * value of STACK_TYPE_UNKNOWN. This is used to detect a
+ * transition from one stack to another.
+ *
+ * @graph: When FUNCTION_GRAPH_TRACER is selected, holds the index of a
+ * replacement lr value in the ftrace graph stack.
+ */
+struct stackframe {
+ unsigned long fp;
+ unsigned long pc;
+ DECLARE_BITMAP(stacks_done, __NR_STACK_TYPES);
+ unsigned long prev_fp;
+ enum stack_type prev_type;
+#ifdef CONFIG_FUNCTION_GRAPH_TRACER
+ int graph;
+#endif
+};
+
extern int unwind_frame(struct task_struct *tsk, struct stackframe *frame);
extern void walk_stackframe(struct task_struct *tsk, struct stackframe *frame,
int (*fn)(struct stackframe *, void *), void *data);
@@ -64,8 +89,9 @@ static inline bool on_irq_stack(unsigned long sp,
return true;
}
-static inline bool on_task_stack(struct task_struct *tsk, unsigned long sp,
- struct stack_info *info)
+static inline bool on_task_stack(const struct task_struct *tsk,
+ unsigned long sp,
+ struct stack_info *info)
{
unsigned long low = (unsigned long)task_stack_page(tsk);
unsigned long high = low + THREAD_SIZE;
@@ -112,10 +138,13 @@ static inline bool on_overflow_stack(unsigned long sp,
* We can only safely access per-cpu stacks from current in a non-preemptible
* context.
*/
-static inline bool on_accessible_stack(struct task_struct *tsk,
- unsigned long sp,
- struct stack_info *info)
+static inline bool on_accessible_stack(const struct task_struct *tsk,
+ unsigned long sp,
+ struct stack_info *info)
{
+ if (info)
+ info->type = STACK_TYPE_UNKNOWN;
+
if (on_task_stack(tsk, sp, info))
return true;
if (tsk != current || preemptible())
@@ -130,4 +159,27 @@ static inline bool on_accessible_stack(struct task_struct *tsk,
return false;
}
+static inline void start_backtrace(struct stackframe *frame,
+ unsigned long fp, unsigned long pc)
+{
+ frame->fp = fp;
+ frame->pc = pc;
+#ifdef CONFIG_FUNCTION_GRAPH_TRACER
+ frame->graph = 0;
+#endif
+
+ /*
+ * Prime the first unwind.
+ *
+ * In unwind_frame() we'll check that the FP points to a valid stack,
+ * which can't be STACK_TYPE_UNKNOWN, and the first unwind will be
+ * treated as a transition to whichever stack that happens to be. The
+ * prev_fp value won't be used, but we set it to 0 such that it is
+ * definitely not an accessible stack address.
+ */
+ bitmap_zero(frame->stacks_done, __NR_STACK_TYPES);
+ frame->prev_fp = 0;
+ frame->prev_type = STACK_TYPE_UNKNOWN;
+}
+
#endif /* __ASM_STACKTRACE_H */
diff --git a/arch/arm64/kernel/entry.S b/arch/arm64/kernel/entry.S
index 9cdc4592da3e..320a30dbe35e 100644
--- a/arch/arm64/kernel/entry.S
+++ b/arch/arm64/kernel/entry.S
@@ -586,10 +586,8 @@ el1_sync:
b.eq el1_ia
cmp x24, #ESR_ELx_EC_SYS64 // configurable trap
b.eq el1_undef
- cmp x24, #ESR_ELx_EC_SP_ALIGN // stack alignment exception
- b.eq el1_sp_pc
cmp x24, #ESR_ELx_EC_PC_ALIGN // pc alignment exception
- b.eq el1_sp_pc
+ b.eq el1_pc
cmp x24, #ESR_ELx_EC_UNKNOWN // unknown exception in EL1
b.eq el1_undef
cmp x24, #ESR_ELx_EC_BREAKPT_CUR // debug exception in EL1
@@ -611,9 +609,11 @@ el1_da:
bl do_mem_abort
kernel_exit 1
-el1_sp_pc:
+el1_pc:
/*
- * Stack or PC alignment exception handling
+ * PC alignment exception handling. We don't handle SP alignment faults,
+ * since we will have hit a recursive exception when trying to push the
+ * initial pt_regs.
*/
mrs x0, far_el1
inherit_daif pstate=x23, tmp=x2
@@ -732,9 +732,9 @@ el0_sync:
ccmp x24, #ESR_ELx_EC_WFx, #4, ne
b.eq el0_sys
cmp x24, #ESR_ELx_EC_SP_ALIGN // stack alignment exception
- b.eq el0_sp_pc
+ b.eq el0_sp
cmp x24, #ESR_ELx_EC_PC_ALIGN // pc alignment exception
- b.eq el0_sp_pc
+ b.eq el0_pc
cmp x24, #ESR_ELx_EC_UNKNOWN // unknown exception in EL0
b.eq el0_undef
cmp x24, #ESR_ELx_EC_BREAKPT_LOW // debug exception in EL0
@@ -758,7 +758,7 @@ el0_sync_compat:
cmp x24, #ESR_ELx_EC_FP_EXC32 // FP/ASIMD exception
b.eq el0_fpsimd_exc
cmp x24, #ESR_ELx_EC_PC_ALIGN // pc alignment exception
- b.eq el0_sp_pc
+ b.eq el0_pc
cmp x24, #ESR_ELx_EC_UNKNOWN // unknown exception in EL0
b.eq el0_undef
cmp x24, #ESR_ELx_EC_CP15_32 // CP15 MRC/MCR trap
@@ -858,11 +858,15 @@ el0_fpsimd_exc:
mov x1, sp
bl do_fpsimd_exc
b ret_to_user
+el0_sp:
+ ldr x26, [sp, #S_SP]
+ b el0_sp_pc
+el0_pc:
+ mrs x26, far_el1
el0_sp_pc:
/*
* Stack or PC alignment exception handling
*/
- mrs x26, far_el1
gic_prio_kentry_setup tmp=x0
enable_da_f
#ifdef CONFIG_TRACE_IRQFLAGS
diff --git a/arch/arm64/kernel/fpsimd.c b/arch/arm64/kernel/fpsimd.c
index eec4776ae5f0..37d3912cfe06 100644
--- a/arch/arm64/kernel/fpsimd.c
+++ b/arch/arm64/kernel/fpsimd.c
@@ -406,6 +406,18 @@ static __uint128_t arm64_cpu_to_le128(__uint128_t x)
#define arm64_le128_to_cpu(x) arm64_cpu_to_le128(x)
+static void __fpsimd_to_sve(void *sst, struct user_fpsimd_state const *fst,
+ unsigned int vq)
+{
+ unsigned int i;
+ __uint128_t *p;
+
+ for (i = 0; i < SVE_NUM_ZREGS; ++i) {
+ p = (__uint128_t *)ZREG(sst, vq, i);
+ *p = arm64_cpu_to_le128(fst->vregs[i]);
+ }
+}
+
/*
* Transfer the FPSIMD state in task->thread.uw.fpsimd_state to
* task->thread.sve_state.
@@ -423,17 +435,12 @@ static void fpsimd_to_sve(struct task_struct *task)
unsigned int vq;
void *sst = task->thread.sve_state;
struct user_fpsimd_state const *fst = &task->thread.uw.fpsimd_state;
- unsigned int i;
- __uint128_t *p;
if (!system_supports_sve())
return;
vq = sve_vq_from_vl(task->thread.sve_vl);
- for (i = 0; i < 32; ++i) {
- p = (__uint128_t *)ZREG(sst, vq, i);
- *p = arm64_cpu_to_le128(fst->vregs[i]);
- }
+ __fpsimd_to_sve(sst, fst, vq);
}
/*
@@ -459,7 +466,7 @@ static void sve_to_fpsimd(struct task_struct *task)
return;
vq = sve_vq_from_vl(task->thread.sve_vl);
- for (i = 0; i < 32; ++i) {
+ for (i = 0; i < SVE_NUM_ZREGS; ++i) {
p = (__uint128_t const *)ZREG(sst, vq, i);
fst->vregs[i] = arm64_le128_to_cpu(*p);
}
@@ -550,8 +557,6 @@ void sve_sync_from_fpsimd_zeropad(struct task_struct *task)
unsigned int vq;
void *sst = task->thread.sve_state;
struct user_fpsimd_state const *fst = &task->thread.uw.fpsimd_state;
- unsigned int i;
- __uint128_t *p;
if (!test_tsk_thread_flag(task, TIF_SVE))
return;
@@ -559,11 +564,7 @@ void sve_sync_from_fpsimd_zeropad(struct task_struct *task)
vq = sve_vq_from_vl(task->thread.sve_vl);
memset(sst, 0, SVE_SIG_REGS_SIZE(vq));
-
- for (i = 0; i < 32; ++i) {
- p = (__uint128_t *)ZREG(sst, vq, i);
- *p = arm64_cpu_to_le128(fst->vregs[i]);
- }
+ __fpsimd_to_sve(sst, fst, vq);
}
int sve_set_vector_length(struct task_struct *task,
diff --git a/arch/arm64/kernel/perf_callchain.c b/arch/arm64/kernel/perf_callchain.c
index 9d63514b9836..b0e03e052dd1 100644
--- a/arch/arm64/kernel/perf_callchain.c
+++ b/arch/arm64/kernel/perf_callchain.c
@@ -154,12 +154,7 @@ void perf_callchain_kernel(struct perf_callchain_entry_ctx *entry,
return;
}
- frame.fp = regs->regs[29];
- frame.pc = regs->pc;
-#ifdef CONFIG_FUNCTION_GRAPH_TRACER
- frame.graph = 0;
-#endif
-
+ start_backtrace(&frame, regs->regs[29], regs->pc);
walk_stackframe(current, &frame, callchain_trace, entry);
}
diff --git a/arch/arm64/kernel/process.c b/arch/arm64/kernel/process.c
index 6a869d9f304f..f674f28df663 100644
--- a/arch/arm64/kernel/process.c
+++ b/arch/arm64/kernel/process.c
@@ -398,7 +398,7 @@ int copy_thread(unsigned long clone_flags, unsigned long stack_start,
childregs->pstate |= PSR_UAO_BIT;
if (arm64_get_ssbd_state() == ARM64_SSBD_FORCE_DISABLE)
- childregs->pstate |= PSR_SSBS_BIT;
+ set_ssbs_bit(childregs);
if (system_uses_irq_prio_masking())
childregs->pmr_save = GIC_PRIO_IRQON;
@@ -443,6 +443,32 @@ void uao_thread_switch(struct task_struct *next)
}
/*
+ * Force SSBS state on context-switch, since it may be lost after migrating
+ * from a CPU which treats the bit as RES0 in a heterogeneous system.
+ */
+static void ssbs_thread_switch(struct task_struct *next)
+{
+ struct pt_regs *regs = task_pt_regs(next);
+
+ /*
+ * Nothing to do for kernel threads, but 'regs' may be junk
+ * (e.g. idle task) so check the flags and bail early.
+ */
+ if (unlikely(next->flags & PF_KTHREAD))
+ return;
+
+ /* If the mitigation is enabled, then we leave SSBS clear. */
+ if ((arm64_get_ssbd_state() == ARM64_SSBD_FORCE_ENABLE) ||
+ test_tsk_thread_flag(next, TIF_SSBD))
+ return;
+
+ if (compat_user_mode(regs))
+ set_compat_ssbs_bit(regs);
+ else if (user_mode(regs))
+ set_ssbs_bit(regs);
+}
+
+/*
* We store our current task in sp_el0, which is clobbered by userspace. Keep a
* shadow copy so that we can restore this upon entry from userspace.
*
@@ -471,6 +497,7 @@ __notrace_funcgraph struct task_struct *__switch_to(struct task_struct *prev,
entry_task_switch(next);
uao_thread_switch(next);
ptrauth_thread_switch(next);
+ ssbs_thread_switch(next);
/*
* Complete any pending TLB or cache maintenance on this CPU in case
@@ -498,11 +525,8 @@ unsigned long get_wchan(struct task_struct *p)
if (!stack_page)
return 0;
- frame.fp = thread_saved_fp(p);
- frame.pc = thread_saved_pc(p);
-#ifdef CONFIG_FUNCTION_GRAPH_TRACER
- frame.graph = 0;
-#endif
+ start_backtrace(&frame, thread_saved_fp(p), thread_saved_pc(p));
+
do {
if (unwind_frame(p, &frame))
goto out;
diff --git a/arch/arm64/kernel/return_address.c b/arch/arm64/kernel/return_address.c
index b21cba90f82d..c4ae647d2306 100644
--- a/arch/arm64/kernel/return_address.c
+++ b/arch/arm64/kernel/return_address.c
@@ -38,12 +38,9 @@ void *return_address(unsigned int level)
data.level = level + 2;
data.addr = NULL;
- frame.fp = (unsigned long)__builtin_frame_address(0);
- frame.pc = (unsigned long)return_address; /* dummy */
-#ifdef CONFIG_FUNCTION_GRAPH_TRACER
- frame.graph = 0;
-#endif
-
+ start_backtrace(&frame,
+ (unsigned long)__builtin_frame_address(0),
+ (unsigned long)return_address);
walk_stackframe(current, &frame, save_return_addr, &data);
if (!data.level)
diff --git a/arch/arm64/kernel/stacktrace.c b/arch/arm64/kernel/stacktrace.c
index 62d395151abe..2b160ae594eb 100644
--- a/arch/arm64/kernel/stacktrace.c
+++ b/arch/arm64/kernel/stacktrace.c
@@ -29,9 +29,18 @@
* ldp x29, x30, [sp]
* add sp, sp, #0x10
*/
+
+/*
+ * Unwind from one frame record (A) to the next frame record (B).
+ *
+ * We terminate early if the location of B indicates a malformed chain of frame
+ * records (e.g. a cycle), determined based on the location and fp value of A
+ * and the location (but not the fp value) of B.
+ */
int notrace unwind_frame(struct task_struct *tsk, struct stackframe *frame)
{
unsigned long fp = frame->fp;
+ struct stack_info info;
if (fp & 0xf)
return -EINVAL;
@@ -39,11 +48,40 @@ int notrace unwind_frame(struct task_struct *tsk, struct stackframe *frame)
if (!tsk)
tsk = current;
- if (!on_accessible_stack(tsk, fp, NULL))
+ if (!on_accessible_stack(tsk, fp, &info))
+ return -EINVAL;
+
+ if (test_bit(info.type, frame->stacks_done))
return -EINVAL;
+ /*
+ * As stacks grow downward, any valid record on the same stack must be
+ * at a strictly higher address than the prior record.
+ *
+ * Stacks can nest in several valid orders, e.g.
+ *
+ * TASK -> IRQ -> OVERFLOW -> SDEI_NORMAL
+ * TASK -> SDEI_NORMAL -> SDEI_CRITICAL -> OVERFLOW
+ *
+ * ... but the nesting itself is strict. Once we transition from one
+ * stack to another, it's never valid to unwind back to that first
+ * stack.
+ */
+ if (info.type == frame->prev_type) {
+ if (fp <= frame->prev_fp)
+ return -EINVAL;
+ } else {
+ set_bit(frame->prev_type, frame->stacks_done);
+ }
+
+ /*
+ * Record this frame record's values and location. The prev_fp and
+ * prev_type are only meaningful to the next unwind_frame() invocation.
+ */
frame->fp = READ_ONCE_NOCHECK(*(unsigned long *)(fp));
frame->pc = READ_ONCE_NOCHECK(*(unsigned long *)(fp + 8));
+ frame->prev_fp = fp;
+ frame->prev_type = info.type;
#ifdef CONFIG_FUNCTION_GRAPH_TRACER
if (tsk->ret_stack &&
@@ -122,12 +160,7 @@ void save_stack_trace_regs(struct pt_regs *regs, struct stack_trace *trace)
data.skip = trace->skip;
data.no_sched_functions = 0;
- frame.fp = regs->regs[29];
- frame.pc = regs->pc;
-#ifdef CONFIG_FUNCTION_GRAPH_TRACER
- frame.graph = 0;
-#endif
-
+ start_backtrace(&frame, regs->regs[29], regs->pc);
walk_stackframe(current, &frame, save_trace, &data);
}
EXPORT_SYMBOL_GPL(save_stack_trace_regs);
@@ -146,17 +179,15 @@ static noinline void __save_stack_trace(struct task_struct *tsk,
data.no_sched_functions = nosched;
if (tsk != current) {
- frame.fp = thread_saved_fp(tsk);
- frame.pc = thread_saved_pc(tsk);
+ start_backtrace(&frame, thread_saved_fp(tsk),
+ thread_saved_pc(tsk));
} else {
/* We don't want this function nor the caller */
data.skip += 2;
- frame.fp = (unsigned long)__builtin_frame_address(0);
- frame.pc = (unsigned long)__save_stack_trace;
+ start_backtrace(&frame,
+ (unsigned long)__builtin_frame_address(0),
+ (unsigned long)__save_stack_trace);
}
-#ifdef CONFIG_FUNCTION_GRAPH_TRACER
- frame.graph = 0;
-#endif
walk_stackframe(tsk, &frame, save_trace, &data);
diff --git a/arch/arm64/kernel/time.c b/arch/arm64/kernel/time.c
index 9f25aedeac9d..0b2946414dc9 100644
--- a/arch/arm64/kernel/time.c
+++ b/arch/arm64/kernel/time.c
@@ -38,11 +38,8 @@ unsigned long profile_pc(struct pt_regs *regs)
if (!in_lock_functions(regs->pc))
return regs->pc;
- frame.fp = regs->regs[29];
- frame.pc = regs->pc;
-#ifdef CONFIG_FUNCTION_GRAPH_TRACER
- frame.graph = 0;
-#endif
+ start_backtrace(&frame, regs->regs[29], regs->pc);
+
do {
int ret = unwind_frame(NULL, &frame);
if (ret < 0)
diff --git a/arch/arm64/kernel/traps.c b/arch/arm64/kernel/traps.c
index 8c03456dade6..d3313797cca9 100644
--- a/arch/arm64/kernel/traps.c
+++ b/arch/arm64/kernel/traps.c
@@ -100,18 +100,17 @@ void dump_backtrace(struct pt_regs *regs, struct task_struct *tsk)
return;
if (tsk == current) {
- frame.fp = (unsigned long)__builtin_frame_address(0);
- frame.pc = (unsigned long)dump_backtrace;
+ start_backtrace(&frame,
+ (unsigned long)__builtin_frame_address(0),
+ (unsigned long)dump_backtrace);
} else {
/*
* task blocked in __switch_to
*/
- frame.fp = thread_saved_fp(tsk);
- frame.pc = thread_saved_pc(tsk);
+ start_backtrace(&frame,
+ thread_saved_fp(tsk),
+ thread_saved_pc(tsk));
}
-#ifdef CONFIG_FUNCTION_GRAPH_TRACER
- frame.graph = 0;
-#endif
printk("Call trace:\n");
do {
diff --git a/arch/arm64/kernel/vdso/Makefile b/arch/arm64/kernel/vdso/Makefile
index 4ab863045188..dd2514bb1511 100644
--- a/arch/arm64/kernel/vdso/Makefile
+++ b/arch/arm64/kernel/vdso/Makefile
@@ -32,10 +32,10 @@ UBSAN_SANITIZE := n
OBJECT_FILES_NON_STANDARD := y
KCOV_INSTRUMENT := n
-ifeq ($(c-gettimeofday-y),)
CFLAGS_vgettimeofday.o = -O2 -mcmodel=tiny
-else
-CFLAGS_vgettimeofday.o = -O2 -mcmodel=tiny -include $(c-gettimeofday-y)
+
+ifneq ($(c-gettimeofday-y),)
+ CFLAGS_vgettimeofday.o += -include $(c-gettimeofday-y)
endif
# Clang versions less than 8 do not support -mcmodel=tiny
@@ -57,8 +57,7 @@ $(obj)/vdso.o : $(obj)/vdso.so
# Link rule for the .so file, .lds has to be first
$(obj)/vdso.so.dbg: $(obj)/vdso.lds $(obj-vdso) FORCE
- $(call if_changed,ld)
- $(call if_changed,vdso_check)
+ $(call if_changed,vdsold_and_vdso_check)
# Strip rule for the .so file
$(obj)/%.so: OBJCOPYFLAGS := -S
@@ -74,8 +73,8 @@ include/generated/vdso-offsets.h: $(obj)/vdso.so.dbg FORCE
$(call if_changed,vdsosym)
# Actual build commands
-quiet_cmd_vdsocc = VDSOCC $@
- cmd_vdsocc = $(CC) $(a_flags) $(c_flags) -c -o $@ $<
+quiet_cmd_vdsold_and_vdso_check = LD $@
+ cmd_vdsold_and_vdso_check = $(cmd_ld); $(cmd_vdso_check)
# Install commands for the unstripped file
quiet_cmd_vdso_install = INSTALL $@
diff --git a/arch/arm64/kernel/vdso32/Makefile b/arch/arm64/kernel/vdso32/Makefile
index 288c14d30b45..1fba0776ed40 100644
--- a/arch/arm64/kernel/vdso32/Makefile
+++ b/arch/arm64/kernel/vdso32/Makefile
@@ -96,8 +96,8 @@ VDSO_LDFLAGS := $(VDSO_CPPFLAGS)
VDSO_LDFLAGS += -Wl,-Bsymbolic -Wl,--no-undefined -Wl,-soname=linux-vdso.so.1
VDSO_LDFLAGS += -Wl,-z,max-page-size=4096 -Wl,-z,common-page-size=4096
VDSO_LDFLAGS += -nostdlib -shared -mfloat-abi=soft
-VDSO_LDFLAGS += $(call cc32-ldoption,-Wl$(comma)--hash-style=sysv)
-VDSO_LDFLAGS += $(call cc32-ldoption,-Wl$(comma)--build-id)
+VDSO_LDFLAGS += -Wl,--hash-style=sysv
+VDSO_LDFLAGS += -Wl,--build-id
VDSO_LDFLAGS += $(call cc32-ldoption,-fuse-ld=bfd)
@@ -144,8 +144,7 @@ $(obj)/vdso.so.dbg: $(obj)/vdso.so.raw $(obj)/$(munge) FORCE
# Link rule for the .so file, .lds has to be first
$(obj)/vdso.so.raw: $(src)/vdso.lds $(obj-vdso) FORCE
- $(call if_changed,vdsold)
- $(call if_changed,vdso_check)
+ $(call if_changed,vdsold_and_vdso_check)
# Compilation rules for the vDSO sources
$(c-obj-vdso): %.o: %.c FORCE
@@ -156,14 +155,17 @@ $(asm-obj-vdso): %.o: %.S FORCE
$(call if_changed_dep,vdsoas)
# Actual build commands
-quiet_cmd_vdsold = VDSOL $@
+quiet_cmd_vdsold_and_vdso_check = LD32 $@
+ cmd_vdsold_and_vdso_check = $(cmd_vdsold); $(cmd_vdso_check)
+
+quiet_cmd_vdsold = LD32 $@
cmd_vdsold = $(COMPATCC) -Wp,-MD,$(depfile) $(VDSO_LDFLAGS) \
-Wl,-T $(filter %.lds,$^) $(filter %.o,$^) -o $@
-quiet_cmd_vdsocc = VDSOC $@
+quiet_cmd_vdsocc = CC32 $@
cmd_vdsocc = $(COMPATCC) -Wp,-MD,$(depfile) $(VDSO_CFLAGS) -c -o $@ $<
-quiet_cmd_vdsocc_gettimeofday = VDSOC_GTD $@
+quiet_cmd_vdsocc_gettimeofday = CC32 $@
cmd_vdsocc_gettimeofday = $(COMPATCC) -Wp,-MD,$(depfile) $(VDSO_CFLAGS) $(VDSO_CFLAGS_gettimeofday_o) -c -o $@ $<
-quiet_cmd_vdsoas = VDSOA $@
+quiet_cmd_vdsoas = AS32 $@
cmd_vdsoas = $(COMPATCC) -Wp,-MD,$(depfile) $(VDSO_AFLAGS) -c -o $@ $<
quiet_cmd_vdsomunge = MUNGE $@
diff --git a/arch/hexagon/include/asm/pgalloc.h b/arch/hexagon/include/asm/pgalloc.h
index 3c9e1bd9a3e9..d6544dc71258 100644
--- a/arch/hexagon/include/asm/pgalloc.h
+++ b/arch/hexagon/include/asm/pgalloc.h
@@ -11,6 +11,8 @@
#include <asm/mem-layout.h>
#include <asm/atomic.h>
+#include <asm-generic/pgalloc.h> /* for pte_{alloc,free}_one */
+
#define check_pgt_cache() do {} while (0)
extern unsigned long long kmap_generation;
@@ -46,38 +48,6 @@ static inline void pgd_free(struct mm_struct *mm, pgd_t *pgd)
free_page((unsigned long) pgd);
}
-static inline struct page *pte_alloc_one(struct mm_struct *mm)
-{
- struct page *pte;
-
- pte = alloc_page(GFP_KERNEL | __GFP_ZERO);
- if (!pte)
- return NULL;
- if (!pgtable_page_ctor(pte)) {
- __free_page(pte);
- return NULL;
- }
- return pte;
-}
-
-/* _kernel variant gets to use a different allocator */
-static inline pte_t *pte_alloc_one_kernel(struct mm_struct *mm)
-{
- gfp_t flags = GFP_KERNEL | __GFP_ZERO;
- return (pte_t *) __get_free_page(flags);
-}
-
-static inline void pte_free(struct mm_struct *mm, struct page *pte)
-{
- pgtable_page_dtor(pte);
- __free_page(pte);
-}
-
-static inline void pte_free_kernel(struct mm_struct *mm, pte_t *pte)
-{
- free_page((unsigned long)pte);
-}
-
static inline void pmd_populate(struct mm_struct *mm, pmd_t *pmd,
pgtable_t pte)
{
diff --git a/arch/parisc/include/asm/kprobes.h b/arch/parisc/include/asm/kprobes.h
index e09cf2deeafe..904034da4974 100644
--- a/arch/parisc/include/asm/kprobes.h
+++ b/arch/parisc/include/asm/kprobes.h
@@ -50,6 +50,10 @@ struct kprobe_ctlblk {
int __kprobes parisc_kprobe_break_handler(struct pt_regs *regs);
int __kprobes parisc_kprobe_ss_handler(struct pt_regs *regs);
+static inline int kprobe_fault_handler(struct pt_regs *regs, int trapnr)
+{
+ return 0;
+}
#endif /* CONFIG_KPROBES */
#endif /* _PARISC_KPROBES_H */
diff --git a/arch/parisc/kernel/pacache.S b/arch/parisc/kernel/pacache.S
index ba67893a1d72..df46b0e5a915 100644
--- a/arch/parisc/kernel/pacache.S
+++ b/arch/parisc/kernel/pacache.S
@@ -63,7 +63,7 @@ ENTRY_CFI(flush_tlb_all_local)
/* Flush Instruction Tlb */
- LDREG ITLB_SID_BASE(%r1), %r20
+88: LDREG ITLB_SID_BASE(%r1), %r20
LDREG ITLB_SID_STRIDE(%r1), %r21
LDREG ITLB_SID_COUNT(%r1), %r22
LDREG ITLB_OFF_BASE(%r1), %arg0
@@ -103,6 +103,7 @@ fitonemiddle: /* Loop if LOOP = 1 */
add %r21, %r20, %r20 /* increment space */
fitdone:
+ ALTERNATIVE(88b, fitdone, ALT_COND_NO_SPLIT_TLB, INSN_NOP)
/* Flush Data Tlb */
diff --git a/arch/powerpc/Kconfig b/arch/powerpc/Kconfig
index d8dcd8820369..77f6ebf97113 100644
--- a/arch/powerpc/Kconfig
+++ b/arch/powerpc/Kconfig
@@ -121,6 +121,7 @@ config PPC
select ARCH_32BIT_OFF_T if PPC32
select ARCH_HAS_DEBUG_VIRTUAL
select ARCH_HAS_DEVMEM_IS_ALLOWED
+ select ARCH_HAS_DMA_MMAP_PGPROT
select ARCH_HAS_ELF_RANDOMIZE
select ARCH_HAS_FORTIFY_SOURCE
select ARCH_HAS_GCOV_PROFILE_ALL
diff --git a/arch/powerpc/include/asm/hvcall.h b/arch/powerpc/include/asm/hvcall.h
index 463c63a9fcf1..11112023e327 100644
--- a/arch/powerpc/include/asm/hvcall.h
+++ b/arch/powerpc/include/asm/hvcall.h
@@ -302,9 +302,14 @@
#define H_SCM_UNBIND_MEM 0x3F0
#define H_SCM_QUERY_BLOCK_MEM_BINDING 0x3F4
#define H_SCM_QUERY_LOGICAL_MEM_BINDING 0x3F8
-#define H_SCM_MEM_QUERY 0x3FC
-#define H_SCM_BLOCK_CLEAR 0x400
-#define MAX_HCALL_OPCODE H_SCM_BLOCK_CLEAR
+#define H_SCM_UNBIND_ALL 0x3FC
+#define H_SCM_HEALTH 0x400
+#define H_SCM_PERFORMANCE_STATS 0x418
+#define MAX_HCALL_OPCODE H_SCM_PERFORMANCE_STATS
+
+/* Scope args for H_SCM_UNBIND_ALL */
+#define H_UNBIND_SCOPE_ALL (0x1)
+#define H_UNBIND_SCOPE_DRC (0x2)
/* H_VIOCTL functions */
#define H_GET_VIOA_DUMP_SIZE 0x01
diff --git a/arch/powerpc/include/asm/pmc.h b/arch/powerpc/include/asm/pmc.h
index dc9a1ca70edf..c6bbe9778d3c 100644
--- a/arch/powerpc/include/asm/pmc.h
+++ b/arch/powerpc/include/asm/pmc.h
@@ -27,11 +27,10 @@ static inline void ppc_set_pmu_inuse(int inuse)
#ifdef CONFIG_PPC_PSERIES
get_lppaca()->pmcregs_in_use = inuse;
#endif
- } else {
+ }
#ifdef CONFIG_KVM_BOOK3S_HV_POSSIBLE
- get_paca()->pmcregs_in_use = inuse;
+ get_paca()->pmcregs_in_use = inuse;
#endif
- }
#endif
}
diff --git a/arch/powerpc/include/uapi/asm/kvm_para.h b/arch/powerpc/include/uapi/asm/kvm_para.h
index 01555c6ae0f5..be48c2215fa2 100644
--- a/arch/powerpc/include/uapi/asm/kvm_para.h
+++ b/arch/powerpc/include/uapi/asm/kvm_para.h
@@ -31,7 +31,7 @@
* Struct fields are always 32 or 64 bit aligned, depending on them being 32
* or 64 bit wide respectively.
*
- * See Documentation/virtual/kvm/ppc-pv.txt
+ * See Documentation/virt/kvm/ppc-pv.txt
*/
struct kvm_vcpu_arch_shared {
__u64 scratch1;
diff --git a/arch/powerpc/kernel/Makefile b/arch/powerpc/kernel/Makefile
index 56dfa7a2a6f2..ea0c69236789 100644
--- a/arch/powerpc/kernel/Makefile
+++ b/arch/powerpc/kernel/Makefile
@@ -49,7 +49,8 @@ obj-y := cputable.o ptrace.o syscalls.o \
signal.o sysfs.o cacheinfo.o time.o \
prom.o traps.o setup-common.o \
udbg.o misc.o io.o misc_$(BITS).o \
- of_platform.o prom_parse.o
+ of_platform.o prom_parse.o \
+ dma-common.o
obj-$(CONFIG_PPC64) += setup_64.o sys_ppc32.o \
signal_64.o ptrace32.o \
paca.o nvram_64.o firmware.o
diff --git a/arch/powerpc/kernel/dma-common.c b/arch/powerpc/kernel/dma-common.c
new file mode 100644
index 000000000000..dc7ef6b17b69
--- /dev/null
+++ b/arch/powerpc/kernel/dma-common.c
@@ -0,0 +1,17 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
+/*
+ * Contains common dma routines for all powerpc platforms.
+ *
+ * Copyright (C) 2019 Shawn Anastasio.
+ */
+
+#include <linux/mm.h>
+#include <linux/dma-noncoherent.h>
+
+pgprot_t arch_dma_mmap_pgprot(struct device *dev, pgprot_t prot,
+ unsigned long attrs)
+{
+ if (!dev_is_dma_coherent(dev))
+ return pgprot_noncached(prot);
+ return prot;
+}
diff --git a/arch/powerpc/kernel/exceptions-64s.S b/arch/powerpc/kernel/exceptions-64s.S
index eee5bef736c8..6ba3cc2ef8ab 100644
--- a/arch/powerpc/kernel/exceptions-64s.S
+++ b/arch/powerpc/kernel/exceptions-64s.S
@@ -1531,7 +1531,7 @@ EXC_COMMON(trap_0b_common, 0xb00, unknown_exception)
*
* Call convention:
*
- * syscall register convention is in Documentation/powerpc/syscall64-abi.txt
+ * syscall register convention is in Documentation/powerpc/syscall64-abi.rst
*
* For hypercalls, the register convention is as follows:
* r0 volatile
diff --git a/arch/powerpc/kernel/signal_32.c b/arch/powerpc/kernel/signal_32.c
index f50b708d6d77..98600b276f76 100644
--- a/arch/powerpc/kernel/signal_32.c
+++ b/arch/powerpc/kernel/signal_32.c
@@ -1198,6 +1198,9 @@ SYSCALL_DEFINE0(rt_sigreturn)
goto bad;
if (MSR_TM_ACTIVE(msr_hi<<32)) {
+ /* Trying to start TM on non TM system */
+ if (!cpu_has_feature(CPU_FTR_TM))
+ goto bad;
/* We only recheckpoint on return if we're
* transaction.
*/
diff --git a/arch/powerpc/kernel/signal_64.c b/arch/powerpc/kernel/signal_64.c
index 2f80e270c7b0..117515564ec7 100644
--- a/arch/powerpc/kernel/signal_64.c
+++ b/arch/powerpc/kernel/signal_64.c
@@ -771,6 +771,11 @@ SYSCALL_DEFINE0(rt_sigreturn)
if (MSR_TM_ACTIVE(msr)) {
/* We recheckpoint on return. */
struct ucontext __user *uc_transact;
+
+ /* Trying to start TM on non TM system */
+ if (!cpu_has_feature(CPU_FTR_TM))
+ goto badframe;
+
if (__get_user(uc_transact, &uc->uc_link))
goto badframe;
if (restore_tm_sigcontexts(current, &uc->uc_mcontext,
diff --git a/arch/powerpc/kvm/book3s_hv.c b/arch/powerpc/kvm/book3s_hv.c
index ec1804f822af..cde3f5a4b3e4 100644
--- a/arch/powerpc/kvm/book3s_hv.c
+++ b/arch/powerpc/kvm/book3s_hv.c
@@ -3569,9 +3569,18 @@ int kvmhv_p9_guest_entry(struct kvm_vcpu *vcpu, u64 time_limit,
mtspr(SPRN_DEC, vcpu->arch.dec_expires - mftb());
if (kvmhv_on_pseries()) {
+ /*
+ * We need to save and restore the guest visible part of the
+ * psscr (i.e. using SPRN_PSSCR_PR) since the hypervisor
+ * doesn't do this for us. Note only required if pseries since
+ * this is done in kvmhv_load_hv_regs_and_go() below otherwise.
+ */
+ unsigned long host_psscr;
/* call our hypervisor to load up HV regs and go */
struct hv_guest_state hvregs;
+ host_psscr = mfspr(SPRN_PSSCR_PR);
+ mtspr(SPRN_PSSCR_PR, vcpu->arch.psscr);
kvmhv_save_hv_regs(vcpu, &hvregs);
hvregs.lpcr = lpcr;
vcpu->arch.regs.msr = vcpu->arch.shregs.msr;
@@ -3590,6 +3599,8 @@ int kvmhv_p9_guest_entry(struct kvm_vcpu *vcpu, u64 time_limit,
vcpu->arch.shregs.msr = vcpu->arch.regs.msr;
vcpu->arch.shregs.dar = mfspr(SPRN_DAR);
vcpu->arch.shregs.dsisr = mfspr(SPRN_DSISR);
+ vcpu->arch.psscr = mfspr(SPRN_PSSCR_PR);
+ mtspr(SPRN_PSSCR_PR, host_psscr);
/* H_CEDE has to be handled now, not later */
if (trap == BOOK3S_INTERRUPT_SYSCALL && !vcpu->arch.nested &&
@@ -3654,6 +3665,8 @@ int kvmhv_p9_guest_entry(struct kvm_vcpu *vcpu, u64 time_limit,
vcpu->arch.vpa.dirty = 1;
save_pmu = lp->pmcregs_in_use;
}
+ /* Must save pmu if this guest is capable of running nested guests */
+ save_pmu |= nesting_enabled(vcpu->kvm);
kvmhv_save_guest_pmu(vcpu, save_pmu);
diff --git a/arch/powerpc/kvm/book3s_xive.c b/arch/powerpc/kvm/book3s_xive.c
index 6ca0d7376a9f..e3ba67095895 100644
--- a/arch/powerpc/kvm/book3s_xive.c
+++ b/arch/powerpc/kvm/book3s_xive.c
@@ -1986,10 +1986,8 @@ static int kvmppc_xive_create(struct kvm_device *dev, u32 type)
xive->single_escalation = xive_native_has_single_escalation();
- if (ret) {
- kfree(xive);
+ if (ret)
return ret;
- }
return 0;
}
diff --git a/arch/powerpc/kvm/book3s_xive_native.c b/arch/powerpc/kvm/book3s_xive_native.c
index 5596c8ec221a..a998823f68a3 100644
--- a/arch/powerpc/kvm/book3s_xive_native.c
+++ b/arch/powerpc/kvm/book3s_xive_native.c
@@ -1090,9 +1090,9 @@ static int kvmppc_xive_native_create(struct kvm_device *dev, u32 type)
xive->ops = &kvmppc_xive_native_ops;
if (ret)
- kfree(xive);
+ return ret;
- return ret;
+ return 0;
}
/*
diff --git a/arch/powerpc/mm/book3s64/hash_utils.c b/arch/powerpc/mm/book3s64/hash_utils.c
index 9a5963e07a82..b8ad14bb1170 100644
--- a/arch/powerpc/mm/book3s64/hash_utils.c
+++ b/arch/powerpc/mm/book3s64/hash_utils.c
@@ -1899,11 +1899,20 @@ void hash__setup_initial_memory_limit(phys_addr_t first_memblock_base,
*
* For guests on platforms before POWER9, we clamp the it limit to 1G
* to avoid some funky things such as RTAS bugs etc...
+ *
+ * On POWER9 we limit to 1TB in case the host erroneously told us that
+ * the RMA was >1TB. Effective address bits 0:23 are treated as zero
+ * (meaning the access is aliased to zero i.e. addr = addr % 1TB)
+ * for virtual real mode addressing and so it doesn't make sense to
+ * have an area larger than 1TB as it can't be addressed.
*/
if (!early_cpu_has_feature(CPU_FTR_HVMODE)) {
ppc64_rma_size = first_memblock_size;
if (!early_cpu_has_feature(CPU_FTR_ARCH_300))
ppc64_rma_size = min_t(u64, ppc64_rma_size, 0x40000000);
+ else
+ ppc64_rma_size = min_t(u64, ppc64_rma_size,
+ 1UL << SID_SHIFT_1T);
/* Finally limit subsequent allocations */
memblock_set_current_limit(ppc64_rma_size);
diff --git a/arch/powerpc/mm/mem.c b/arch/powerpc/mm/mem.c
index 9259337d7374..9191a66b3bc5 100644
--- a/arch/powerpc/mm/mem.c
+++ b/arch/powerpc/mm/mem.c
@@ -239,7 +239,7 @@ void __init paging_init(void)
#ifdef CONFIG_ZONE_DMA
max_zone_pfns[ZONE_DMA] = min(max_low_pfn,
- ((1UL << ARCH_ZONE_DMA_BITS) - 1) >> PAGE_SHIFT);
+ 1UL << (ARCH_ZONE_DMA_BITS - PAGE_SHIFT));
#endif
max_zone_pfns[ZONE_NORMAL] = max_low_pfn;
#ifdef CONFIG_HIGHMEM
diff --git a/arch/powerpc/platforms/pseries/papr_scm.c b/arch/powerpc/platforms/pseries/papr_scm.c
index c8ec670ee924..2c07908359b2 100644
--- a/arch/powerpc/platforms/pseries/papr_scm.c
+++ b/arch/powerpc/platforms/pseries/papr_scm.c
@@ -11,6 +11,7 @@
#include <linux/sched.h>
#include <linux/libnvdimm.h>
#include <linux/platform_device.h>
+#include <linux/delay.h>
#include <asm/plpar_wrappers.h>
@@ -43,8 +44,9 @@ struct papr_scm_priv {
static int drc_pmem_bind(struct papr_scm_priv *p)
{
unsigned long ret[PLPAR_HCALL_BUFSIZE];
- uint64_t rc, token;
uint64_t saved = 0;
+ uint64_t token;
+ int64_t rc;
/*
* When the hypervisor cannot map all the requested memory in a single
@@ -64,6 +66,10 @@ static int drc_pmem_bind(struct papr_scm_priv *p)
} while (rc == H_BUSY);
if (rc) {
+ /* H_OVERLAP needs a separate error path */
+ if (rc == H_OVERLAP)
+ return -EBUSY;
+
dev_err(&p->pdev->dev, "bind err: %lld\n", rc);
return -ENXIO;
}
@@ -78,22 +84,36 @@ static int drc_pmem_bind(struct papr_scm_priv *p)
static int drc_pmem_unbind(struct papr_scm_priv *p)
{
unsigned long ret[PLPAR_HCALL_BUFSIZE];
- uint64_t rc, token;
+ uint64_t token = 0;
+ int64_t rc;
- token = 0;
+ dev_dbg(&p->pdev->dev, "unbind drc %x\n", p->drc_index);
- /* NB: unbind has the same retry requirements mentioned above */
+ /* NB: unbind has the same retry requirements as drc_pmem_bind() */
do {
- rc = plpar_hcall(H_SCM_UNBIND_MEM, ret, p->drc_index,
- p->bound_addr, p->blocks, token);
+
+ /* Unbind of all SCM resources associated with drcIndex */
+ rc = plpar_hcall(H_SCM_UNBIND_ALL, ret, H_UNBIND_SCOPE_DRC,
+ p->drc_index, token);
token = ret[0];
- cond_resched();
+
+ /* Check if we are stalled for some time */
+ if (H_IS_LONG_BUSY(rc)) {
+ msleep(get_longbusy_msecs(rc));
+ rc = H_BUSY;
+ } else if (rc == H_BUSY) {
+ cond_resched();
+ }
+
} while (rc == H_BUSY);
if (rc)
dev_err(&p->pdev->dev, "unbind error: %lld\n", rc);
+ else
+ dev_dbg(&p->pdev->dev, "unbind drc %x complete\n",
+ p->drc_index);
- return !!rc;
+ return rc == H_SUCCESS ? 0 : -ENXIO;
}
static int papr_scm_meta_get(struct papr_scm_priv *p,
@@ -389,6 +409,14 @@ static int papr_scm_probe(struct platform_device *pdev)
/* request the hypervisor to bind this region to somewhere in memory */
rc = drc_pmem_bind(p);
+
+ /* If phyp says drc memory still bound then force unbound and retry */
+ if (rc == -EBUSY) {
+ dev_warn(&pdev->dev, "Retrying bind after unbinding\n");
+ drc_pmem_unbind(p);
+ rc = drc_pmem_bind(p);
+ }
+
if (rc)
goto err;
diff --git a/arch/powerpc/sysdev/xive/common.c b/arch/powerpc/sysdev/xive/common.c
index 082c7e1c20f0..1cdb39575eae 100644
--- a/arch/powerpc/sysdev/xive/common.c
+++ b/arch/powerpc/sysdev/xive/common.c
@@ -479,7 +479,7 @@ static int xive_find_target_in_mask(const struct cpumask *mask,
* Now go through the entire mask until we find a valid
* target.
*/
- for (;;) {
+ do {
/*
* We re-check online as the fallback case passes us
* an untested affinity mask
@@ -487,12 +487,11 @@ static int xive_find_target_in_mask(const struct cpumask *mask,
if (cpu_online(cpu) && xive_try_pick_target(cpu))
return cpu;
cpu = cpumask_next(cpu, mask);
- if (cpu == first)
- break;
/* Wrap around */
if (cpu >= nr_cpu_ids)
cpu = cpumask_first(mask);
- }
+ } while (cpu != first);
+
return -1;
}
diff --git a/arch/riscv/Makefile b/arch/riscv/Makefile
index f8b3b07e4247..7a117be8297c 100644
--- a/arch/riscv/Makefile
+++ b/arch/riscv/Makefile
@@ -34,8 +34,6 @@ else
KBUILD_LDFLAGS += -melf32lriscv
endif
-KBUILD_CFLAGS += -Wall
-
# ISA string setting
riscv-march-$(CONFIG_ARCH_RV32I) := rv32ima
riscv-march-$(CONFIG_ARCH_RV64I) := rv64ima
diff --git a/arch/riscv/boot/dts/sifive/fu540-c000.dtsi b/arch/riscv/boot/dts/sifive/fu540-c000.dtsi
index 40983491b95f..9bf63f0ab253 100644
--- a/arch/riscv/boot/dts/sifive/fu540-c000.dtsi
+++ b/arch/riscv/boot/dts/sifive/fu540-c000.dtsi
@@ -217,5 +217,20 @@
#size-cells = <0>;
status = "disabled";
};
+ eth0: ethernet@10090000 {
+ compatible = "sifive,fu540-c000-gem";
+ interrupt-parent = <&plic0>;
+ interrupts = <53>;
+ reg = <0x0 0x10090000 0x0 0x2000
+ 0x0 0x100a0000 0x0 0x1000>;
+ local-mac-address = [00 00 00 00 00 00];
+ clock-names = "pclk", "hclk";
+ clocks = <&prci PRCI_CLK_GEMGXLPLL>,
+ <&prci PRCI_CLK_GEMGXLPLL>;
+ #address-cells = <1>;
+ #size-cells = <0>;
+ status = "disabled";
+ };
+
};
};
diff --git a/arch/riscv/boot/dts/sifive/hifive-unleashed-a00.dts b/arch/riscv/boot/dts/sifive/hifive-unleashed-a00.dts
index 0b55c53c08c7..93d68cbd64fe 100644
--- a/arch/riscv/boot/dts/sifive/hifive-unleashed-a00.dts
+++ b/arch/riscv/boot/dts/sifive/hifive-unleashed-a00.dts
@@ -76,3 +76,12 @@
disable-wp;
};
};
+
+&eth0 {
+ status = "okay";
+ phy-mode = "gmii";
+ phy-handle = <&phy0>;
+ phy0: ethernet-phy@0 {
+ reg = <0>;
+ };
+};
diff --git a/arch/riscv/include/asm/Kbuild b/arch/riscv/include/asm/Kbuild
index 1efaeddf1e4b..16970f246860 100644
--- a/arch/riscv/include/asm/Kbuild
+++ b/arch/riscv/include/asm/Kbuild
@@ -22,6 +22,7 @@ generic-y += kvm_para.h
generic-y += local.h
generic-y += local64.h
generic-y += mm-arch-hooks.h
+generic-y += msi.h
generic-y += percpu.h
generic-y += preempt.h
generic-y += sections.h
diff --git a/arch/riscv/include/uapi/asm/unistd.h b/arch/riscv/include/uapi/asm/unistd.h
index 0e2eeeb1fd27..13ce76cc5aff 100644
--- a/arch/riscv/include/uapi/asm/unistd.h
+++ b/arch/riscv/include/uapi/asm/unistd.h
@@ -18,6 +18,7 @@
#ifdef __LP64__
#define __ARCH_WANT_NEW_STAT
#define __ARCH_WANT_SET_GET_RLIMIT
+#define __ARCH_WANT_SYS_CLONE3
#endif /* __LP64__ */
#include <asm-generic/unistd.h>
diff --git a/arch/s390/Kconfig b/arch/s390/Kconfig
index 5d8570ed6cab..a4ad2733eedf 100644
--- a/arch/s390/Kconfig
+++ b/arch/s390/Kconfig
@@ -189,6 +189,7 @@ config S390
select VIRT_CPU_ACCOUNTING
select ARCH_HAS_SCALED_CPUTIME
select HAVE_NMI
+ select ARCH_HAS_FORCE_DMA_UNENCRYPTED
select SWIOTLB
select GENERIC_ALLOCATOR
diff --git a/arch/s390/kvm/interrupt.c b/arch/s390/kvm/interrupt.c
index 9dde4d7d8704..b5fd6e85657c 100644
--- a/arch/s390/kvm/interrupt.c
+++ b/arch/s390/kvm/interrupt.c
@@ -1224,28 +1224,11 @@ no_timer:
void kvm_s390_vcpu_wakeup(struct kvm_vcpu *vcpu)
{
- /*
- * We cannot move this into the if, as the CPU might be already
- * in kvm_vcpu_block without having the waitqueue set (polling)
- */
vcpu->valid_wakeup = true;
+ kvm_vcpu_wake_up(vcpu);
+
/*
- * This is mostly to document, that the read in swait_active could
- * be moved before other stores, leading to subtle races.
- * All current users do not store or use an atomic like update
- */
- smp_mb__after_atomic();
- if (swait_active(&vcpu->wq)) {
- /*
- * The vcpu gave up the cpu voluntarily, mark it as a good
- * yield-candidate.
- */
- vcpu->preempted = true;
- swake_up_one(&vcpu->wq);
- vcpu->stat.halt_wakeup++;
- }
- /*
- * The VCPU might not be sleeping but is executing the VSIE. Let's
+ * The VCPU might not be sleeping but rather executing VSIE. Let's
* kick it, so it leaves the SIE to process the request.
*/
kvm_s390_vsie_kick(vcpu);
diff --git a/arch/s390/mm/init.c b/arch/s390/mm/init.c
index 4e5bbe328594..20340a03ad90 100644
--- a/arch/s390/mm/init.c
+++ b/arch/s390/mm/init.c
@@ -30,7 +30,7 @@
#include <linux/export.h>
#include <linux/cma.h>
#include <linux/gfp.h>
-#include <linux/dma-mapping.h>
+#include <linux/dma-direct.h>
#include <asm/processor.h>
#include <linux/uaccess.h>
#include <asm/pgtable.h>
@@ -161,6 +161,11 @@ bool sev_active(void)
return is_prot_virt_guest();
}
+bool force_dma_unencrypted(struct device *dev)
+{
+ return sev_active();
+}
+
/* protected virtualization */
static void pv_init(void)
{
diff --git a/arch/sparc/vdso/Makefile b/arch/sparc/vdso/Makefile
index 5a9e4e1f9f81..324a23947585 100644
--- a/arch/sparc/vdso/Makefile
+++ b/arch/sparc/vdso/Makefile
@@ -115,8 +115,7 @@ quiet_cmd_vdso = VDSO $@
-T $(filter %.lds,$^) $(filter %.o,$^) && \
sh $(srctree)/$(src)/checkundef.sh '$(OBJDUMP)' '$@'
-VDSO_LDFLAGS = -shared $(call ld-option, --hash-style=both) \
- $(call ld-option, --build-id) -Bsymbolic
+VDSO_LDFLAGS = -shared --hash-style=both --build-id -Bsymbolic
GCOV_PROFILE := n
#
diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig
index 78772870facd..222855cc0158 100644
--- a/arch/x86/Kconfig
+++ b/arch/x86/Kconfig
@@ -1526,6 +1526,7 @@ config AMD_MEM_ENCRYPT
depends on X86_64 && CPU_SUP_AMD
select DYNAMIC_PHYSICAL_MASK
select ARCH_USE_MEMREMAP_PROT
+ select ARCH_HAS_FORCE_DMA_UNENCRYPTED
---help---
Say yes to enable support for the encryption of system memory.
This requires an AMD processor that supports Secure Memory
diff --git a/arch/x86/boot/compressed/eboot.c b/arch/x86/boot/compressed/eboot.c
index 220d1279d0e2..d6662fdef300 100644
--- a/arch/x86/boot/compressed/eboot.c
+++ b/arch/x86/boot/compressed/eboot.c
@@ -384,14 +384,11 @@ struct boot_params *make_boot_params(struct efi_config *c)
struct apm_bios_info *bi;
struct setup_header *hdr;
efi_loaded_image_t *image;
- void *options, *handle;
+ void *handle;
efi_guid_t proto = LOADED_IMAGE_PROTOCOL_GUID;
int options_size = 0;
efi_status_t status;
char *cmdline_ptr;
- u16 *s2;
- u8 *s1;
- int i;
unsigned long ramdisk_addr;
unsigned long ramdisk_size;
@@ -494,8 +491,6 @@ static void add_e820ext(struct boot_params *params,
struct setup_data *e820ext, u32 nr_entries)
{
struct setup_data *data;
- efi_status_t status;
- unsigned long size;
e820ext->type = SETUP_E820_EXT;
e820ext->len = nr_entries * sizeof(struct boot_e820_entry);
@@ -677,8 +672,6 @@ static efi_status_t exit_boot_func(efi_system_table_t *sys_table_arg,
void *priv)
{
const char *signature;
- __u32 nr_desc;
- efi_status_t status;
struct exit_boot_struct *p = priv;
signature = efi_is_64bit() ? EFI64_LOADER_SIGNATURE
@@ -747,7 +740,6 @@ struct boot_params *
efi_main(struct efi_config *c, struct boot_params *boot_params)
{
struct desc_ptr *gdt = NULL;
- efi_loaded_image_t *image;
struct setup_header *hdr = &boot_params->hdr;
efi_status_t status;
struct desc_struct *desc;
diff --git a/arch/x86/boot/compressed/misc.c b/arch/x86/boot/compressed/misc.c
index 24e65a0f756d..53ac0cb2396d 100644
--- a/arch/x86/boot/compressed/misc.c
+++ b/arch/x86/boot/compressed/misc.c
@@ -17,6 +17,7 @@
#include "pgtable.h"
#include "../string.h"
#include "../voffset.h"
+#include <asm/bootparam_utils.h>
/*
* WARNING!!
diff --git a/arch/x86/boot/compressed/misc.h b/arch/x86/boot/compressed/misc.h
index d2f184165934..c8181392f70d 100644
--- a/arch/x86/boot/compressed/misc.h
+++ b/arch/x86/boot/compressed/misc.h
@@ -23,7 +23,6 @@
#include <asm/page.h>
#include <asm/boot.h>
#include <asm/bootparam.h>
-#include <asm/bootparam_utils.h>
#define BOOT_CTYPE_H
#include <linux/acpi.h>
diff --git a/arch/x86/boot/compressed/pgtable_64.c b/arch/x86/boot/compressed/pgtable_64.c
index f8debf7aeb4c..5f2d03067ae5 100644
--- a/arch/x86/boot/compressed/pgtable_64.c
+++ b/arch/x86/boot/compressed/pgtable_64.c
@@ -40,7 +40,6 @@ int cmdline_find_option_bool(const char *option);
static unsigned long find_trampoline_placement(void)
{
unsigned long bios_start = 0, ebda_start = 0;
- unsigned long trampoline_start;
struct boot_e820_entry *entry;
char *signature;
int i;
diff --git a/arch/x86/entry/calling.h b/arch/x86/entry/calling.h
index 9f1f9e3b8230..830bd984182b 100644
--- a/arch/x86/entry/calling.h
+++ b/arch/x86/entry/calling.h
@@ -343,3 +343,9 @@ For 32-bit we have the following conventions - kernel is built with
.Lafter_call_\@:
#endif
.endm
+
+#ifdef CONFIG_PARAVIRT_XXL
+#define GET_CR2_INTO(reg) GET_CR2_INTO_AX ; _ASM_MOV %_ASM_AX, reg
+#else
+#define GET_CR2_INTO(reg) _ASM_MOV %cr2, reg
+#endif
diff --git a/arch/x86/entry/entry_32.S b/arch/x86/entry/entry_32.S
index 90b473297299..2bb986f305ac 100644
--- a/arch/x86/entry/entry_32.S
+++ b/arch/x86/entry/entry_32.S
@@ -294,9 +294,11 @@
.Lfinished_frame_\@:
.endm
-.macro SAVE_ALL pt_regs_ax=%eax switch_stacks=0
+.macro SAVE_ALL pt_regs_ax=%eax switch_stacks=0 skip_gs=0
cld
+.if \skip_gs == 0
PUSH_GS
+.endif
FIXUP_FRAME
pushl %fs
pushl %es
@@ -313,13 +315,13 @@
movl %edx, %es
movl $(__KERNEL_PERCPU), %edx
movl %edx, %fs
+.if \skip_gs == 0
SET_KERNEL_GS %edx
-
+.endif
/* Switch to kernel stack if necessary */
.if \switch_stacks > 0
SWITCH_TO_KERNEL_STACK
.endif
-
.endm
.macro SAVE_ALL_NMI cr3_reg:req
@@ -1441,39 +1443,46 @@ BUILD_INTERRUPT3(hv_stimer0_callback_vector, HYPERV_STIMER0_VECTOR,
ENTRY(page_fault)
ASM_CLAC
- pushl $do_page_fault
- ALIGN
- jmp common_exception
+ pushl $0; /* %gs's slot on the stack */
+
+ SAVE_ALL switch_stacks=1 skip_gs=1
+
+ ENCODE_FRAME_POINTER
+ UNWIND_ESPFIX_STACK
+
+ /* fixup %gs */
+ GS_TO_REG %ecx
+ REG_TO_PTGS %ecx
+ SET_KERNEL_GS %ecx
+
+ GET_CR2_INTO(%ecx) # might clobber %eax
+
+ /* fixup orig %eax */
+ movl PT_ORIG_EAX(%esp), %edx # get the error code
+ movl $-1, PT_ORIG_EAX(%esp) # no syscall to restart
+
+ TRACE_IRQS_OFF
+ movl %esp, %eax # pt_regs pointer
+ call do_page_fault
+ jmp ret_from_exception
END(page_fault)
common_exception:
/* the function address is in %gs's slot on the stack */
- FIXUP_FRAME
- pushl %fs
- pushl %es
- pushl %ds
- pushl %eax
- movl $(__USER_DS), %eax
- movl %eax, %ds
- movl %eax, %es
- movl $(__KERNEL_PERCPU), %eax
- movl %eax, %fs
- pushl %ebp
- pushl %edi
- pushl %esi
- pushl %edx
- pushl %ecx
- pushl %ebx
- SWITCH_TO_KERNEL_STACK
+ SAVE_ALL switch_stacks=1 skip_gs=1
ENCODE_FRAME_POINTER
- cld
UNWIND_ESPFIX_STACK
+
+ /* fixup %gs */
GS_TO_REG %ecx
movl PT_GS(%esp), %edi # get the function address
- movl PT_ORIG_EAX(%esp), %edx # get the error code
- movl $-1, PT_ORIG_EAX(%esp) # no syscall to restart
REG_TO_PTGS %ecx
SET_KERNEL_GS %ecx
+
+ /* fixup orig %eax */
+ movl PT_ORIG_EAX(%esp), %edx # get the error code
+ movl $-1, PT_ORIG_EAX(%esp) # no syscall to restart
+
TRACE_IRQS_OFF
movl %esp, %eax # pt_regs pointer
CALL_NOSPEC %edi
diff --git a/arch/x86/entry/entry_64.S b/arch/x86/entry/entry_64.S
index 35a66fcfcb91..3f5a978a02a7 100644
--- a/arch/x86/entry/entry_64.S
+++ b/arch/x86/entry/entry_64.S
@@ -864,18 +864,84 @@ apicinterrupt IRQ_WORK_VECTOR irq_work_interrupt smp_irq_work_interrupt
*/
#define CPU_TSS_IST(x) PER_CPU_VAR(cpu_tss_rw) + (TSS_ist + (x) * 8)
+.macro idtentry_part do_sym, has_error_code:req, read_cr2:req, paranoid:req, shift_ist=-1, ist_offset=0
+
+ .if \paranoid
+ call paranoid_entry
+ /* returned flag: ebx=0: need swapgs on exit, ebx=1: don't need it */
+ .else
+ call error_entry
+ .endif
+ UNWIND_HINT_REGS
+
+ .if \read_cr2
+ /*
+ * Store CR2 early so subsequent faults cannot clobber it. Use R12 as
+ * intermediate storage as RDX can be clobbered in enter_from_user_mode().
+ * GET_CR2_INTO can clobber RAX.
+ */
+ GET_CR2_INTO(%r12);
+ .endif
+
+ .if \shift_ist != -1
+ TRACE_IRQS_OFF_DEBUG /* reload IDT in case of recursion */
+ .else
+ TRACE_IRQS_OFF
+ .endif
+
+ .if \paranoid == 0
+ testb $3, CS(%rsp)
+ jz .Lfrom_kernel_no_context_tracking_\@
+ CALL_enter_from_user_mode
+.Lfrom_kernel_no_context_tracking_\@:
+ .endif
+
+ movq %rsp, %rdi /* pt_regs pointer */
+
+ .if \has_error_code
+ movq ORIG_RAX(%rsp), %rsi /* get error code */
+ movq $-1, ORIG_RAX(%rsp) /* no syscall to restart */
+ .else
+ xorl %esi, %esi /* no error code */
+ .endif
+
+ .if \shift_ist != -1
+ subq $\ist_offset, CPU_TSS_IST(\shift_ist)
+ .endif
+
+ .if \read_cr2
+ movq %r12, %rdx /* Move CR2 into 3rd argument */
+ .endif
+
+ call \do_sym
+
+ .if \shift_ist != -1
+ addq $\ist_offset, CPU_TSS_IST(\shift_ist)
+ .endif
+
+ .if \paranoid
+ /* this procedure expect "no swapgs" flag in ebx */
+ jmp paranoid_exit
+ .else
+ jmp error_exit
+ .endif
+
+.endm
+
/**
* idtentry - Generate an IDT entry stub
* @sym: Name of the generated entry point
- * @do_sym: C function to be called
- * @has_error_code: True if this IDT vector has an error code on the stack
- * @paranoid: non-zero means that this vector may be invoked from
+ * @do_sym: C function to be called
+ * @has_error_code: True if this IDT vector has an error code on the stack
+ * @paranoid: non-zero means that this vector may be invoked from
* kernel mode with user GSBASE and/or user CR3.
* 2 is special -- see below.
* @shift_ist: Set to an IST index if entries from kernel mode should
- * decrement the IST stack so that nested entries get a
+ * decrement the IST stack so that nested entries get a
* fresh stack. (This is for #DB, which has a nasty habit
- * of recursing.)
+ * of recursing.)
+ * @create_gap: create a 6-word stack gap when coming from kernel mode.
+ * @read_cr2: load CR2 into the 3rd argument; done before calling any C code
*
* idtentry generates an IDT stub that sets up a usable kernel context,
* creates struct pt_regs, and calls @do_sym. The stub has the following
@@ -900,15 +966,19 @@ apicinterrupt IRQ_WORK_VECTOR irq_work_interrupt smp_irq_work_interrupt
* @paranoid == 2 is special: the stub will never switch stacks. This is for
* #DF: if the thread stack is somehow unusable, we'll still get a useful OOPS.
*/
-.macro idtentry sym do_sym has_error_code:req paranoid=0 shift_ist=-1 ist_offset=0 create_gap=0
+.macro idtentry sym do_sym has_error_code:req paranoid=0 shift_ist=-1 ist_offset=0 create_gap=0 read_cr2=0
ENTRY(\sym)
UNWIND_HINT_IRET_REGS offset=\has_error_code*8
/* Sanity check */
- .if \shift_ist != -1 && \paranoid == 0
+ .if \shift_ist != -1 && \paranoid != 1
.error "using shift_ist requires paranoid=1"
.endif
+ .if \create_gap && \paranoid
+ .error "using create_gap requires paranoid=0"
+ .endif
+
ASM_CLAC
.if \has_error_code == 0
@@ -934,47 +1004,7 @@ ENTRY(\sym)
.Lfrom_usermode_no_gap_\@:
.endif
- .if \paranoid
- call paranoid_entry
- .else
- call error_entry
- .endif
- UNWIND_HINT_REGS
- /* returned flag: ebx=0: need swapgs on exit, ebx=1: don't need it */
-
- .if \paranoid
- .if \shift_ist != -1
- TRACE_IRQS_OFF_DEBUG /* reload IDT in case of recursion */
- .else
- TRACE_IRQS_OFF
- .endif
- .endif
-
- movq %rsp, %rdi /* pt_regs pointer */
-
- .if \has_error_code
- movq ORIG_RAX(%rsp), %rsi /* get error code */
- movq $-1, ORIG_RAX(%rsp) /* no syscall to restart */
- .else
- xorl %esi, %esi /* no error code */
- .endif
-
- .if \shift_ist != -1
- subq $\ist_offset, CPU_TSS_IST(\shift_ist)
- .endif
-
- call \do_sym
-
- .if \shift_ist != -1
- addq $\ist_offset, CPU_TSS_IST(\shift_ist)
- .endif
-
- /* these procedures expect "no swapgs" flag in ebx */
- .if \paranoid
- jmp paranoid_exit
- .else
- jmp error_exit
- .endif
+ idtentry_part \do_sym, \has_error_code, \read_cr2, \paranoid, \shift_ist, \ist_offset
.if \paranoid == 1
/*
@@ -983,21 +1013,9 @@ ENTRY(\sym)
* run in real process context if user_mode(regs).
*/
.Lfrom_usermode_switch_stack_\@:
- call error_entry
-
- movq %rsp, %rdi /* pt_regs pointer */
-
- .if \has_error_code
- movq ORIG_RAX(%rsp), %rsi /* get error code */
- movq $-1, ORIG_RAX(%rsp) /* no syscall to restart */
- .else
- xorl %esi, %esi /* no error code */
+ idtentry_part \do_sym, \has_error_code, \read_cr2, paranoid=0
.endif
- call \do_sym
-
- jmp error_exit
- .endif
_ASM_NOKPROBE(\sym)
END(\sym)
.endm
@@ -1007,7 +1025,7 @@ idtentry overflow do_overflow has_error_code=0
idtentry bounds do_bounds has_error_code=0
idtentry invalid_op do_invalid_op has_error_code=0
idtentry device_not_available do_device_not_available has_error_code=0
-idtentry double_fault do_double_fault has_error_code=1 paranoid=2
+idtentry double_fault do_double_fault has_error_code=1 paranoid=2 read_cr2=1
idtentry coprocessor_segment_overrun do_coprocessor_segment_overrun has_error_code=0
idtentry invalid_TSS do_invalid_TSS has_error_code=1
idtentry segment_not_present do_segment_not_present has_error_code=1
@@ -1179,10 +1197,10 @@ idtentry xendebug do_debug has_error_code=0
#endif
idtentry general_protection do_general_protection has_error_code=1
-idtentry page_fault do_page_fault has_error_code=1
+idtentry page_fault do_page_fault has_error_code=1 read_cr2=1
#ifdef CONFIG_KVM_GUEST
-idtentry async_page_fault do_async_page_fault has_error_code=1
+idtentry async_page_fault do_async_page_fault has_error_code=1 read_cr2=1
#endif
#ifdef CONFIG_X86_MCE
@@ -1281,18 +1299,9 @@ ENTRY(error_entry)
movq %rax, %rsp /* switch stack */
ENCODE_FRAME_POINTER
pushq %r12
-
- /*
- * We need to tell lockdep that IRQs are off. We can't do this until
- * we fix gsbase, and we should do it before enter_from_user_mode
- * (which can take locks).
- */
- TRACE_IRQS_OFF
- CALL_enter_from_user_mode
ret
.Lerror_entry_done:
- TRACE_IRQS_OFF
ret
/*
diff --git a/arch/x86/entry/thunk_64.S b/arch/x86/entry/thunk_64.S
index cfdca8b42c70..cc20465b2867 100644
--- a/arch/x86/entry/thunk_64.S
+++ b/arch/x86/entry/thunk_64.S
@@ -12,9 +12,7 @@
/* rdi: arg1 ... normal C conventions. rax is saved/restored. */
.macro THUNK name, func, put_ret_addr_in_rdi=0
- .globl \name
- .type \name, @function
-\name:
+ ENTRY(\name)
pushq %rbp
movq %rsp, %rbp
@@ -35,6 +33,7 @@
call \func
jmp .L_restore
+ ENDPROC(\name)
_ASM_NOKPROBE(\name)
.endm
diff --git a/arch/x86/entry/vdso/Makefile b/arch/x86/entry/vdso/Makefile
index 34773395139a..8df549138193 100644
--- a/arch/x86/entry/vdso/Makefile
+++ b/arch/x86/entry/vdso/Makefile
@@ -176,9 +176,8 @@ quiet_cmd_vdso = VDSO $@
-T $(filter %.lds,$^) $(filter %.o,$^) && \
sh $(srctree)/$(src)/checkundef.sh '$(NM)' '$@'
-VDSO_LDFLAGS = -shared $(call ld-option, --hash-style=both) \
- $(call ld-option, --build-id) $(call ld-option, --eh-frame-hdr) \
- -Bsymbolic
+VDSO_LDFLAGS = -shared --hash-style=both --build-id \
+ $(call ld-option, --eh-frame-hdr) -Bsymbolic
GCOV_PROFILE := n
quiet_cmd_vdso_and_check = VDSO $@
diff --git a/arch/x86/hyperv/hv_init.c b/arch/x86/hyperv/hv_init.c
index 0e033ef11a9f..0d258688c8cf 100644
--- a/arch/x86/hyperv/hv_init.c
+++ b/arch/x86/hyperv/hv_init.c
@@ -60,8 +60,17 @@ static int hv_cpu_init(unsigned int cpu)
if (!hv_vp_assist_page)
return 0;
- if (!*hvp)
- *hvp = __vmalloc(PAGE_SIZE, GFP_KERNEL, PAGE_KERNEL);
+ /*
+ * The VP ASSIST PAGE is an "overlay" page (see Hyper-V TLFS's Section
+ * 5.2.1 "GPA Overlay Pages"). Here it must be zeroed out to make sure
+ * we always write the EOI MSR in hv_apic_eoi_write() *after* the
+ * EOI optimization is disabled in hv_cpu_die(), otherwise a CPU may
+ * not be stopped in the case of CPU offlining and the VM will hang.
+ */
+ if (!*hvp) {
+ *hvp = __vmalloc(PAGE_SIZE, GFP_KERNEL | __GFP_ZERO,
+ PAGE_KERNEL);
+ }
if (*hvp) {
u64 val;
diff --git a/arch/x86/include/asm/apic.h b/arch/x86/include/asm/apic.h
index 050e5f9ebf81..e647aa095867 100644
--- a/arch/x86/include/asm/apic.h
+++ b/arch/x86/include/asm/apic.h
@@ -49,7 +49,7 @@ static inline void generic_apic_probe(void)
#ifdef CONFIG_X86_LOCAL_APIC
-extern unsigned int apic_verbosity;
+extern int apic_verbosity;
extern int local_apic_timer_c2_ok;
extern int disable_apic;
diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h
index 0cc5b611a113..7b0a4ee77313 100644
--- a/arch/x86/include/asm/kvm_host.h
+++ b/arch/x86/include/asm/kvm_host.h
@@ -607,15 +607,16 @@ struct kvm_vcpu_arch {
/*
* QEMU userspace and the guest each have their own FPU state.
- * In vcpu_run, we switch between the user, maintained in the
- * task_struct struct, and guest FPU contexts. While running a VCPU,
- * the VCPU thread will have the guest FPU context.
+ * In vcpu_run, we switch between the user and guest FPU contexts.
+ * While running a VCPU, the VCPU thread will have the guest FPU
+ * context.
*
* Note that while the PKRU state lives inside the fpu registers,
* it is switched out separately at VMENTER and VMEXIT time. The
* "guest_fpu" state here contains the guest FPU context, with the
* host PRKU bits.
*/
+ struct fpu *user_fpu;
struct fpu *guest_fpu;
u64 xcr0;
@@ -1496,25 +1497,29 @@ enum {
#define kvm_arch_vcpu_memslots_id(vcpu) ((vcpu)->arch.hflags & HF_SMM_MASK ? 1 : 0)
#define kvm_memslots_for_spte_role(kvm, role) __kvm_memslots(kvm, (role).smm)
+asmlinkage void __noreturn kvm_spurious_fault(void);
+
/*
* Hardware virtualization extension instructions may fault if a
* reboot turns off virtualization while processes are running.
- * Trap the fault and ignore the instruction if that happens.
+ * Usually after catching the fault we just panic; during reboot
+ * instead the instruction is ignored.
*/
-asmlinkage void kvm_spurious_fault(void);
-
-#define ____kvm_handle_fault_on_reboot(insn, cleanup_insn) \
- "666: " insn "\n\t" \
- "668: \n\t" \
- ".pushsection .fixup, \"ax\" \n" \
- "667: \n\t" \
- cleanup_insn "\n\t" \
- "cmpb $0, kvm_rebooting \n\t" \
- "jne 668b \n\t" \
- __ASM_SIZE(push) " $666b \n\t" \
- "jmp kvm_spurious_fault \n\t" \
- ".popsection \n\t" \
- _ASM_EXTABLE(666b, 667b)
+#define ____kvm_handle_fault_on_reboot(insn, cleanup_insn) \
+ "666: \n\t" \
+ insn "\n\t" \
+ "jmp 668f \n\t" \
+ "667: \n\t" \
+ "call kvm_spurious_fault \n\t" \
+ "668: \n\t" \
+ ".pushsection .fixup, \"ax\" \n\t" \
+ "700: \n\t" \
+ cleanup_insn "\n\t" \
+ "cmpb $0, kvm_rebooting\n\t" \
+ "je 667b \n\t" \
+ "jmp 668b \n\t" \
+ ".popsection \n\t" \
+ _ASM_EXTABLE(666b, 700b)
#define __kvm_handle_fault_on_reboot(insn) \
____kvm_handle_fault_on_reboot(insn, "")
diff --git a/arch/x86/include/asm/kvm_para.h b/arch/x86/include/asm/kvm_para.h
index 5ed3cf1c3934..9b4df6eaa11a 100644
--- a/arch/x86/include/asm/kvm_para.h
+++ b/arch/x86/include/asm/kvm_para.h
@@ -92,7 +92,7 @@ void kvm_async_pf_task_wait(u32 token, int interrupt_kernel);
void kvm_async_pf_task_wake(u32 token);
u32 kvm_read_and_reset_pf_reason(void);
extern void kvm_disable_steal_time(void);
-void do_async_page_fault(struct pt_regs *regs, unsigned long error_code);
+void do_async_page_fault(struct pt_regs *regs, unsigned long error_code, unsigned long address);
#ifdef CONFIG_PARAVIRT_SPINLOCKS
void __init kvm_spinlock_init(void);
diff --git a/arch/x86/include/asm/paravirt.h b/arch/x86/include/asm/paravirt.h
index c25c38a05c1c..dce26f1d13e1 100644
--- a/arch/x86/include/asm/paravirt.h
+++ b/arch/x86/include/asm/paravirt.h
@@ -116,7 +116,7 @@ static inline void write_cr0(unsigned long x)
static inline unsigned long read_cr2(void)
{
- return PVOP_CALL0(unsigned long, mmu.read_cr2);
+ return PVOP_CALLEE0(unsigned long, mmu.read_cr2);
}
static inline void write_cr2(unsigned long x)
@@ -746,6 +746,7 @@ bool __raw_callee_save___native_vcpu_is_preempted(long cpu);
PV_RESTORE_ALL_CALLER_REGS \
FRAME_END \
"ret;" \
+ ".size " PV_THUNK_NAME(func) ", .-" PV_THUNK_NAME(func) ";" \
".popsection")
/* Get a reference to a callee-save function */
@@ -909,13 +910,7 @@ extern void default_banner(void);
ANNOTATE_RETPOLINE_SAFE; \
call PARA_INDIRECT(pv_ops+PV_CPU_swapgs); \
)
-#endif
-
-#define GET_CR2_INTO_RAX \
- ANNOTATE_RETPOLINE_SAFE; \
- call PARA_INDIRECT(pv_ops+PV_MMU_read_cr2);
-#ifdef CONFIG_PARAVIRT_XXL
#define USERGS_SYSRET64 \
PARA_SITE(PARA_PATCH(PV_CPU_usergs_sysret64), \
ANNOTATE_RETPOLINE_SAFE; \
@@ -929,9 +924,19 @@ extern void default_banner(void);
call PARA_INDIRECT(pv_ops+PV_IRQ_save_fl); \
PV_RESTORE_REGS(clobbers | CLBR_CALLEE_SAVE);)
#endif
-#endif
+#endif /* CONFIG_PARAVIRT_XXL */
+#endif /* CONFIG_X86_64 */
+
+#ifdef CONFIG_PARAVIRT_XXL
+
+#define GET_CR2_INTO_AX \
+ PARA_SITE(PARA_PATCH(PV_MMU_read_cr2), \
+ ANNOTATE_RETPOLINE_SAFE; \
+ call PARA_INDIRECT(pv_ops+PV_MMU_read_cr2); \
+ )
+
+#endif /* CONFIG_PARAVIRT_XXL */
-#endif /* CONFIG_X86_32 */
#endif /* __ASSEMBLY__ */
#else /* CONFIG_PARAVIRT */
diff --git a/arch/x86/include/asm/paravirt_types.h b/arch/x86/include/asm/paravirt_types.h
index 946f8f1f1efc..639b2df445ee 100644
--- a/arch/x86/include/asm/paravirt_types.h
+++ b/arch/x86/include/asm/paravirt_types.h
@@ -220,7 +220,7 @@ struct pv_mmu_ops {
void (*exit_mmap)(struct mm_struct *mm);
#ifdef CONFIG_PARAVIRT_XXL
- unsigned long (*read_cr2)(void);
+ struct paravirt_callee_save read_cr2;
void (*write_cr2)(unsigned long);
unsigned long (*read_cr3)(void);
diff --git a/arch/x86/include/asm/traps.h b/arch/x86/include/asm/traps.h
index f2bd284abc16..b25e633033c3 100644
--- a/arch/x86/include/asm/traps.h
+++ b/arch/x86/include/asm/traps.h
@@ -74,14 +74,14 @@ dotraplinkage void do_invalid_TSS(struct pt_regs *regs, long error_code);
dotraplinkage void do_segment_not_present(struct pt_regs *regs, long error_code);
dotraplinkage void do_stack_segment(struct pt_regs *regs, long error_code);
#ifdef CONFIG_X86_64
-dotraplinkage void do_double_fault(struct pt_regs *regs, long error_code);
+dotraplinkage void do_double_fault(struct pt_regs *regs, long error_code, unsigned long address);
asmlinkage __visible notrace struct pt_regs *sync_regs(struct pt_regs *eregs);
asmlinkage __visible notrace
struct bad_iret_stack *fixup_bad_iret(struct bad_iret_stack *s);
void __init trap_init(void);
#endif
dotraplinkage void do_general_protection(struct pt_regs *regs, long error_code);
-dotraplinkage void do_page_fault(struct pt_regs *regs, unsigned long error_code);
+dotraplinkage void do_page_fault(struct pt_regs *regs, unsigned long error_code, unsigned long address);
dotraplinkage void do_spurious_interrupt_bug(struct pt_regs *regs, long error_code);
dotraplinkage void do_coprocessor_error(struct pt_regs *regs, long error_code);
dotraplinkage void do_alignment_check(struct pt_regs *regs, long error_code);
diff --git a/arch/x86/include/uapi/asm/kvm.h b/arch/x86/include/uapi/asm/kvm.h
index e901b0ab116f..503d3f42da16 100644
--- a/arch/x86/include/uapi/asm/kvm.h
+++ b/arch/x86/include/uapi/asm/kvm.h
@@ -435,9 +435,12 @@ struct kvm_nested_state {
/* for KVM_CAP_PMU_EVENT_FILTER */
struct kvm_pmu_event_filter {
- __u32 action;
- __u32 nevents;
- __u64 events[0];
+ __u32 action;
+ __u32 nevents;
+ __u32 fixed_counter_bitmap;
+ __u32 flags;
+ __u32 pad[4];
+ __u64 events[0];
};
#define KVM_PMU_EVENT_ALLOW 0
diff --git a/arch/x86/kernel/apic/apic.c b/arch/x86/kernel/apic/apic.c
index 1bd91cb7b320..f5291362da1a 100644
--- a/arch/x86/kernel/apic/apic.c
+++ b/arch/x86/kernel/apic/apic.c
@@ -183,7 +183,7 @@ EXPORT_SYMBOL_GPL(local_apic_timer_c2_ok);
/*
* Debug level, exported for io_apic.c
*/
-unsigned int apic_verbosity;
+int apic_verbosity;
int pic_mode;
diff --git a/arch/x86/kernel/asm-offsets.c b/arch/x86/kernel/asm-offsets.c
index da64452584b0..5c7ee3df4d0b 100644
--- a/arch/x86/kernel/asm-offsets.c
+++ b/arch/x86/kernel/asm-offsets.c
@@ -76,6 +76,7 @@ static void __used common(void)
BLANK();
OFFSET(XEN_vcpu_info_mask, vcpu_info, evtchn_upcall_mask);
OFFSET(XEN_vcpu_info_pending, vcpu_info, evtchn_upcall_pending);
+ OFFSET(XEN_vcpu_info_arch_cr2, vcpu_info, arch.cr2);
#endif
BLANK();
diff --git a/arch/x86/kernel/e820.c b/arch/x86/kernel/e820.c
index e69408bf664b..7da2bcd2b8eb 100644
--- a/arch/x86/kernel/e820.c
+++ b/arch/x86/kernel/e820.c
@@ -86,9 +86,9 @@ static bool _e820__mapped_any(struct e820_table *table,
continue;
if (entry->addr >= end || entry->addr + entry->size <= start)
continue;
- return 1;
+ return true;
}
- return 0;
+ return false;
}
bool e820__mapped_raw_any(u64 start, u64 end, enum e820_type type)
diff --git a/arch/x86/kernel/head_64.S b/arch/x86/kernel/head_64.S
index bcd206c8ac90..a6342c899be5 100644
--- a/arch/x86/kernel/head_64.S
+++ b/arch/x86/kernel/head_64.S
@@ -29,9 +29,7 @@
#ifdef CONFIG_PARAVIRT_XXL
#include <asm/asm-offsets.h>
#include <asm/paravirt.h>
-#define GET_CR2_INTO(reg) GET_CR2_INTO_RAX ; movq %rax, reg
#else
-#define GET_CR2_INTO(reg) movq %cr2, reg
#define INTERRUPT_RETURN iretq
#endif
@@ -253,10 +251,10 @@ END(secondary_startup_64)
* start_secondary() via .Ljump_to_C_code.
*/
ENTRY(start_cpu0)
- movq initial_stack(%rip), %rsp
UNWIND_HINT_EMPTY
+ movq initial_stack(%rip), %rsp
jmp .Ljump_to_C_code
-ENDPROC(start_cpu0)
+END(start_cpu0)
#endif
/* Both SMP bootup and ACPI suspend change these variables */
@@ -323,7 +321,7 @@ early_idt_handler_common:
cmpq $14,%rsi /* Page fault? */
jnz 10f
- GET_CR2_INTO(%rdi) /* Can clobber any volatile register if pv */
+ GET_CR2_INTO(%rdi) /* can clobber %rax if pv */
call early_make_pgtable
andl %eax,%eax
jz 20f /* All good */
diff --git a/arch/x86/kernel/kvm.c b/arch/x86/kernel/kvm.c
index 82caf01b63dd..b7f34fe2171e 100644
--- a/arch/x86/kernel/kvm.c
+++ b/arch/x86/kernel/kvm.c
@@ -242,23 +242,23 @@ EXPORT_SYMBOL_GPL(kvm_read_and_reset_pf_reason);
NOKPROBE_SYMBOL(kvm_read_and_reset_pf_reason);
dotraplinkage void
-do_async_page_fault(struct pt_regs *regs, unsigned long error_code)
+do_async_page_fault(struct pt_regs *regs, unsigned long error_code, unsigned long address)
{
enum ctx_state prev_state;
switch (kvm_read_and_reset_pf_reason()) {
default:
- do_page_fault(regs, error_code);
+ do_page_fault(regs, error_code, address);
break;
case KVM_PV_REASON_PAGE_NOT_PRESENT:
/* page is swapped out by the host. */
prev_state = exception_enter();
- kvm_async_pf_task_wait((u32)read_cr2(), !user_mode(regs));
+ kvm_async_pf_task_wait((u32)address, !user_mode(regs));
exception_exit(prev_state);
break;
case KVM_PV_REASON_PAGE_READY:
rcu_irq_enter();
- kvm_async_pf_task_wake((u32)read_cr2());
+ kvm_async_pf_task_wake((u32)address);
rcu_irq_exit();
break;
}
@@ -838,6 +838,7 @@ asm(
"cmpb $0, " __stringify(KVM_STEAL_TIME_preempted) "+steal_time(%rax);"
"setne %al;"
"ret;"
+".size __raw_callee_save___kvm_vcpu_is_preempted, .-__raw_callee_save___kvm_vcpu_is_preempted;"
".popsection");
#endif
diff --git a/arch/x86/kernel/mpparse.c b/arch/x86/kernel/mpparse.c
index 1bfe5c6e6cfe..afac7ccce72f 100644
--- a/arch/x86/kernel/mpparse.c
+++ b/arch/x86/kernel/mpparse.c
@@ -546,17 +546,15 @@ void __init default_get_smp_config(unsigned int early)
* local APIC has default address
*/
mp_lapic_addr = APIC_DEFAULT_PHYS_BASE;
- return;
+ goto out;
}
pr_info("Default MP configuration #%d\n", mpf->feature1);
construct_default_ISA_mptable(mpf->feature1);
} else if (mpf->physptr) {
- if (check_physptr(mpf, early)) {
- early_memunmap(mpf, sizeof(*mpf));
- return;
- }
+ if (check_physptr(mpf, early))
+ goto out;
} else
BUG();
@@ -565,7 +563,7 @@ void __init default_get_smp_config(unsigned int early)
/*
* Only use the first configuration found.
*/
-
+out:
early_memunmap(mpf, sizeof(*mpf));
}
diff --git a/arch/x86/kernel/paravirt.c b/arch/x86/kernel/paravirt.c
index 98039d7fb998..0aa6256eedd8 100644
--- a/arch/x86/kernel/paravirt.c
+++ b/arch/x86/kernel/paravirt.c
@@ -370,7 +370,7 @@ struct paravirt_patch_template pv_ops = {
.mmu.exit_mmap = paravirt_nop,
#ifdef CONFIG_PARAVIRT_XXL
- .mmu.read_cr2 = native_read_cr2,
+ .mmu.read_cr2 = __PV_IS_CALLEE_SAVE(native_read_cr2),
.mmu.write_cr2 = native_write_cr2,
.mmu.read_cr3 = __native_read_cr3,
.mmu.write_cr3 = native_write_cr3,
diff --git a/arch/x86/kernel/process_64.c b/arch/x86/kernel/process_64.c
index 250e4c4ac6d9..af64519b2695 100644
--- a/arch/x86/kernel/process_64.c
+++ b/arch/x86/kernel/process_64.c
@@ -143,17 +143,7 @@ void __show_regs(struct pt_regs *regs, enum show_regs_mode mode)
void release_thread(struct task_struct *dead_task)
{
- if (dead_task->mm) {
-#ifdef CONFIG_MODIFY_LDT_SYSCALL
- if (dead_task->mm->context.ldt) {
- pr_warn("WARNING: dead process %s still has LDT? <%p/%d>\n",
- dead_task->comm,
- dead_task->mm->context.ldt->entries,
- dead_task->mm->context.ldt->nr_entries);
- BUG();
- }
-#endif
- }
+ WARN_ON(dead_task->mm);
}
enum which_selector {
diff --git a/arch/x86/kernel/ptrace.c b/arch/x86/kernel/ptrace.c
index 71691a8310e7..0fdbe89d0754 100644
--- a/arch/x86/kernel/ptrace.c
+++ b/arch/x86/kernel/ptrace.c
@@ -369,12 +369,22 @@ static int putreg(struct task_struct *child,
case offsetof(struct user_regs_struct,fs_base):
if (value >= TASK_SIZE_MAX)
return -EIO;
- x86_fsbase_write_task(child, value);
+ /*
+ * When changing the FS base, use do_arch_prctl_64()
+ * to set the index to zero and to set the base
+ * as requested.
+ */
+ if (child->thread.fsbase != value)
+ return do_arch_prctl_64(child, ARCH_SET_FS, value);
return 0;
case offsetof(struct user_regs_struct,gs_base):
+ /*
+ * Exactly the same here as the %fs handling above.
+ */
if (value >= TASK_SIZE_MAX)
return -EIO;
- x86_gsbase_write_task(child, value);
+ if (child->thread.gsbase != value)
+ return do_arch_prctl_64(child, ARCH_SET_GS, value);
return 0;
#endif
}
diff --git a/arch/x86/kernel/traps.c b/arch/x86/kernel/traps.c
index 87095a477154..4bb0f8447112 100644
--- a/arch/x86/kernel/traps.c
+++ b/arch/x86/kernel/traps.c
@@ -313,13 +313,10 @@ __visible void __noreturn handle_stack_overflow(const char *message,
#ifdef CONFIG_X86_64
/* Runs on IST stack */
-dotraplinkage void do_double_fault(struct pt_regs *regs, long error_code)
+dotraplinkage void do_double_fault(struct pt_regs *regs, long error_code, unsigned long cr2)
{
static const char str[] = "double fault";
struct task_struct *tsk = current;
-#ifdef CONFIG_VMAP_STACK
- unsigned long cr2;
-#endif
#ifdef CONFIG_X86_ESPFIX64
extern unsigned char native_irq_return_iret[];
@@ -415,7 +412,6 @@ dotraplinkage void do_double_fault(struct pt_regs *regs, long error_code)
* stack even if the actual trigger for the double fault was
* something else.
*/
- cr2 = read_cr2();
if ((unsigned long)task_stack_page(tsk) - 1 - cr2 < PAGE_SIZE)
handle_stack_overflow("kernel stack overflow (double-fault)", regs, cr2);
#endif
diff --git a/arch/x86/kvm/cpuid.c b/arch/x86/kvm/cpuid.c
index ead681210306..22c2720cd948 100644
--- a/arch/x86/kvm/cpuid.c
+++ b/arch/x86/kvm/cpuid.c
@@ -368,9 +368,13 @@ static inline void do_cpuid_7_mask(struct kvm_cpuid_entry2 *entry, int index)
F(SPEC_CTRL_SSBD) | F(ARCH_CAPABILITIES) | F(INTEL_STIBP) |
F(MD_CLEAR);
+ /* cpuid 7.1.eax */
+ const u32 kvm_cpuid_7_1_eax_x86_features =
+ F(AVX512_BF16);
+
switch (index) {
case 0:
- entry->eax = 0;
+ entry->eax = min(entry->eax, 1u);
entry->ebx &= kvm_cpuid_7_0_ebx_x86_features;
cpuid_mask(&entry->ebx, CPUID_7_0_EBX);
/* TSC_ADJUST is emulated */
@@ -394,6 +398,12 @@ static inline void do_cpuid_7_mask(struct kvm_cpuid_entry2 *entry, int index)
*/
entry->edx |= F(ARCH_CAPABILITIES);
break;
+ case 1:
+ entry->eax &= kvm_cpuid_7_1_eax_x86_features;
+ entry->ebx = 0;
+ entry->ecx = 0;
+ entry->edx = 0;
+ break;
default:
WARN_ON_ONCE(1);
entry->eax = 0;
diff --git a/arch/x86/kvm/emulate.c b/arch/x86/kvm/emulate.c
index 8e409ad448f9..718f7d9afedc 100644
--- a/arch/x86/kvm/emulate.c
+++ b/arch/x86/kvm/emulate.c
@@ -312,29 +312,42 @@ static void invalidate_registers(struct x86_emulate_ctxt *ctxt)
static int fastop(struct x86_emulate_ctxt *ctxt, void (*fop)(struct fastop *));
-#define FOP_FUNC(name) \
+#define __FOP_FUNC(name) \
".align " __stringify(FASTOP_SIZE) " \n\t" \
".type " name ", @function \n\t" \
name ":\n\t"
-#define FOP_RET "ret \n\t"
+#define FOP_FUNC(name) \
+ __FOP_FUNC(#name)
+
+#define __FOP_RET(name) \
+ "ret \n\t" \
+ ".size " name ", .-" name "\n\t"
+
+#define FOP_RET(name) \
+ __FOP_RET(#name)
#define FOP_START(op) \
extern void em_##op(struct fastop *fake); \
asm(".pushsection .text, \"ax\" \n\t" \
".global em_" #op " \n\t" \
- FOP_FUNC("em_" #op)
+ ".align " __stringify(FASTOP_SIZE) " \n\t" \
+ "em_" #op ":\n\t"
#define FOP_END \
".popsection")
+#define __FOPNOP(name) \
+ __FOP_FUNC(name) \
+ __FOP_RET(name)
+
#define FOPNOP() \
- FOP_FUNC(__stringify(__UNIQUE_ID(nop))) \
- FOP_RET
+ __FOPNOP(__stringify(__UNIQUE_ID(nop)))
#define FOP1E(op, dst) \
- FOP_FUNC(#op "_" #dst) \
- "10: " #op " %" #dst " \n\t" FOP_RET
+ __FOP_FUNC(#op "_" #dst) \
+ "10: " #op " %" #dst " \n\t" \
+ __FOP_RET(#op "_" #dst)
#define FOP1EEX(op, dst) \
FOP1E(op, dst) _ASM_EXTABLE(10b, kvm_fastop_exception)
@@ -366,8 +379,9 @@ static int fastop(struct x86_emulate_ctxt *ctxt, void (*fop)(struct fastop *));
FOP_END
#define FOP2E(op, dst, src) \
- FOP_FUNC(#op "_" #dst "_" #src) \
- #op " %" #src ", %" #dst " \n\t" FOP_RET
+ __FOP_FUNC(#op "_" #dst "_" #src) \
+ #op " %" #src ", %" #dst " \n\t" \
+ __FOP_RET(#op "_" #dst "_" #src)
#define FASTOP2(op) \
FOP_START(op) \
@@ -405,8 +419,9 @@ static int fastop(struct x86_emulate_ctxt *ctxt, void (*fop)(struct fastop *));
FOP_END
#define FOP3E(op, dst, src, src2) \
- FOP_FUNC(#op "_" #dst "_" #src "_" #src2) \
- #op " %" #src2 ", %" #src ", %" #dst " \n\t" FOP_RET
+ __FOP_FUNC(#op "_" #dst "_" #src "_" #src2) \
+ #op " %" #src2 ", %" #src ", %" #dst " \n\t"\
+ __FOP_RET(#op "_" #dst "_" #src "_" #src2)
/* 3-operand, word-only, src2=cl */
#define FASTOP3WCL(op) \
@@ -423,7 +438,7 @@ static int fastop(struct x86_emulate_ctxt *ctxt, void (*fop)(struct fastop *));
".type " #op ", @function \n\t" \
#op ": \n\t" \
#op " %al \n\t" \
- FOP_RET
+ __FOP_RET(#op)
asm(".pushsection .fixup, \"ax\"\n"
".global kvm_fastop_exception \n"
@@ -449,7 +464,10 @@ FOP_SETCC(setle)
FOP_SETCC(setnle)
FOP_END;
-FOP_START(salc) "pushf; sbb %al, %al; popf \n\t" FOP_RET
+FOP_START(salc)
+FOP_FUNC(salc)
+"pushf; sbb %al, %al; popf \n\t"
+FOP_RET(salc)
FOP_END;
/*
diff --git a/arch/x86/kvm/hyperv.c b/arch/x86/kvm/hyperv.c
index a39e38f13029..c10a8b10b203 100644
--- a/arch/x86/kvm/hyperv.c
+++ b/arch/x86/kvm/hyperv.c
@@ -1594,7 +1594,7 @@ int kvm_hv_hypercall(struct kvm_vcpu *vcpu)
{
u64 param, ingpa, outgpa, ret = HV_STATUS_SUCCESS;
uint16_t code, rep_idx, rep_cnt;
- bool fast, longmode, rep;
+ bool fast, rep;
/*
* hypercall generates UD from non zero cpl and real mode
@@ -1605,9 +1605,14 @@ int kvm_hv_hypercall(struct kvm_vcpu *vcpu)
return 1;
}
- longmode = is_64_bit_mode(vcpu);
-
- if (!longmode) {
+#ifdef CONFIG_X86_64
+ if (is_64_bit_mode(vcpu)) {
+ param = kvm_rcx_read(vcpu);
+ ingpa = kvm_rdx_read(vcpu);
+ outgpa = kvm_r8_read(vcpu);
+ } else
+#endif
+ {
param = ((u64)kvm_rdx_read(vcpu) << 32) |
(kvm_rax_read(vcpu) & 0xffffffff);
ingpa = ((u64)kvm_rbx_read(vcpu) << 32) |
@@ -1615,13 +1620,6 @@ int kvm_hv_hypercall(struct kvm_vcpu *vcpu)
outgpa = ((u64)kvm_rdi_read(vcpu) << 32) |
(kvm_rsi_read(vcpu) & 0xffffffff);
}
-#ifdef CONFIG_X86_64
- else {
- param = kvm_rcx_read(vcpu);
- ingpa = kvm_rdx_read(vcpu);
- outgpa = kvm_r8_read(vcpu);
- }
-#endif
code = param & 0xffff;
fast = !!(param & HV_HYPERCALL_FAST_BIT);
diff --git a/arch/x86/kvm/ioapic.c b/arch/x86/kvm/ioapic.c
index 1add1bc881e2..d859ae8890d0 100644
--- a/arch/x86/kvm/ioapic.c
+++ b/arch/x86/kvm/ioapic.c
@@ -45,11 +45,6 @@
#include "lapic.h"
#include "irq.h"
-#if 0
-#define ioapic_debug(fmt,arg...) printk(KERN_WARNING fmt,##arg)
-#else
-#define ioapic_debug(fmt, arg...)
-#endif
static int ioapic_service(struct kvm_ioapic *vioapic, int irq,
bool line_status);
@@ -294,7 +289,6 @@ static void ioapic_write_indirect(struct kvm_ioapic *ioapic, u32 val)
default:
index = (ioapic->ioregsel - 0x10) >> 1;
- ioapic_debug("change redir index %x val %x\n", index, val);
if (index >= IOAPIC_NUM_PINS)
return;
e = &ioapic->redirtbl[index];
@@ -343,12 +337,6 @@ static int ioapic_service(struct kvm_ioapic *ioapic, int irq, bool line_status)
entry->fields.remote_irr))
return -1;
- ioapic_debug("dest=%x dest_mode=%x delivery_mode=%x "
- "vector=%x trig_mode=%x\n",
- entry->fields.dest_id, entry->fields.dest_mode,
- entry->fields.delivery_mode, entry->fields.vector,
- entry->fields.trig_mode);
-
irqe.dest_id = entry->fields.dest_id;
irqe.vector = entry->fields.vector;
irqe.dest_mode = entry->fields.dest_mode;
@@ -515,7 +503,6 @@ static int ioapic_mmio_read(struct kvm_vcpu *vcpu, struct kvm_io_device *this,
if (!ioapic_in_range(ioapic, addr))
return -EOPNOTSUPP;
- ioapic_debug("addr %lx\n", (unsigned long)addr);
ASSERT(!(addr & 0xf)); /* check alignment */
addr &= 0xff;
@@ -558,8 +545,6 @@ static int ioapic_mmio_write(struct kvm_vcpu *vcpu, struct kvm_io_device *this,
if (!ioapic_in_range(ioapic, addr))
return -EOPNOTSUPP;
- ioapic_debug("ioapic_mmio_write addr=%p len=%d val=%p\n",
- (void*)addr, len, val);
ASSERT(!(addr & 0xf)); /* check alignment */
switch (len) {
diff --git a/arch/x86/kvm/lapic.c b/arch/x86/kvm/lapic.c
index a232e76d8f23..0aa158657f20 100644
--- a/arch/x86/kvm/lapic.c
+++ b/arch/x86/kvm/lapic.c
@@ -52,9 +52,6 @@
#define PRIu64 "u"
#define PRIo64 "o"
-/* #define apic_debug(fmt,arg...) printk(KERN_WARNING fmt,##arg) */
-#define apic_debug(fmt, arg...) do {} while (0)
-
/* 14 is the version for Xeon and Pentium 8.4.8*/
#define APIC_VERSION (0x14UL | ((KVM_APIC_LVT_NUM - 1) << 16))
#define LAPIC_MMIO_LENGTH (1 << 12)
@@ -121,6 +118,17 @@ static inline u32 kvm_x2apic_id(struct kvm_lapic *apic)
return apic->vcpu->vcpu_id;
}
+bool kvm_can_post_timer_interrupt(struct kvm_vcpu *vcpu)
+{
+ return pi_inject_timer && kvm_vcpu_apicv_active(vcpu);
+}
+EXPORT_SYMBOL_GPL(kvm_can_post_timer_interrupt);
+
+static bool kvm_use_posted_timer_interrupt(struct kvm_vcpu *vcpu)
+{
+ return kvm_can_post_timer_interrupt(vcpu) && vcpu->mode == IN_GUEST_MODE;
+}
+
static inline bool kvm_apic_map_get_logical_dest(struct kvm_apic_map *map,
u32 dest_id, struct kvm_lapic ***cluster, u16 *mask) {
switch (map->mode) {
@@ -627,7 +635,7 @@ static bool pv_eoi_get_pending(struct kvm_vcpu *vcpu)
{
u8 val;
if (pv_eoi_get_user(vcpu, &val) < 0)
- apic_debug("Can't read EOI MSR value: 0x%llx\n",
+ printk(KERN_WARNING "Can't read EOI MSR value: 0x%llx\n",
(unsigned long long)vcpu->arch.pv_eoi.msr_val);
return val & 0x1;
}
@@ -635,7 +643,7 @@ static bool pv_eoi_get_pending(struct kvm_vcpu *vcpu)
static void pv_eoi_set_pending(struct kvm_vcpu *vcpu)
{
if (pv_eoi_put_user(vcpu, KVM_PV_EOI_ENABLED) < 0) {
- apic_debug("Can't set EOI MSR value: 0x%llx\n",
+ printk(KERN_WARNING "Can't set EOI MSR value: 0x%llx\n",
(unsigned long long)vcpu->arch.pv_eoi.msr_val);
return;
}
@@ -645,7 +653,7 @@ static void pv_eoi_set_pending(struct kvm_vcpu *vcpu)
static void pv_eoi_clr_pending(struct kvm_vcpu *vcpu)
{
if (pv_eoi_put_user(vcpu, KVM_PV_EOI_DISABLED) < 0) {
- apic_debug("Can't clear EOI MSR value: 0x%llx\n",
+ printk(KERN_WARNING "Can't clear EOI MSR value: 0x%llx\n",
(unsigned long long)vcpu->arch.pv_eoi.msr_val);
return;
}
@@ -679,9 +687,6 @@ static bool __apic_update_ppr(struct kvm_lapic *apic, u32 *new_ppr)
else
ppr = isrv & 0xf0;
- apic_debug("vlapic %p, ppr 0x%x, isr 0x%x, isrv 0x%x",
- apic, ppr, isr, isrv);
-
*new_ppr = ppr;
if (old_ppr != ppr)
kvm_lapic_set_reg(apic, APIC_PROCPRI, ppr);
@@ -758,8 +763,6 @@ static bool kvm_apic_match_logical_addr(struct kvm_lapic *apic, u32 mda)
return ((logical_id >> 4) == (mda >> 4))
&& (logical_id & mda & 0xf) != 0;
default:
- apic_debug("Bad DFR vcpu %d: %08x\n",
- apic->vcpu->vcpu_id, kvm_lapic_get_reg(apic, APIC_DFR));
return false;
}
}
@@ -798,10 +801,6 @@ bool kvm_apic_match_dest(struct kvm_vcpu *vcpu, struct kvm_lapic *source,
struct kvm_lapic *target = vcpu->arch.apic;
u32 mda = kvm_apic_mda(vcpu, dest, source, target);
- apic_debug("target %p, source %p, dest 0x%x, "
- "dest_mode 0x%x, short_hand 0x%x\n",
- target, source, dest, dest_mode, short_hand);
-
ASSERT(target);
switch (short_hand) {
case APIC_DEST_NOSHORT:
@@ -816,8 +815,6 @@ bool kvm_apic_match_dest(struct kvm_vcpu *vcpu, struct kvm_lapic *source,
case APIC_DEST_ALLBUT:
return target != source;
default:
- apic_debug("kvm: apic: Bad dest shorthand value %x\n",
- short_hand);
return false;
}
}
@@ -1095,15 +1092,10 @@ static int __apic_accept_irq(struct kvm_lapic *apic, int delivery_mode,
smp_wmb();
kvm_make_request(KVM_REQ_EVENT, vcpu);
kvm_vcpu_kick(vcpu);
- } else {
- apic_debug("Ignoring de-assert INIT to vcpu %d\n",
- vcpu->vcpu_id);
}
break;
case APIC_DM_STARTUP:
- apic_debug("SIPI to vcpu %d vector 0x%02x\n",
- vcpu->vcpu_id, vector);
result = 1;
apic->sipi_vector = vector;
/* make sure sipi_vector is visible for the receiver */
@@ -1221,14 +1213,6 @@ static void apic_send_ipi(struct kvm_lapic *apic)
trace_kvm_apic_ipi(icr_low, irq.dest_id);
- apic_debug("icr_high 0x%x, icr_low 0x%x, "
- "short_hand 0x%x, dest 0x%x, trig_mode 0x%x, level 0x%x, "
- "dest_mode 0x%x, delivery_mode 0x%x, vector 0x%x, "
- "msi_redir_hint 0x%x\n",
- icr_high, icr_low, irq.shorthand, irq.dest_id,
- irq.trig_mode, irq.level, irq.dest_mode, irq.delivery_mode,
- irq.vector, irq.msi_redir_hint);
-
kvm_irq_delivery_to_apic(apic->vcpu->kvm, apic, &irq, NULL);
}
@@ -1282,7 +1266,6 @@ static u32 __apic_read(struct kvm_lapic *apic, unsigned int offset)
switch (offset) {
case APIC_ARBPRI:
- apic_debug("Access APIC ARBPRI register which is for P6\n");
break;
case APIC_TMCCT: /* Timer CCR */
@@ -1349,11 +1332,8 @@ int kvm_lapic_reg_read(struct kvm_lapic *apic, u32 offset, int len,
if (!apic_x2apic_mode(apic))
valid_reg_mask |= APIC_REG_MASK(APIC_ARBPRI);
- if (offset > 0x3f0 || !(valid_reg_mask & APIC_REG_MASK(offset))) {
- apic_debug("KVM_APIC_READ: read reserved register %x\n",
- offset);
+ if (offset > 0x3f0 || !(valid_reg_mask & APIC_REG_MASK(offset)))
return 1;
- }
result = __apic_read(apic, offset & ~0xf);
@@ -1411,9 +1391,6 @@ static void update_divide_count(struct kvm_lapic *apic)
tmp1 = tdcr & 0xf;
tmp2 = ((tmp1 & 0x3) | ((tmp1 & 0x8) >> 1)) + 1;
apic->divide_count = 0x1 << (tmp2 & 0x7);
-
- apic_debug("timer divide count is 0x%x\n",
- apic->divide_count);
}
static void limit_periodic_timer_frequency(struct kvm_lapic *apic)
@@ -1455,29 +1432,6 @@ static void apic_update_lvtt(struct kvm_lapic *apic)
}
}
-static void apic_timer_expired(struct kvm_lapic *apic)
-{
- struct kvm_vcpu *vcpu = apic->vcpu;
- struct swait_queue_head *q = &vcpu->wq;
- struct kvm_timer *ktimer = &apic->lapic_timer;
-
- if (atomic_read(&apic->lapic_timer.pending))
- return;
-
- atomic_inc(&apic->lapic_timer.pending);
- kvm_set_pending_timer(vcpu);
-
- /*
- * For x86, the atomic_inc() is serialized, thus
- * using swait_active() is safe.
- */
- if (swait_active(q))
- swake_up_one(q);
-
- if (apic_lvtt_tscdeadline(apic) || ktimer->hv_timer_in_use)
- ktimer->expired_tscdeadline = ktimer->tscdeadline;
-}
-
/*
* On APICv, this test will cause a busy wait
* during a higher-priority task.
@@ -1551,7 +1505,7 @@ static inline void adjust_lapic_timer_advance(struct kvm_vcpu *vcpu,
apic->lapic_timer.timer_advance_ns = timer_advance_ns;
}
-void kvm_wait_lapic_expire(struct kvm_vcpu *vcpu)
+static void __kvm_wait_lapic_expire(struct kvm_vcpu *vcpu)
{
struct kvm_lapic *apic = vcpu->arch.apic;
u64 guest_tsc, tsc_deadline;
@@ -1559,9 +1513,6 @@ void kvm_wait_lapic_expire(struct kvm_vcpu *vcpu)
if (apic->lapic_timer.expired_tscdeadline == 0)
return;
- if (!lapic_timer_int_injected(vcpu))
- return;
-
tsc_deadline = apic->lapic_timer.expired_tscdeadline;
apic->lapic_timer.expired_tscdeadline = 0;
guest_tsc = kvm_read_l1_tsc(vcpu, rdtsc());
@@ -1573,8 +1524,57 @@ void kvm_wait_lapic_expire(struct kvm_vcpu *vcpu)
if (unlikely(!apic->lapic_timer.timer_advance_adjust_done))
adjust_lapic_timer_advance(vcpu, apic->lapic_timer.advance_expire_delta);
}
+
+void kvm_wait_lapic_expire(struct kvm_vcpu *vcpu)
+{
+ if (lapic_timer_int_injected(vcpu))
+ __kvm_wait_lapic_expire(vcpu);
+}
EXPORT_SYMBOL_GPL(kvm_wait_lapic_expire);
+static void kvm_apic_inject_pending_timer_irqs(struct kvm_lapic *apic)
+{
+ struct kvm_timer *ktimer = &apic->lapic_timer;
+
+ kvm_apic_local_deliver(apic, APIC_LVTT);
+ if (apic_lvtt_tscdeadline(apic))
+ ktimer->tscdeadline = 0;
+ if (apic_lvtt_oneshot(apic)) {
+ ktimer->tscdeadline = 0;
+ ktimer->target_expiration = 0;
+ }
+}
+
+static void apic_timer_expired(struct kvm_lapic *apic)
+{
+ struct kvm_vcpu *vcpu = apic->vcpu;
+ struct swait_queue_head *q = &vcpu->wq;
+ struct kvm_timer *ktimer = &apic->lapic_timer;
+
+ if (atomic_read(&apic->lapic_timer.pending))
+ return;
+
+ if (apic_lvtt_tscdeadline(apic) || ktimer->hv_timer_in_use)
+ ktimer->expired_tscdeadline = ktimer->tscdeadline;
+
+ if (kvm_use_posted_timer_interrupt(apic->vcpu)) {
+ if (apic->lapic_timer.timer_advance_ns)
+ __kvm_wait_lapic_expire(vcpu);
+ kvm_apic_inject_pending_timer_irqs(apic);
+ return;
+ }
+
+ atomic_inc(&apic->lapic_timer.pending);
+ kvm_set_pending_timer(vcpu);
+
+ /*
+ * For x86, the atomic_inc() is serialized, thus
+ * using swait_active() is safe.
+ */
+ if (swait_active(q))
+ swake_up_one(q);
+}
+
static void start_sw_tscdeadline(struct kvm_lapic *apic)
{
struct kvm_timer *ktimer = &apic->lapic_timer;
@@ -1601,7 +1601,7 @@ static void start_sw_tscdeadline(struct kvm_lapic *apic)
likely(ns > apic->lapic_timer.timer_advance_ns)) {
expire = ktime_add_ns(now, ns);
expire = ktime_sub_ns(expire, ktimer->timer_advance_ns);
- hrtimer_start(&ktimer->timer, expire, HRTIMER_MODE_ABS_PINNED);
+ hrtimer_start(&ktimer->timer, expire, HRTIMER_MODE_ABS);
} else
apic_timer_expired(apic);
@@ -1648,16 +1648,6 @@ static bool set_target_expiration(struct kvm_lapic *apic)
limit_periodic_timer_frequency(apic);
- apic_debug("%s: bus cycle is %" PRId64 "ns, now 0x%016"
- PRIx64 ", "
- "timer initial count 0x%x, period %lldns, "
- "expire @ 0x%016" PRIx64 ".\n", __func__,
- APIC_BUS_CYCLE_NS, ktime_to_ns(now),
- kvm_lapic_get_reg(apic, APIC_TMICT),
- apic->lapic_timer.period,
- ktime_to_ns(ktime_add_ns(now,
- apic->lapic_timer.period)));
-
apic->lapic_timer.tscdeadline = kvm_read_l1_tsc(apic->vcpu, tscl) +
nsec_to_cycles(apic->vcpu, apic->lapic_timer.period);
apic->lapic_timer.target_expiration = ktime_add_ns(now, apic->lapic_timer.period);
@@ -1703,7 +1693,7 @@ static void start_sw_period(struct kvm_lapic *apic)
hrtimer_start(&apic->lapic_timer.timer,
apic->lapic_timer.target_expiration,
- HRTIMER_MODE_ABS_PINNED);
+ HRTIMER_MODE_ABS);
}
bool kvm_lapic_hv_timer_in_use(struct kvm_vcpu *vcpu)
@@ -1860,8 +1850,6 @@ static void apic_manage_nmi_watchdog(struct kvm_lapic *apic, u32 lvt0_val)
if (apic->lvt0_in_nmi_mode != lvt0_in_nmi_mode) {
apic->lvt0_in_nmi_mode = lvt0_in_nmi_mode;
if (lvt0_in_nmi_mode) {
- apic_debug("Receive NMI setting on APIC_LVT0 "
- "for cpu %d\n", apic->vcpu->vcpu_id);
atomic_inc(&apic->vcpu->kvm->arch.vapics_in_nmi_mode);
} else
atomic_dec(&apic->vcpu->kvm->arch.vapics_in_nmi_mode);
@@ -1975,8 +1963,6 @@ int kvm_lapic_reg_write(struct kvm_lapic *apic, u32 reg, u32 val)
case APIC_TDCR: {
uint32_t old_divisor = apic->divide_count;
- if (val & 4)
- apic_debug("KVM_WRITE:TDCR %x\n", val);
kvm_lapic_set_reg(apic, APIC_TDCR, val);
update_divide_count(apic);
if (apic->divide_count != old_divisor &&
@@ -1988,10 +1974,8 @@ int kvm_lapic_reg_write(struct kvm_lapic *apic, u32 reg, u32 val)
break;
}
case APIC_ESR:
- if (apic_x2apic_mode(apic) && val != 0) {
- apic_debug("KVM_WRITE:ESR not zero %x\n", val);
+ if (apic_x2apic_mode(apic) && val != 0)
ret = 1;
- }
break;
case APIC_SELF_IPI:
@@ -2004,8 +1988,7 @@ int kvm_lapic_reg_write(struct kvm_lapic *apic, u32 reg, u32 val)
ret = 1;
break;
}
- if (ret)
- apic_debug("Local APIC Write to read-only register %x\n", reg);
+
return ret;
}
EXPORT_SYMBOL_GPL(kvm_lapic_reg_write);
@@ -2033,20 +2016,12 @@ static int apic_mmio_write(struct kvm_vcpu *vcpu, struct kvm_io_device *this,
* 32/64/128 bits registers must be accessed thru 32 bits.
* Refer SDM 8.4.1
*/
- if (len != 4 || (offset & 0xf)) {
- /* Don't shout loud, $infamous_os would cause only noise. */
- apic_debug("apic write: bad size=%d %lx\n", len, (long)address);
+ if (len != 4 || (offset & 0xf))
return 0;
- }
val = *(u32*)data;
- /* too common printing */
- if (offset != APIC_EOI)
- apic_debug("%s: offset 0x%x with length 0x%x, and value is "
- "0x%x\n", __func__, offset, len, val);
-
- kvm_lapic_reg_write(apic, offset, val);
+ kvm_lapic_reg_write(apic, offset & 0xff0, val);
return 0;
}
@@ -2178,11 +2153,6 @@ void kvm_lapic_set_base(struct kvm_vcpu *vcpu, u64 value)
if ((value & MSR_IA32_APICBASE_ENABLE) &&
apic->base_address != APIC_DEFAULT_PHYS_BASE)
pr_warn_once("APIC base relocation is unsupported by KVM");
-
- /* with FSB delivery interrupt, we can restart APIC functionality */
- apic_debug("apic base msr is 0x%016" PRIx64 ", and base address is "
- "0x%lx.\n", apic->vcpu->arch.apic_base, apic->base_address);
-
}
void kvm_lapic_reset(struct kvm_vcpu *vcpu, bool init_event)
@@ -2193,8 +2163,6 @@ void kvm_lapic_reset(struct kvm_vcpu *vcpu, bool init_event)
if (!apic)
return;
- apic_debug("%s\n", __func__);
-
/* Stop the timer in case it's a reset to an active apic */
hrtimer_cancel(&apic->lapic_timer.timer);
@@ -2247,11 +2215,6 @@ void kvm_lapic_reset(struct kvm_vcpu *vcpu, bool init_event)
vcpu->arch.apic_arb_prio = 0;
vcpu->arch.apic_attention = 0;
-
- apic_debug("%s: vcpu=%p, id=0x%x, base_msr="
- "0x%016" PRIx64 ", base_address=0x%0lx.\n", __func__,
- vcpu, kvm_lapic_get_reg(apic, APIC_ID),
- vcpu->arch.apic_base, apic->base_address);
}
/*
@@ -2323,7 +2286,6 @@ int kvm_create_lapic(struct kvm_vcpu *vcpu, int timer_advance_ns)
struct kvm_lapic *apic;
ASSERT(vcpu != NULL);
- apic_debug("apic_init %d\n", vcpu->vcpu_id);
apic = kzalloc(sizeof(*apic), GFP_KERNEL_ACCOUNT);
if (!apic)
@@ -2340,7 +2302,7 @@ int kvm_create_lapic(struct kvm_vcpu *vcpu, int timer_advance_ns)
apic->vcpu = vcpu;
hrtimer_init(&apic->lapic_timer.timer, CLOCK_MONOTONIC,
- HRTIMER_MODE_ABS_PINNED);
+ HRTIMER_MODE_ABS);
apic->lapic_timer.timer.function = apic_timer_fn;
if (timer_advance_ns == -1) {
apic->lapic_timer.timer_advance_ns = LAPIC_TIMER_ADVANCE_ADJUST_INIT;
@@ -2397,13 +2359,7 @@ void kvm_inject_apic_timer_irqs(struct kvm_vcpu *vcpu)
struct kvm_lapic *apic = vcpu->arch.apic;
if (atomic_read(&apic->lapic_timer.pending) > 0) {
- kvm_apic_local_deliver(apic, APIC_LVTT);
- if (apic_lvtt_tscdeadline(apic))
- apic->lapic_timer.tscdeadline = 0;
- if (apic_lvtt_oneshot(apic)) {
- apic->lapic_timer.tscdeadline = 0;
- apic->lapic_timer.target_expiration = 0;
- }
+ kvm_apic_inject_pending_timer_irqs(apic);
atomic_set(&apic->lapic_timer.pending, 0);
}
}
@@ -2525,12 +2481,13 @@ void __kvm_migrate_apic_timer(struct kvm_vcpu *vcpu)
{
struct hrtimer *timer;
- if (!lapic_in_kernel(vcpu))
+ if (!lapic_in_kernel(vcpu) ||
+ kvm_can_post_timer_interrupt(vcpu))
return;
timer = &vcpu->arch.apic->lapic_timer.timer;
if (hrtimer_cancel(timer))
- hrtimer_start_expires(timer, HRTIMER_MODE_ABS_PINNED);
+ hrtimer_start_expires(timer, HRTIMER_MODE_ABS);
}
/*
@@ -2678,11 +2635,8 @@ int kvm_x2apic_msr_read(struct kvm_vcpu *vcpu, u32 msr, u64 *data)
if (!lapic_in_kernel(vcpu) || !apic_x2apic_mode(apic))
return 1;
- if (reg == APIC_DFR || reg == APIC_ICR2) {
- apic_debug("KVM_APIC_READ: read x2apic reserved register %x\n",
- reg);
+ if (reg == APIC_DFR || reg == APIC_ICR2)
return 1;
- }
if (kvm_lapic_reg_read(apic, reg, 4, &low))
return 1;
@@ -2780,8 +2734,6 @@ void kvm_apic_accept_events(struct kvm_vcpu *vcpu)
/* evaluate pending_events before reading the vector */
smp_rmb();
sipi_vector = apic->sipi_vector;
- apic_debug("vcpu %d received sipi with vector # %x\n",
- vcpu->vcpu_id, sipi_vector);
kvm_vcpu_deliver_sipi_vector(vcpu, sipi_vector);
vcpu->arch.mp_state = KVM_MP_STATE_RUNNABLE;
}
diff --git a/arch/x86/kvm/lapic.h b/arch/x86/kvm/lapic.h
index 36747174e4a8..50053d2b8b7b 100644
--- a/arch/x86/kvm/lapic.h
+++ b/arch/x86/kvm/lapic.h
@@ -236,6 +236,7 @@ void kvm_lapic_switch_to_hv_timer(struct kvm_vcpu *vcpu);
void kvm_lapic_expired_hv_timer(struct kvm_vcpu *vcpu);
bool kvm_lapic_hv_timer_in_use(struct kvm_vcpu *vcpu);
void kvm_lapic_restart_hv_timer(struct kvm_vcpu *vcpu);
+bool kvm_can_post_timer_interrupt(struct kvm_vcpu *vcpu);
static inline enum lapic_mode kvm_apic_mode(u64 apic_base)
{
diff --git a/arch/x86/kvm/mmu.c b/arch/x86/kvm/mmu.c
index 9a5814d8d194..24843cf49579 100644
--- a/arch/x86/kvm/mmu.c
+++ b/arch/x86/kvm/mmu.c
@@ -3466,7 +3466,7 @@ static bool fast_page_fault(struct kvm_vcpu *vcpu, gva_t gva, int level,
/*
* Currently, fast page fault only works for direct mapping
* since the gfn is not stable for indirect shadow page. See
- * Documentation/virtual/kvm/locking.txt to get more detail.
+ * Documentation/virt/kvm/locking.txt to get more detail.
*/
fault_handled = fast_pf_fix_direct_spte(vcpu, sp,
iterator.sptep, spte,
@@ -4597,11 +4597,11 @@ static void update_permission_bitmask(struct kvm_vcpu *vcpu,
*/
/* Faults from writes to non-writable pages */
- u8 wf = (pfec & PFERR_WRITE_MASK) ? ~w : 0;
+ u8 wf = (pfec & PFERR_WRITE_MASK) ? (u8)~w : 0;
/* Faults from user mode accesses to supervisor pages */
- u8 uf = (pfec & PFERR_USER_MASK) ? ~u : 0;
+ u8 uf = (pfec & PFERR_USER_MASK) ? (u8)~u : 0;
/* Faults from fetches of non-executable pages*/
- u8 ff = (pfec & PFERR_FETCH_MASK) ? ~x : 0;
+ u8 ff = (pfec & PFERR_FETCH_MASK) ? (u8)~x : 0;
/* Faults from kernel mode fetches of user pages */
u8 smepf = 0;
/* Faults from kernel mode accesses of user pages */
diff --git a/arch/x86/kvm/pmu.c b/arch/x86/kvm/pmu.c
index aa5a2597305a..46875bbd0419 100644
--- a/arch/x86/kvm/pmu.c
+++ b/arch/x86/kvm/pmu.c
@@ -19,8 +19,8 @@
#include "lapic.h"
#include "pmu.h"
-/* This keeps the total size of the filter under 4k. */
-#define KVM_PMU_EVENT_FILTER_MAX_EVENTS 63
+/* This is enough to filter the vast majority of currently defined events. */
+#define KVM_PMU_EVENT_FILTER_MAX_EVENTS 300
/* NOTE:
* - Each perf counter is defined as "struct kvm_pmc";
@@ -131,8 +131,8 @@ static void pmc_reprogram_counter(struct kvm_pmc *pmc, u32 type,
intr ? kvm_perf_overflow_intr :
kvm_perf_overflow, pmc);
if (IS_ERR(event)) {
- printk_once("kvm_pmu: event creation failed %ld\n",
- PTR_ERR(event));
+ pr_debug_ratelimited("kvm_pmu: event creation failed %ld for pmc->idx = %d\n",
+ PTR_ERR(event), pmc->idx);
return;
}
@@ -206,12 +206,24 @@ void reprogram_fixed_counter(struct kvm_pmc *pmc, u8 ctrl, int idx)
{
unsigned en_field = ctrl & 0x3;
bool pmi = ctrl & 0x8;
+ struct kvm_pmu_event_filter *filter;
+ struct kvm *kvm = pmc->vcpu->kvm;
pmc_stop_counter(pmc);
if (!en_field || !pmc_is_enabled(pmc))
return;
+ filter = srcu_dereference(kvm->arch.pmu_event_filter, &kvm->srcu);
+ if (filter) {
+ if (filter->action == KVM_PMU_EVENT_DENY &&
+ test_bit(idx, (ulong *)&filter->fixed_counter_bitmap))
+ return;
+ if (filter->action == KVM_PMU_EVENT_ALLOW &&
+ !test_bit(idx, (ulong *)&filter->fixed_counter_bitmap))
+ return;
+ }
+
pmc_reprogram_counter(pmc, PERF_TYPE_HARDWARE,
kvm_x86_ops->pmu_ops->find_fixed_event(idx),
!(en_field & 0x2), /* exclude user */
@@ -385,6 +397,9 @@ int kvm_vm_ioctl_set_pmu_event_filter(struct kvm *kvm, void __user *argp)
tmp.action != KVM_PMU_EVENT_DENY)
return -EINVAL;
+ if (tmp.flags != 0)
+ return -EINVAL;
+
if (tmp.nevents > KVM_PMU_EVENT_FILTER_MAX_EVENTS)
return -E2BIG;
@@ -406,8 +421,8 @@ int kvm_vm_ioctl_set_pmu_event_filter(struct kvm *kvm, void __user *argp)
mutex_unlock(&kvm->lock);
synchronize_srcu_expedited(&kvm->srcu);
- r = 0;
+ r = 0;
cleanup:
kfree(filter);
- return r;
+ return r;
}
diff --git a/arch/x86/kvm/svm.c b/arch/x86/kvm/svm.c
index 583b9fa656f3..7eafc6907861 100644
--- a/arch/x86/kvm/svm.c
+++ b/arch/x86/kvm/svm.c
@@ -2143,12 +2143,20 @@ static struct kvm_vcpu *svm_create_vcpu(struct kvm *kvm, unsigned int id)
goto out;
}
+ svm->vcpu.arch.user_fpu = kmem_cache_zalloc(x86_fpu_cache,
+ GFP_KERNEL_ACCOUNT);
+ if (!svm->vcpu.arch.user_fpu) {
+ printk(KERN_ERR "kvm: failed to allocate kvm userspace's fpu\n");
+ err = -ENOMEM;
+ goto free_partial_svm;
+ }
+
svm->vcpu.arch.guest_fpu = kmem_cache_zalloc(x86_fpu_cache,
GFP_KERNEL_ACCOUNT);
if (!svm->vcpu.arch.guest_fpu) {
printk(KERN_ERR "kvm: failed to allocate vcpu's fpu\n");
err = -ENOMEM;
- goto free_partial_svm;
+ goto free_user_fpu;
}
err = kvm_vcpu_init(&svm->vcpu, kvm, id);
@@ -2211,6 +2219,8 @@ uninit:
kvm_vcpu_uninit(&svm->vcpu);
free_svm:
kmem_cache_free(x86_fpu_cache, svm->vcpu.arch.guest_fpu);
+free_user_fpu:
+ kmem_cache_free(x86_fpu_cache, svm->vcpu.arch.user_fpu);
free_partial_svm:
kmem_cache_free(kvm_vcpu_cache, svm);
out:
@@ -2241,6 +2251,7 @@ static void svm_free_vcpu(struct kvm_vcpu *vcpu)
__free_page(virt_to_page(svm->nested.hsave));
__free_pages(virt_to_page(svm->nested.msrpm), MSRPM_ALLOC_ORDER);
kvm_vcpu_uninit(vcpu);
+ kmem_cache_free(x86_fpu_cache, svm->vcpu.arch.user_fpu);
kmem_cache_free(x86_fpu_cache, svm->vcpu.arch.guest_fpu);
kmem_cache_free(kvm_vcpu_cache, svm);
}
@@ -7128,13 +7139,41 @@ static int nested_enable_evmcs(struct kvm_vcpu *vcpu,
static bool svm_need_emulation_on_page_fault(struct kvm_vcpu *vcpu)
{
- bool is_user, smap;
-
- is_user = svm_get_cpl(vcpu) == 3;
- smap = !kvm_read_cr4_bits(vcpu, X86_CR4_SMAP);
+ unsigned long cr4 = kvm_read_cr4(vcpu);
+ bool smep = cr4 & X86_CR4_SMEP;
+ bool smap = cr4 & X86_CR4_SMAP;
+ bool is_user = svm_get_cpl(vcpu) == 3;
/*
- * Detect and workaround Errata 1096 Fam_17h_00_0Fh
+ * Detect and workaround Errata 1096 Fam_17h_00_0Fh.
+ *
+ * Errata:
+ * When CPU raise #NPF on guest data access and vCPU CR4.SMAP=1, it is
+ * possible that CPU microcode implementing DecodeAssist will fail
+ * to read bytes of instruction which caused #NPF. In this case,
+ * GuestIntrBytes field of the VMCB on a VMEXIT will incorrectly
+ * return 0 instead of the correct guest instruction bytes.
+ *
+ * This happens because CPU microcode reading instruction bytes
+ * uses a special opcode which attempts to read data using CPL=0
+ * priviledges. The microcode reads CS:RIP and if it hits a SMAP
+ * fault, it gives up and returns no instruction bytes.
+ *
+ * Detection:
+ * We reach here in case CPU supports DecodeAssist, raised #NPF and
+ * returned 0 in GuestIntrBytes field of the VMCB.
+ * First, errata can only be triggered in case vCPU CR4.SMAP=1.
+ * Second, if vCPU CR4.SMEP=1, errata could only be triggered
+ * in case vCPU CPL==3 (Because otherwise guest would have triggered
+ * a SMEP fault instead of #NPF).
+ * Otherwise, vCPU CR4.SMEP=0, errata could be triggered by any vCPU CPL.
+ * As most guests enable SMAP if they have also enabled SMEP, use above
+ * logic in order to attempt minimize false-positive of detecting errata
+ * while still preserving all cases semantic correctness.
+ *
+ * Workaround:
+ * To determine what instruction the guest was executing, the hypervisor
+ * will have to decode the instruction at the instruction pointer.
*
* In non SEV guest, hypervisor will be able to read the guest
* memory to decode the instruction pointer when insn_len is zero
@@ -7145,11 +7184,11 @@ static bool svm_need_emulation_on_page_fault(struct kvm_vcpu *vcpu)
* instruction pointer so we will not able to workaround it. Lets
* print the error and request to kill the guest.
*/
- if (is_user && smap) {
+ if (smap && (!smep || is_user)) {
if (!sev_guest(vcpu->kvm))
return true;
- pr_err_ratelimited("KVM: Guest triggered AMD Erratum 1096\n");
+ pr_err_ratelimited("KVM: SEV Guest triggered AMD Erratum 1096\n");
kvm_make_request(KVM_REQ_TRIPLE_FAULT, vcpu);
}
diff --git a/arch/x86/kvm/vmx/nested.c b/arch/x86/kvm/vmx/nested.c
index bb509c254939..ced9fba32598 100644
--- a/arch/x86/kvm/vmx/nested.c
+++ b/arch/x86/kvm/vmx/nested.c
@@ -194,6 +194,7 @@ static void vmx_disable_shadow_vmcs(struct vcpu_vmx *vmx)
{
secondary_exec_controls_clearbit(vmx, SECONDARY_EXEC_SHADOW_VMCS);
vmcs_write64(VMCS_LINK_POINTER, -1ull);
+ vmx->nested.need_vmcs12_to_shadow_sync = false;
}
static inline void nested_release_evmcs(struct kvm_vcpu *vcpu)
@@ -219,6 +220,8 @@ static void free_nested(struct kvm_vcpu *vcpu)
if (!vmx->nested.vmxon && !vmx->nested.smm.vmxon)
return;
+ kvm_clear_request(KVM_REQ_GET_VMCS12_PAGES, vcpu);
+
vmx->nested.vmxon = false;
vmx->nested.smm.vmxon = false;
free_vpid(vmx->nested.vpid02);
@@ -231,7 +234,9 @@ static void free_nested(struct kvm_vcpu *vcpu)
vmx->vmcs01.shadow_vmcs = NULL;
}
kfree(vmx->nested.cached_vmcs12);
+ vmx->nested.cached_vmcs12 = NULL;
kfree(vmx->nested.cached_shadow_vmcs12);
+ vmx->nested.cached_shadow_vmcs12 = NULL;
/* Unpin physical memory we referred to in the vmcs02 */
if (vmx->nested.apic_access_page) {
kvm_release_page_dirty(vmx->nested.apic_access_page);
@@ -1341,6 +1346,9 @@ static void copy_shadow_to_vmcs12(struct vcpu_vmx *vmx)
unsigned long val;
int i;
+ if (WARN_ON(!shadow_vmcs))
+ return;
+
preempt_disable();
vmcs_load(shadow_vmcs);
@@ -1373,6 +1381,9 @@ static void copy_vmcs12_to_shadow(struct vcpu_vmx *vmx)
unsigned long val;
int i, q;
+ if (WARN_ON(!shadow_vmcs))
+ return;
+
vmcs_load(shadow_vmcs);
for (q = 0; q < ARRAY_SIZE(fields); q++) {
@@ -4194,7 +4205,10 @@ int get_vmx_mem_address(struct kvm_vcpu *vcpu, unsigned long exit_qualification,
* mode, e.g. a 32-bit address size can yield a 64-bit virtual
* address when using FS/GS with a non-zero base.
*/
- *ret = s.base + off;
+ if (seg_reg == VCPU_SREG_FS || seg_reg == VCPU_SREG_GS)
+ *ret = s.base + off;
+ else
+ *ret = off;
/* Long mode: #GP(0)/#SS(0) if the memory address is in a
* non-canonical form. This is the only check on the memory
@@ -4433,7 +4447,6 @@ static inline void nested_release_vmcs12(struct kvm_vcpu *vcpu)
/* copy to memory all shadowed fields in case
they were modified */
copy_shadow_to_vmcs12(vmx);
- vmx->nested.need_vmcs12_to_shadow_sync = false;
vmx_disable_shadow_vmcs(vmx);
}
vmx->nested.posted_intr_nv = -1;
diff --git a/arch/x86/kvm/vmx/pmu_intel.c b/arch/x86/kvm/vmx/pmu_intel.c
index 68d231d49c7a..4dea0e0e7e39 100644
--- a/arch/x86/kvm/vmx/pmu_intel.c
+++ b/arch/x86/kvm/vmx/pmu_intel.c
@@ -337,17 +337,22 @@ static void intel_pmu_init(struct kvm_vcpu *vcpu)
static void intel_pmu_reset(struct kvm_vcpu *vcpu)
{
struct kvm_pmu *pmu = vcpu_to_pmu(vcpu);
+ struct kvm_pmc *pmc = NULL;
int i;
for (i = 0; i < INTEL_PMC_MAX_GENERIC; i++) {
- struct kvm_pmc *pmc = &pmu->gp_counters[i];
+ pmc = &pmu->gp_counters[i];
pmc_stop_counter(pmc);
pmc->counter = pmc->eventsel = 0;
}
- for (i = 0; i < INTEL_PMC_MAX_FIXED; i++)
- pmc_stop_counter(&pmu->fixed_counters[i]);
+ for (i = 0; i < INTEL_PMC_MAX_FIXED; i++) {
+ pmc = &pmu->fixed_counters[i];
+
+ pmc_stop_counter(pmc);
+ pmc->counter = 0;
+ }
pmu->fixed_ctr_ctrl = pmu->global_ctrl = pmu->global_status =
pmu->global_ovf_ctrl = 0;
diff --git a/arch/x86/kvm/vmx/vmenter.S b/arch/x86/kvm/vmx/vmenter.S
index d4cb1945b2e3..4010d519eb8c 100644
--- a/arch/x86/kvm/vmx/vmenter.S
+++ b/arch/x86/kvm/vmx/vmenter.S
@@ -54,9 +54,9 @@ ENTRY(vmx_vmenter)
ret
3: cmpb $0, kvm_rebooting
- jne 4f
- call kvm_spurious_fault
-4: ret
+ je 4f
+ ret
+4: ud2
.pushsection .fixup, "ax"
5: jmp 3b
diff --git a/arch/x86/kvm/vmx/vmx.c b/arch/x86/kvm/vmx/vmx.c
index 69536553446d..074385c86c09 100644
--- a/arch/x86/kvm/vmx/vmx.c
+++ b/arch/x86/kvm/vmx/vmx.c
@@ -5829,6 +5829,7 @@ static int vmx_handle_exit(struct kvm_vcpu *vcpu)
}
if (unlikely(vmx->fail)) {
+ dump_vmcs();
vcpu->run->exit_reason = KVM_EXIT_FAIL_ENTRY;
vcpu->run->fail_entry.hardware_entry_failure_reason
= vmcs_read32(VM_INSTRUCTION_ERROR);
@@ -6597,6 +6598,7 @@ static void vmx_free_vcpu(struct kvm_vcpu *vcpu)
free_loaded_vmcs(vmx->loaded_vmcs);
kfree(vmx->guest_msrs);
kvm_vcpu_uninit(vcpu);
+ kmem_cache_free(x86_fpu_cache, vmx->vcpu.arch.user_fpu);
kmem_cache_free(x86_fpu_cache, vmx->vcpu.arch.guest_fpu);
kmem_cache_free(kvm_vcpu_cache, vmx);
}
@@ -6612,12 +6614,20 @@ static struct kvm_vcpu *vmx_create_vcpu(struct kvm *kvm, unsigned int id)
if (!vmx)
return ERR_PTR(-ENOMEM);
+ vmx->vcpu.arch.user_fpu = kmem_cache_zalloc(x86_fpu_cache,
+ GFP_KERNEL_ACCOUNT);
+ if (!vmx->vcpu.arch.user_fpu) {
+ printk(KERN_ERR "kvm: failed to allocate kvm userspace's fpu\n");
+ err = -ENOMEM;
+ goto free_partial_vcpu;
+ }
+
vmx->vcpu.arch.guest_fpu = kmem_cache_zalloc(x86_fpu_cache,
GFP_KERNEL_ACCOUNT);
if (!vmx->vcpu.arch.guest_fpu) {
printk(KERN_ERR "kvm: failed to allocate vcpu's fpu\n");
err = -ENOMEM;
- goto free_partial_vcpu;
+ goto free_user_fpu;
}
vmx->vpid = allocate_vpid();
@@ -6720,6 +6730,8 @@ uninit_vcpu:
free_vcpu:
free_vpid(vmx->vpid);
kmem_cache_free(x86_fpu_cache, vmx->vcpu.arch.guest_fpu);
+free_user_fpu:
+ kmem_cache_free(x86_fpu_cache, vmx->vcpu.arch.user_fpu);
free_partial_vcpu:
kmem_cache_free(kvm_vcpu_cache, vmx);
return ERR_PTR(err);
@@ -7064,7 +7076,8 @@ static int vmx_set_hv_timer(struct kvm_vcpu *vcpu, u64 guest_deadline_tsc,
u64 tscl, guest_tscl, delta_tsc, lapic_timer_advance_cycles;
struct kvm_timer *ktimer = &vcpu->arch.apic->lapic_timer;
- if (kvm_mwait_in_guest(vcpu->kvm))
+ if (kvm_mwait_in_guest(vcpu->kvm) ||
+ kvm_can_post_timer_interrupt(vcpu))
return -EOPNOTSUPP;
vmx = to_vmx(vcpu);
@@ -7453,7 +7466,7 @@ static int enable_smi_window(struct kvm_vcpu *vcpu)
static bool vmx_need_emulation_on_page_fault(struct kvm_vcpu *vcpu)
{
- return 0;
+ return false;
}
static __init int hardware_setup(void)
diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
index 4a0b74ecd1de..c6d951cbd76c 100644
--- a/arch/x86/kvm/x86.c
+++ b/arch/x86/kvm/x86.c
@@ -51,6 +51,7 @@
#include <linux/kvm_irqfd.h>
#include <linux/irqbypass.h>
#include <linux/sched/stat.h>
+#include <linux/sched/isolation.h>
#include <linux/mem_encrypt.h>
#include <trace/events/kvm.h>
@@ -153,6 +154,9 @@ EXPORT_SYMBOL_GPL(enable_vmware_backdoor);
static bool __read_mostly force_emulation_prefix = false;
module_param(force_emulation_prefix, bool, S_IRUGO);
+int __read_mostly pi_inject_timer = -1;
+module_param(pi_inject_timer, bint, S_IRUGO | S_IWUSR);
+
#define KVM_NR_SHARED_MSRS 16
struct kvm_shared_msrs_global {
@@ -1456,12 +1460,8 @@ static void update_pvclock_gtod(struct timekeeper *tk)
void kvm_set_pending_timer(struct kvm_vcpu *vcpu)
{
- /*
- * Note: KVM_REQ_PENDING_TIMER is implicitly checked in
- * vcpu_enter_guest. This function is only called from
- * the physical CPU that is running vcpu.
- */
kvm_make_request(KVM_REQ_PENDING_TIMER, vcpu);
+ kvm_vcpu_kick(vcpu);
}
static void kvm_write_wall_clock(struct kvm *kvm, gpa_t wall_clock)
@@ -1540,9 +1540,6 @@ static void kvm_get_time_scale(uint64_t scaled_hz, uint64_t base_hz,
*pshift = shift;
*pmultiplier = div_frac(scaled64, tps32);
-
- pr_debug("%s: base_hz %llu => %llu, shift %d, mul %u\n",
- __func__, base_hz, scaled_hz, shift, *pmultiplier);
}
#ifdef CONFIG_X86_64
@@ -1785,12 +1782,10 @@ void kvm_write_tsc(struct kvm_vcpu *vcpu, struct msr_data *msr)
vcpu->arch.virtual_tsc_khz == kvm->arch.last_tsc_khz) {
if (!kvm_check_tsc_unstable()) {
offset = kvm->arch.cur_tsc_offset;
- pr_debug("kvm: matched tsc offset for %llu\n", data);
} else {
u64 delta = nsec_to_cycles(vcpu, elapsed);
data += delta;
offset = kvm_compute_tsc_offset(vcpu, data);
- pr_debug("kvm: adjusted tsc offset by %llu\n", delta);
}
matched = true;
already_matched = (vcpu->arch.this_tsc_generation == kvm->arch.cur_tsc_generation);
@@ -1809,8 +1804,6 @@ void kvm_write_tsc(struct kvm_vcpu *vcpu, struct msr_data *msr)
kvm->arch.cur_tsc_write = data;
kvm->arch.cur_tsc_offset = offset;
matched = false;
- pr_debug("kvm: new tsc generation %llu, clock %llu\n",
- kvm->arch.cur_tsc_generation, data);
}
/*
@@ -3313,6 +3306,10 @@ void kvm_arch_vcpu_load(struct kvm_vcpu *vcpu, int cpu)
kvm_x86_ops->vcpu_load(vcpu, cpu);
+ fpregs_assert_state_consistent();
+ if (test_thread_flag(TIF_NEED_FPU_LOAD))
+ switch_fpu_return();
+
/* Apply any externally detected TSC adjustments (due to suspend) */
if (unlikely(vcpu->arch.tsc_offset_adjustment)) {
adjust_tsc_offset_host(vcpu, vcpu->arch.tsc_offset_adjustment);
@@ -6911,7 +6908,6 @@ static void kvm_timer_init(void)
cpufreq_register_notifier(&kvmclock_cpufreq_notifier_block,
CPUFREQ_TRANSITION_NOTIFIER);
}
- pr_debug("kvm: max_tsc_khz = %ld\n", max_tsc_khz);
cpuhp_setup_state(CPUHP_AP_X86_KVM_CLK_ONLINE, "x86/kvm/clk:online",
kvmclock_cpu_online, kvmclock_cpu_down_prep);
@@ -7070,6 +7066,8 @@ int kvm_arch_init(void *opaque)
host_xcr0 = xgetbv(XCR_XFEATURE_ENABLED_MASK);
kvm_lapic_init();
+ if (pi_inject_timer == -1)
+ pi_inject_timer = housekeeping_enabled(HK_FLAG_TIMER);
#ifdef CONFIG_X86_64
pvclock_gtod_register_notifier(&pvclock_gtod_notifier);
@@ -7208,7 +7206,7 @@ static void kvm_sched_yield(struct kvm *kvm, unsigned long dest_id)
rcu_read_unlock();
- if (target)
+ if (target && READ_ONCE(target->ready))
kvm_vcpu_yield_to(target);
}
@@ -7248,6 +7246,7 @@ int kvm_emulate_hypercall(struct kvm_vcpu *vcpu)
break;
case KVM_HC_KICK_CPU:
kvm_pv_kick_cpu_op(vcpu->kvm, a0, a1);
+ kvm_sched_yield(vcpu->kvm, a1);
ret = 0;
break;
#ifdef CONFIG_X86_64
@@ -7996,9 +7995,8 @@ static int vcpu_enter_guest(struct kvm_vcpu *vcpu)
trace_kvm_entry(vcpu->vcpu_id);
guest_enter_irqoff();
- fpregs_assert_state_consistent();
- if (test_thread_flag(TIF_NEED_FPU_LOAD))
- switch_fpu_return();
+ /* The preempt notifier should have taken care of the FPU already. */
+ WARN_ON_ONCE(test_thread_flag(TIF_NEED_FPU_LOAD));
if (unlikely(vcpu->arch.switch_db_regs)) {
set_debugreg(0, 7);
@@ -8276,7 +8274,7 @@ static void kvm_load_guest_fpu(struct kvm_vcpu *vcpu)
{
fpregs_lock();
- copy_fpregs_to_fpstate(&current->thread.fpu);
+ copy_fpregs_to_fpstate(vcpu->arch.user_fpu);
/* PKRU is separately restored in kvm_x86_ops->run. */
__copy_kernel_to_fpregs(&vcpu->arch.guest_fpu->state,
~XFEATURE_MASK_PKRU);
@@ -8293,7 +8291,7 @@ static void kvm_put_guest_fpu(struct kvm_vcpu *vcpu)
fpregs_lock();
copy_fpregs_to_fpstate(vcpu->arch.guest_fpu);
- copy_kernel_to_fpregs(&current->thread.fpu.state);
+ copy_kernel_to_fpregs(&vcpu->arch.user_fpu->state);
fpregs_mark_activate();
fpregs_unlock();
diff --git a/arch/x86/kvm/x86.h b/arch/x86/kvm/x86.h
index e08a12892e8b..6594020c0691 100644
--- a/arch/x86/kvm/x86.h
+++ b/arch/x86/kvm/x86.h
@@ -301,6 +301,8 @@ extern unsigned int min_timer_period_us;
extern bool enable_vmware_backdoor;
+extern int pi_inject_timer;
+
extern struct static_key kvm_no_apic_vcpu;
static inline u64 nsec_to_cycles(struct kvm_vcpu *vcpu, u64 nsec)
diff --git a/arch/x86/lib/copy_user_64.S b/arch/x86/lib/copy_user_64.S
index 378a1f70ae7d..4fe1601dbc5d 100644
--- a/arch/x86/lib/copy_user_64.S
+++ b/arch/x86/lib/copy_user_64.S
@@ -239,7 +239,7 @@ copy_user_handle_tail:
ret
_ASM_EXTABLE_UA(1b, 2b)
-ENDPROC(copy_user_handle_tail)
+END(copy_user_handle_tail)
/*
* copy_user_nocache - Uncached memory copy with exception handling
diff --git a/arch/x86/lib/getuser.S b/arch/x86/lib/getuser.S
index 74fdff968ea3..304f958c27b2 100644
--- a/arch/x86/lib/getuser.S
+++ b/arch/x86/lib/getuser.S
@@ -115,29 +115,29 @@ ENDPROC(__get_user_8)
EXPORT_SYMBOL(__get_user_8)
+bad_get_user_clac:
+ ASM_CLAC
bad_get_user:
xor %edx,%edx
mov $(-EFAULT),%_ASM_AX
- ASM_CLAC
ret
-END(bad_get_user)
#ifdef CONFIG_X86_32
+bad_get_user_8_clac:
+ ASM_CLAC
bad_get_user_8:
xor %edx,%edx
xor %ecx,%ecx
mov $(-EFAULT),%_ASM_AX
- ASM_CLAC
ret
-END(bad_get_user_8)
#endif
- _ASM_EXTABLE_UA(1b, bad_get_user)
- _ASM_EXTABLE_UA(2b, bad_get_user)
- _ASM_EXTABLE_UA(3b, bad_get_user)
+ _ASM_EXTABLE_UA(1b, bad_get_user_clac)
+ _ASM_EXTABLE_UA(2b, bad_get_user_clac)
+ _ASM_EXTABLE_UA(3b, bad_get_user_clac)
#ifdef CONFIG_X86_64
- _ASM_EXTABLE_UA(4b, bad_get_user)
+ _ASM_EXTABLE_UA(4b, bad_get_user_clac)
#else
- _ASM_EXTABLE_UA(4b, bad_get_user_8)
- _ASM_EXTABLE_UA(5b, bad_get_user_8)
+ _ASM_EXTABLE_UA(4b, bad_get_user_8_clac)
+ _ASM_EXTABLE_UA(5b, bad_get_user_8_clac)
#endif
diff --git a/arch/x86/lib/putuser.S b/arch/x86/lib/putuser.S
index d2e5c9c39601..14bf78341d3c 100644
--- a/arch/x86/lib/putuser.S
+++ b/arch/x86/lib/putuser.S
@@ -32,8 +32,6 @@
*/
#define ENTER mov PER_CPU_VAR(current_task), %_ASM_BX
-#define EXIT ASM_CLAC ; \
- ret
.text
ENTRY(__put_user_1)
@@ -43,7 +41,8 @@ ENTRY(__put_user_1)
ASM_STAC
1: movb %al,(%_ASM_CX)
xor %eax,%eax
- EXIT
+ ASM_CLAC
+ ret
ENDPROC(__put_user_1)
EXPORT_SYMBOL(__put_user_1)
@@ -56,7 +55,8 @@ ENTRY(__put_user_2)
ASM_STAC
2: movw %ax,(%_ASM_CX)
xor %eax,%eax
- EXIT
+ ASM_CLAC
+ ret
ENDPROC(__put_user_2)
EXPORT_SYMBOL(__put_user_2)
@@ -69,7 +69,8 @@ ENTRY(__put_user_4)
ASM_STAC
3: movl %eax,(%_ASM_CX)
xor %eax,%eax
- EXIT
+ ASM_CLAC
+ ret
ENDPROC(__put_user_4)
EXPORT_SYMBOL(__put_user_4)
@@ -85,19 +86,21 @@ ENTRY(__put_user_8)
5: movl %edx,4(%_ASM_CX)
#endif
xor %eax,%eax
- EXIT
+ ASM_CLAC
+ RET
ENDPROC(__put_user_8)
EXPORT_SYMBOL(__put_user_8)
+bad_put_user_clac:
+ ASM_CLAC
bad_put_user:
movl $-EFAULT,%eax
- EXIT
-END(bad_put_user)
+ RET
- _ASM_EXTABLE_UA(1b, bad_put_user)
- _ASM_EXTABLE_UA(2b, bad_put_user)
- _ASM_EXTABLE_UA(3b, bad_put_user)
- _ASM_EXTABLE_UA(4b, bad_put_user)
+ _ASM_EXTABLE_UA(1b, bad_put_user_clac)
+ _ASM_EXTABLE_UA(2b, bad_put_user_clac)
+ _ASM_EXTABLE_UA(3b, bad_put_user_clac)
+ _ASM_EXTABLE_UA(4b, bad_put_user_clac)
#ifdef CONFIG_X86_32
- _ASM_EXTABLE_UA(5b, bad_put_user)
+ _ASM_EXTABLE_UA(5b, bad_put_user_clac)
#endif
diff --git a/arch/x86/lib/usercopy_64.c b/arch/x86/lib/usercopy_64.c
index e0e006f1624e..fff28c6f73a2 100644
--- a/arch/x86/lib/usercopy_64.c
+++ b/arch/x86/lib/usercopy_64.c
@@ -60,7 +60,7 @@ EXPORT_SYMBOL(clear_user);
* but reuse __memcpy_mcsafe in case a new read error is encountered.
* clac() is handled in _copy_to_iter_mcsafe().
*/
-__visible unsigned long
+__visible notrace unsigned long
mcsafe_handle_tail(char *to, char *from, unsigned len)
{
for (; len; --len, to++, from++) {
diff --git a/arch/x86/math-emu/fpu_emu.h b/arch/x86/math-emu/fpu_emu.h
index a5a41ec58072..0c122226ca56 100644
--- a/arch/x86/math-emu/fpu_emu.h
+++ b/arch/x86/math-emu/fpu_emu.h
@@ -177,7 +177,7 @@ static inline void reg_copy(FPU_REG const *x, FPU_REG *y)
#define setexponentpos(x,y) { (*(short *)&((x)->exp)) = \
((y) + EXTENDED_Ebias) & 0x7fff; }
#define exponent16(x) (*(short *)&((x)->exp))
-#define setexponent16(x,y) { (*(short *)&((x)->exp)) = (y); }
+#define setexponent16(x,y) { (*(short *)&((x)->exp)) = (u16)(y); }
#define addexponent(x,y) { (*(short *)&((x)->exp)) += (y); }
#define stdexp(x) { (*(short *)&((x)->exp)) += EXTENDED_Ebias; }
diff --git a/arch/x86/math-emu/reg_constant.c b/arch/x86/math-emu/reg_constant.c
index 8dc9095bab22..742619e94bdf 100644
--- a/arch/x86/math-emu/reg_constant.c
+++ b/arch/x86/math-emu/reg_constant.c
@@ -18,7 +18,7 @@
#include "control_w.h"
#define MAKE_REG(s, e, l, h) { l, h, \
- ((EXTENDED_Ebias+(e)) | ((SIGN_##s != 0)*0x8000)) }
+ (u16)((EXTENDED_Ebias+(e)) | ((SIGN_##s != 0)*0x8000)) }
FPU_REG const CONST_1 = MAKE_REG(POS, 0, 0x00000000, 0x80000000);
#if 0
diff --git a/arch/x86/mm/fault.c b/arch/x86/mm/fault.c
index d1634c59ed56..6c46095cd0d9 100644
--- a/arch/x86/mm/fault.c
+++ b/arch/x86/mm/fault.c
@@ -1490,9 +1490,8 @@ good_area:
NOKPROBE_SYMBOL(do_user_addr_fault);
/*
- * This routine handles page faults. It determines the address,
- * and the problem, and then passes it off to one of the appropriate
- * routines.
+ * Explicitly marked noinline such that the function tracer sees this as the
+ * page_fault entry point.
*/
static noinline void
__do_page_fault(struct pt_regs *regs, unsigned long hw_error_code,
@@ -1511,33 +1510,26 @@ __do_page_fault(struct pt_regs *regs, unsigned long hw_error_code,
}
NOKPROBE_SYMBOL(__do_page_fault);
-static nokprobe_inline void
-trace_page_fault_entries(unsigned long address, struct pt_regs *regs,
- unsigned long error_code)
+static __always_inline void
+trace_page_fault_entries(struct pt_regs *regs, unsigned long error_code,
+ unsigned long address)
{
+ if (!trace_pagefault_enabled())
+ return;
+
if (user_mode(regs))
trace_page_fault_user(address, regs, error_code);
else
trace_page_fault_kernel(address, regs, error_code);
}
-/*
- * We must have this function blacklisted from kprobes, tagged with notrace
- * and call read_cr2() before calling anything else. To avoid calling any
- * kind of tracing machinery before we've observed the CR2 value.
- *
- * exception_{enter,exit}() contains all sorts of tracepoints.
- */
-dotraplinkage void notrace
-do_page_fault(struct pt_regs *regs, unsigned long error_code)
+dotraplinkage void
+do_page_fault(struct pt_regs *regs, unsigned long error_code, unsigned long address)
{
- unsigned long address = read_cr2(); /* Get the faulting address */
enum ctx_state prev_state;
prev_state = exception_enter();
- if (trace_pagefault_enabled())
- trace_page_fault_entries(address, regs, error_code);
-
+ trace_page_fault_entries(regs, error_code, address);
__do_page_fault(regs, error_code, address);
exception_exit(prev_state);
}
diff --git a/arch/x86/mm/mem_encrypt.c b/arch/x86/mm/mem_encrypt.c
index e0df96fdfe46..fece30ca8b0c 100644
--- a/arch/x86/mm/mem_encrypt.c
+++ b/arch/x86/mm/mem_encrypt.c
@@ -15,6 +15,10 @@
#include <linux/dma-direct.h>
#include <linux/swiotlb.h>
#include <linux/mem_encrypt.h>
+#include <linux/device.h>
+#include <linux/kernel.h>
+#include <linux/bitops.h>
+#include <linux/dma-mapping.h>
#include <asm/tlbflush.h>
#include <asm/fixmap.h>
@@ -41,7 +45,7 @@ EXPORT_SYMBOL_GPL(sev_enable_key);
bool sev_enabled __section(.data);
/* Buffer used for early in-place encryption by BSP, no locking needed */
-static char sme_early_buffer[PAGE_SIZE] __aligned(PAGE_SIZE);
+static char sme_early_buffer[PAGE_SIZE] __initdata __aligned(PAGE_SIZE);
/*
* This routine does not change the underlying encryption setting of the
@@ -348,6 +352,32 @@ bool sev_active(void)
}
EXPORT_SYMBOL(sev_active);
+/* Override for DMA direct allocation check - ARCH_HAS_FORCE_DMA_UNENCRYPTED */
+bool force_dma_unencrypted(struct device *dev)
+{
+ /*
+ * For SEV, all DMA must be to unencrypted addresses.
+ */
+ if (sev_active())
+ return true;
+
+ /*
+ * For SME, all DMA must be to unencrypted addresses if the
+ * device does not support DMA to addresses that include the
+ * encryption mask.
+ */
+ if (sme_active()) {
+ u64 dma_enc_mask = DMA_BIT_MASK(__ffs64(sme_me_mask));
+ u64 dma_dev_mask = min_not_zero(dev->coherent_dma_mask,
+ dev->bus_dma_mask);
+
+ if (dma_dev_mask <= dma_enc_mask)
+ return true;
+ }
+
+ return false;
+}
+
/* Architecture __weak replacement functions */
void __init mem_encrypt_free_decrypted_mem(void)
{
diff --git a/arch/x86/xen/enlighten_pv.c b/arch/x86/xen/enlighten_pv.c
index bed6bb93c965..7ceb32821093 100644
--- a/arch/x86/xen/enlighten_pv.c
+++ b/arch/x86/xen/enlighten_pv.c
@@ -998,7 +998,8 @@ void __init xen_setup_vcpu_info_placement(void)
__PV_IS_CALLEE_SAVE(xen_irq_disable_direct);
pv_ops.irq.irq_enable =
__PV_IS_CALLEE_SAVE(xen_irq_enable_direct);
- pv_ops.mmu.read_cr2 = xen_read_cr2_direct;
+ pv_ops.mmu.read_cr2 =
+ __PV_IS_CALLEE_SAVE(xen_read_cr2_direct);
}
}
diff --git a/arch/x86/xen/mmu_pv.c b/arch/x86/xen/mmu_pv.c
index f6e5eeecfc69..26e8b326966d 100644
--- a/arch/x86/xen/mmu_pv.c
+++ b/arch/x86/xen/mmu_pv.c
@@ -1307,16 +1307,6 @@ static void xen_write_cr2(unsigned long cr2)
this_cpu_read(xen_vcpu)->arch.cr2 = cr2;
}
-static unsigned long xen_read_cr2(void)
-{
- return this_cpu_read(xen_vcpu)->arch.cr2;
-}
-
-unsigned long xen_read_cr2_direct(void)
-{
- return this_cpu_read(xen_vcpu_info.arch.cr2);
-}
-
static noinline void xen_flush_tlb(void)
{
struct mmuext_op *op;
@@ -2397,7 +2387,7 @@ static void xen_leave_lazy_mmu(void)
}
static const struct pv_mmu_ops xen_mmu_ops __initconst = {
- .read_cr2 = xen_read_cr2,
+ .read_cr2 = __PV_IS_CALLEE_SAVE(xen_read_cr2),
.write_cr2 = xen_write_cr2,
.read_cr3 = xen_read_cr3,
diff --git a/arch/x86/xen/xen-asm.S b/arch/x86/xen/xen-asm.S
index 8019edd0125c..be104eef80be 100644
--- a/arch/x86/xen/xen-asm.S
+++ b/arch/x86/xen/xen-asm.S
@@ -10,6 +10,7 @@
#include <asm/percpu.h>
#include <asm/processor-flags.h>
#include <asm/frame.h>
+#include <asm/asm.h>
#include <linux/linkage.h>
@@ -135,3 +136,18 @@ ENTRY(check_events)
FRAME_END
ret
ENDPROC(check_events)
+
+ENTRY(xen_read_cr2)
+ FRAME_BEGIN
+ _ASM_MOV PER_CPU_VAR(xen_vcpu), %_ASM_AX
+ _ASM_MOV XEN_vcpu_info_arch_cr2(%_ASM_AX), %_ASM_AX
+ FRAME_END
+ ret
+ ENDPROC(xen_read_cr2);
+
+ENTRY(xen_read_cr2_direct)
+ FRAME_BEGIN
+ _ASM_MOV PER_CPU_VAR(xen_vcpu_info) + XEN_vcpu_info_arch_cr2, %_ASM_AX
+ FRAME_END
+ ret
+ ENDPROC(xen_read_cr2_direct);
diff --git a/arch/x86/xen/xen-ops.h b/arch/x86/xen/xen-ops.h
index 2f111f47ba98..45a441c33d6d 100644
--- a/arch/x86/xen/xen-ops.h
+++ b/arch/x86/xen/xen-ops.h
@@ -134,6 +134,9 @@ __visible void xen_irq_disable_direct(void);
__visible unsigned long xen_save_fl_direct(void);
__visible void xen_restore_fl_direct(unsigned long);
+__visible unsigned long xen_read_cr2(void);
+__visible unsigned long xen_read_cr2_direct(void);
+
/* These are not functions, and cannot be called normally */
__visible void xen_iret(void);
__visible void xen_sysret32(void);
OpenPOWER on IntegriCloud