diff options
Diffstat (limited to 'arch/x86/xen')
-rw-r--r-- | arch/x86/xen/Kconfig | 5 | ||||
-rw-r--r-- | arch/x86/xen/enlighten_pv.c | 176 | ||||
-rw-r--r-- | arch/x86/xen/irq.c | 3 | ||||
-rw-r--r-- | arch/x86/xen/mmu_pv.c | 5 | ||||
-rw-r--r-- | arch/x86/xen/xen-asm.S | 26 | ||||
-rw-r--r-- | arch/x86/xen/xen-asm.h | 12 | ||||
-rw-r--r-- | arch/x86/xen/xen-asm_32.S | 27 | ||||
-rw-r--r-- | arch/x86/xen/xen-asm_64.S | 102 | ||||
-rw-r--r-- | arch/x86/xen/xen-head.S | 2 | ||||
-rw-r--r-- | arch/x86/xen/xen-ops.h | 16 |
10 files changed, 170 insertions, 204 deletions
diff --git a/arch/x86/xen/Kconfig b/arch/x86/xen/Kconfig index 027987638e98..1ecd419811a2 100644 --- a/arch/x86/xen/Kconfig +++ b/arch/x86/xen/Kconfig @@ -17,6 +17,9 @@ config XEN_PV bool "Xen PV guest support" default y depends on XEN + # XEN_PV is not ready to work with 5-level paging. + # Changes to hypervisor are also required. + depends on !X86_5LEVEL select XEN_HAVE_PVMMU select XEN_HAVE_VPMU help @@ -75,4 +78,6 @@ config XEN_DEBUG_FS config XEN_PVH bool "Support for running as a PVH guest" depends on XEN && XEN_PVHVM && ACPI + # Pre-built page tables are not ready to handle 5-level paging. + depends on !X86_5LEVEL def_bool n diff --git a/arch/x86/xen/enlighten_pv.c b/arch/x86/xen/enlighten_pv.c index 811e4ddb3f37..ae2a2e2d6362 100644 --- a/arch/x86/xen/enlighten_pv.c +++ b/arch/x86/xen/enlighten_pv.c @@ -263,6 +263,13 @@ static void __init xen_init_capabilities(void) setup_clear_cpu_cap(X86_FEATURE_MTRR); setup_clear_cpu_cap(X86_FEATURE_ACC); setup_clear_cpu_cap(X86_FEATURE_X2APIC); + setup_clear_cpu_cap(X86_FEATURE_SME); + + /* + * Xen PV would need some work to support PCID: CR3 handling as well + * as xen_flush_tlb_others() would need updating. + */ + setup_clear_cpu_cap(X86_FEATURE_PCID); if (!xen_initial_domain()) setup_clear_cpu_cap(X86_FEATURE_ACPI); @@ -494,7 +501,7 @@ static void __init xen_load_gdt_boot(const struct desc_ptr *dtr) static inline bool desc_equal(const struct desc_struct *d1, const struct desc_struct *d2) { - return d1->a == d2->a && d1->b == d2->b; + return !memcmp(d1, d2, sizeof(*d1)); } static void load_TLS_descriptor(struct thread_struct *t, @@ -579,59 +586,91 @@ static void xen_write_ldt_entry(struct desc_struct *dt, int entrynum, preempt_enable(); } +#ifdef CONFIG_X86_64 +struct trap_array_entry { + void (*orig)(void); + void (*xen)(void); + bool ist_okay; +}; + +static struct trap_array_entry trap_array[] = { + { debug, xen_xendebug, true }, + { int3, xen_xenint3, true }, + { double_fault, xen_double_fault, true }, +#ifdef CONFIG_X86_MCE + { machine_check, xen_machine_check, true }, +#endif + { nmi, xen_nmi, true }, + { overflow, xen_overflow, false }, +#ifdef CONFIG_IA32_EMULATION + { entry_INT80_compat, xen_entry_INT80_compat, false }, +#endif + { page_fault, xen_page_fault, false }, + { divide_error, xen_divide_error, false }, + { bounds, xen_bounds, false }, + { invalid_op, xen_invalid_op, false }, + { device_not_available, xen_device_not_available, false }, + { coprocessor_segment_overrun, xen_coprocessor_segment_overrun, false }, + { invalid_TSS, xen_invalid_TSS, false }, + { segment_not_present, xen_segment_not_present, false }, + { stack_segment, xen_stack_segment, false }, + { general_protection, xen_general_protection, false }, + { spurious_interrupt_bug, xen_spurious_interrupt_bug, false }, + { coprocessor_error, xen_coprocessor_error, false }, + { alignment_check, xen_alignment_check, false }, + { simd_coprocessor_error, xen_simd_coprocessor_error, false }, +}; + +static bool get_trap_addr(void **addr, unsigned int ist) +{ + unsigned int nr; + bool ist_okay = false; + + /* + * Replace trap handler addresses by Xen specific ones. + * Check for known traps using IST and whitelist them. + * The debugger ones are the only ones we care about. + * Xen will handle faults like double_fault, * so we should never see + * them. Warn if there's an unexpected IST-using fault handler. + */ + for (nr = 0; nr < ARRAY_SIZE(trap_array); nr++) { + struct trap_array_entry *entry = trap_array + nr; + + if (*addr == entry->orig) { + *addr = entry->xen; + ist_okay = entry->ist_okay; + break; + } + } + + if (WARN_ON(ist != 0 && !ist_okay)) + return false; + + return true; +} +#endif + static int cvt_gate_to_trap(int vector, const gate_desc *val, struct trap_info *info) { unsigned long addr; - if (val->type != GATE_TRAP && val->type != GATE_INTERRUPT) + if (val->bits.type != GATE_TRAP && val->bits.type != GATE_INTERRUPT) return 0; info->vector = vector; - addr = gate_offset(*val); + addr = gate_offset(val); #ifdef CONFIG_X86_64 - /* - * Look for known traps using IST, and substitute them - * appropriately. The debugger ones are the only ones we care - * about. Xen will handle faults like double_fault, - * so we should never see them. Warn if - * there's an unexpected IST-using fault handler. - */ - if (addr == (unsigned long)debug) - addr = (unsigned long)xen_debug; - else if (addr == (unsigned long)int3) - addr = (unsigned long)xen_int3; - else if (addr == (unsigned long)stack_segment) - addr = (unsigned long)xen_stack_segment; - else if (addr == (unsigned long)double_fault) { - /* Don't need to handle these */ + if (!get_trap_addr((void **)&addr, val->bits.ist)) return 0; -#ifdef CONFIG_X86_MCE - } else if (addr == (unsigned long)machine_check) { - /* - * when xen hypervisor inject vMCE to guest, - * use native mce handler to handle it - */ - ; -#endif - } else if (addr == (unsigned long)nmi) - /* - * Use the native version as well. - */ - ; - else { - /* Some other trap using IST? */ - if (WARN_ON(val->ist != 0)) - return 0; - } #endif /* CONFIG_X86_64 */ info->address = addr; - info->cs = gate_segment(*val); - info->flags = val->dpl; + info->cs = gate_segment(val); + info->flags = val->bits.dpl; /* interrupt gates clear IF */ - if (val->type == GATE_INTERRUPT) + if (val->bits.type == GATE_INTERRUPT) info->flags |= 1 << 2; return 1; @@ -981,59 +1020,6 @@ void __ref xen_setup_vcpu_info_placement(void) } } -static unsigned xen_patch(u8 type, u16 clobbers, void *insnbuf, - unsigned long addr, unsigned len) -{ - char *start, *end, *reloc; - unsigned ret; - - start = end = reloc = NULL; - -#define SITE(op, x) \ - case PARAVIRT_PATCH(op.x): \ - if (xen_have_vcpu_info_placement) { \ - start = (char *)xen_##x##_direct; \ - end = xen_##x##_direct_end; \ - reloc = xen_##x##_direct_reloc; \ - } \ - goto patch_site - - switch (type) { - SITE(pv_irq_ops, irq_enable); - SITE(pv_irq_ops, irq_disable); - SITE(pv_irq_ops, save_fl); - SITE(pv_irq_ops, restore_fl); -#undef SITE - - patch_site: - if (start == NULL || (end-start) > len) - goto default_patch; - - ret = paravirt_patch_insns(insnbuf, len, start, end); - - /* Note: because reloc is assigned from something that - appears to be an array, gcc assumes it's non-null, - but doesn't know its relationship with start and - end. */ - if (reloc > start && reloc < end) { - int reloc_off = reloc - start; - long *relocp = (long *)(insnbuf + reloc_off); - long delta = start - (char *)addr; - - *relocp += delta; - } - break; - - default_patch: - default: - ret = paravirt_patch_default(type, clobbers, insnbuf, - addr, len); - break; - } - - return ret; -} - static const struct pv_info xen_info __initconst = { .shared_kernel_pmd = 0, @@ -1043,10 +1029,6 @@ static const struct pv_info xen_info __initconst = { .name = "Xen", }; -static const struct pv_init_ops xen_init_ops __initconst = { - .patch = xen_patch, -}; - static const struct pv_cpu_ops xen_cpu_ops __initconst = { .cpuid = xen_cpuid, @@ -1244,7 +1226,7 @@ asmlinkage __visible void __init xen_start_kernel(void) /* Install Xen paravirt ops */ pv_info = xen_info; - pv_init_ops = xen_init_ops; + pv_init_ops.patch = paravirt_patch_default; pv_cpu_ops = xen_cpu_ops; x86_platform.get_nmi_reason = xen_get_nmi_reason; diff --git a/arch/x86/xen/irq.c b/arch/x86/xen/irq.c index 33e92955e09d..d4eff5676cfa 100644 --- a/arch/x86/xen/irq.c +++ b/arch/x86/xen/irq.c @@ -123,9 +123,6 @@ static const struct pv_irq_ops xen_irq_ops __initconst = { .safe_halt = xen_safe_halt, .halt = xen_halt, -#ifdef CONFIG_X86_64 - .adjust_exception_frame = xen_adjust_exception_frame, -#endif }; void __init xen_init_irq_ops(void) diff --git a/arch/x86/xen/mmu_pv.c b/arch/x86/xen/mmu_pv.c index cab28cf2cffb..e437714750f8 100644 --- a/arch/x86/xen/mmu_pv.c +++ b/arch/x86/xen/mmu_pv.c @@ -1005,14 +1005,12 @@ static void xen_drop_mm_ref(struct mm_struct *mm) /* Get the "official" set of cpus referring to our pagetable. */ if (!alloc_cpumask_var(&mask, GFP_ATOMIC)) { for_each_online_cpu(cpu) { - if (!cpumask_test_cpu(cpu, mm_cpumask(mm)) - && per_cpu(xen_current_cr3, cpu) != __pa(mm->pgd)) + if (per_cpu(xen_current_cr3, cpu) != __pa(mm->pgd)) continue; smp_call_function_single(cpu, drop_mm_ref_this_cpu, mm, 1); } return; } - cpumask_copy(mask, mm_cpumask(mm)); /* * It's possible that a vcpu may have a stale reference to our @@ -1021,6 +1019,7 @@ static void xen_drop_mm_ref(struct mm_struct *mm) * look at its actual current cr3 value, and force it to flush * if needed. */ + cpumask_clear(mask); for_each_online_cpu(cpu) { if (per_cpu(xen_current_cr3, cpu) == __pa(mm->pgd)) cpumask_set_cpu(cpu, mask); diff --git a/arch/x86/xen/xen-asm.S b/arch/x86/xen/xen-asm.S index eff224df813f..dcd31fa39b5d 100644 --- a/arch/x86/xen/xen-asm.S +++ b/arch/x86/xen/xen-asm.S @@ -1,14 +1,8 @@ /* - * Asm versions of Xen pv-ops, suitable for either direct use or - * inlining. The inline versions are the same as the direct-use - * versions, with the pre- and post-amble chopped off. - * - * This code is encoded for size rather than absolute efficiency, with - * a view to being able to inline as much as possible. + * Asm versions of Xen pv-ops, suitable for direct use. * * We only bother with direct forms (ie, vcpu in percpu data) of the - * operations here; the indirect forms are better handled in C, since - * they're generally too large to inline anyway. + * operations here; the indirect forms are better handled in C. */ #include <asm/asm-offsets.h> @@ -16,7 +10,7 @@ #include <asm/processor-flags.h> #include <asm/frame.h> -#include "xen-asm.h" +#include <linux/linkage.h> /* * Enable events. This clears the event mask and tests the pending @@ -38,13 +32,11 @@ ENTRY(xen_irq_enable_direct) testb $0xff, PER_CPU_VAR(xen_vcpu_info) + XEN_vcpu_info_pending jz 1f -2: call check_events + call check_events 1: -ENDPATCH(xen_irq_enable_direct) FRAME_END ret ENDPROC(xen_irq_enable_direct) - RELOC(xen_irq_enable_direct, 2b+1) /* @@ -53,10 +45,8 @@ ENDPATCH(xen_irq_enable_direct) */ ENTRY(xen_irq_disable_direct) movb $1, PER_CPU_VAR(xen_vcpu_info) + XEN_vcpu_info_mask -ENDPATCH(xen_irq_disable_direct) ret - ENDPROC(xen_irq_disable_direct) - RELOC(xen_irq_disable_direct, 0) +ENDPROC(xen_irq_disable_direct) /* * (xen_)save_fl is used to get the current interrupt enable status. @@ -71,10 +61,8 @@ ENTRY(xen_save_fl_direct) testb $0xff, PER_CPU_VAR(xen_vcpu_info) + XEN_vcpu_info_mask setz %ah addb %ah, %ah -ENDPATCH(xen_save_fl_direct) ret ENDPROC(xen_save_fl_direct) - RELOC(xen_save_fl_direct, 0) /* @@ -101,13 +89,11 @@ ENTRY(xen_restore_fl_direct) /* check for unmasked and pending */ cmpw $0x0001, PER_CPU_VAR(xen_vcpu_info) + XEN_vcpu_info_pending jnz 1f -2: call check_events + call check_events 1: -ENDPATCH(xen_restore_fl_direct) FRAME_END ret ENDPROC(xen_restore_fl_direct) - RELOC(xen_restore_fl_direct, 2b+1) /* diff --git a/arch/x86/xen/xen-asm.h b/arch/x86/xen/xen-asm.h deleted file mode 100644 index 465276467a47..000000000000 --- a/arch/x86/xen/xen-asm.h +++ /dev/null @@ -1,12 +0,0 @@ -#ifndef _XEN_XEN_ASM_H -#define _XEN_XEN_ASM_H - -#include <linux/linkage.h> - -#define RELOC(x, v) .globl x##_reloc; x##_reloc=v -#define ENDPATCH(x) .globl x##_end; x##_end=. - -/* Pseudo-flag used for virtual NMI, which we don't implement yet */ -#define XEN_EFLAGS_NMI 0x80000000 - -#endif diff --git a/arch/x86/xen/xen-asm_32.S b/arch/x86/xen/xen-asm_32.S index feb6d40a0860..1200e262a116 100644 --- a/arch/x86/xen/xen-asm_32.S +++ b/arch/x86/xen/xen-asm_32.S @@ -1,14 +1,8 @@ /* - * Asm versions of Xen pv-ops, suitable for either direct use or - * inlining. The inline versions are the same as the direct-use - * versions, with the pre- and post-amble chopped off. - * - * This code is encoded for size rather than absolute efficiency, with - * a view to being able to inline as much as possible. + * Asm versions of Xen pv-ops, suitable for direct use. * * We only bother with direct forms (ie, vcpu in pda) of the - * operations here; the indirect forms are better handled in C, since - * they're generally too large to inline anyway. + * operations here; the indirect forms are better handled in C. */ #include <asm/thread_info.h> @@ -18,21 +12,10 @@ #include <xen/interface/xen.h> -#include "xen-asm.h" +#include <linux/linkage.h> -/* - * Force an event check by making a hypercall, but preserve regs - * before making the call. - */ -check_events: - push %eax - push %ecx - push %edx - call xen_force_evtchn_callback - pop %edx - pop %ecx - pop %eax - ret +/* Pseudo-flag used for virtual NMI, which we don't implement yet */ +#define XEN_EFLAGS_NMI 0x80000000 /* * This is run where a normal iret would be run, with the same stack setup: diff --git a/arch/x86/xen/xen-asm_64.S b/arch/x86/xen/xen-asm_64.S index c3df43141e70..dae2cc33afb5 100644 --- a/arch/x86/xen/xen-asm_64.S +++ b/arch/x86/xen/xen-asm_64.S @@ -1,14 +1,8 @@ /* - * Asm versions of Xen pv-ops, suitable for either direct use or - * inlining. The inline versions are the same as the direct-use - * versions, with the pre- and post-amble chopped off. - * - * This code is encoded for size rather than absolute efficiency, with - * a view to being able to inline as much as possible. + * Asm versions of Xen pv-ops, suitable for direct use. * * We only bother with direct forms (ie, vcpu in pda) of the - * operations here; the indirect forms are better handled in C, since - * they're generally too large to inline anyway. + * operations here; the indirect forms are better handled in C. */ #include <asm/errno.h> @@ -20,13 +14,44 @@ #include <xen/interface/xen.h> -#include "xen-asm.h" +#include <linux/linkage.h> + +.macro xen_pv_trap name +ENTRY(xen_\name) + pop %rcx + pop %r11 + jmp \name +END(xen_\name) +.endm -ENTRY(xen_adjust_exception_frame) - mov 8+0(%rsp), %rcx - mov 8+8(%rsp), %r11 - ret $16 -ENDPROC(xen_adjust_exception_frame) +xen_pv_trap divide_error +xen_pv_trap debug +xen_pv_trap xendebug +xen_pv_trap int3 +xen_pv_trap xenint3 +xen_pv_trap nmi +xen_pv_trap overflow +xen_pv_trap bounds +xen_pv_trap invalid_op +xen_pv_trap device_not_available +xen_pv_trap double_fault +xen_pv_trap coprocessor_segment_overrun +xen_pv_trap invalid_TSS +xen_pv_trap segment_not_present +xen_pv_trap stack_segment +xen_pv_trap general_protection +xen_pv_trap page_fault +xen_pv_trap spurious_interrupt_bug +xen_pv_trap coprocessor_error +xen_pv_trap alignment_check +#ifdef CONFIG_X86_MCE +xen_pv_trap machine_check +#endif /* CONFIG_X86_MCE */ +xen_pv_trap simd_coprocessor_error +#ifdef CONFIG_IA32_EMULATION +xen_pv_trap entry_INT80_compat +#endif +xen_pv_trap hypervisor_callback hypercall_iret = hypercall_page + __HYPERVISOR_iret * 32 /* @@ -46,9 +71,7 @@ hypercall_iret = hypercall_page + __HYPERVISOR_iret * 32 */ ENTRY(xen_iret) pushq $0 -1: jmp hypercall_iret -ENDPATCH(xen_iret) -RELOC(xen_iret, 1b+1) + jmp hypercall_iret ENTRY(xen_sysret64) /* @@ -65,9 +88,7 @@ ENTRY(xen_sysret64) pushq %rcx pushq $VGCF_in_syscall -1: jmp hypercall_iret -ENDPATCH(xen_sysret64) -RELOC(xen_sysret64, 1b+1) + jmp hypercall_iret /* * Xen handles syscall callbacks much like ordinary exceptions, which @@ -82,34 +103,47 @@ RELOC(xen_sysret64, 1b+1) * rip * r11 * rsp->rcx - * - * In all the entrypoints, we undo all that to make it look like a - * CPU-generated syscall/sysenter and jump to the normal entrypoint. */ -.macro undo_xen_syscall - mov 0*8(%rsp), %rcx - mov 1*8(%rsp), %r11 - mov 5*8(%rsp), %rsp -.endm - /* Normal 64-bit system call target */ ENTRY(xen_syscall_target) - undo_xen_syscall - jmp entry_SYSCALL_64_after_swapgs + popq %rcx + popq %r11 + + /* + * Neither Xen nor the kernel really knows what the old SS and + * CS were. The kernel expects __USER_DS and __USER_CS, so + * report those values even though Xen will guess its own values. + */ + movq $__USER_DS, 4*8(%rsp) + movq $__USER_CS, 1*8(%rsp) + + jmp entry_SYSCALL_64_after_hwframe ENDPROC(xen_syscall_target) #ifdef CONFIG_IA32_EMULATION /* 32-bit compat syscall target */ ENTRY(xen_syscall32_target) - undo_xen_syscall - jmp entry_SYSCALL_compat + popq %rcx + popq %r11 + + /* + * Neither Xen nor the kernel really knows what the old SS and + * CS were. The kernel expects __USER32_DS and __USER32_CS, so + * report those values even though Xen will guess its own values. + */ + movq $__USER32_DS, 4*8(%rsp) + movq $__USER32_CS, 1*8(%rsp) + + jmp entry_SYSCALL_compat_after_hwframe ENDPROC(xen_syscall32_target) /* 32-bit compat sysenter target */ ENTRY(xen_sysenter_target) - undo_xen_syscall + mov 0*8(%rsp), %rcx + mov 1*8(%rsp), %r11 + mov 5*8(%rsp), %rsp jmp entry_SYSENTER_compat ENDPROC(xen_sysenter_target) diff --git a/arch/x86/xen/xen-head.S b/arch/x86/xen/xen-head.S index 72a8e6adebe6..a7525e95d53f 100644 --- a/arch/x86/xen/xen-head.S +++ b/arch/x86/xen/xen-head.S @@ -58,7 +58,7 @@ ENTRY(hypercall_page) #else ELFNOTE(Xen, XEN_ELFNOTE_VIRT_BASE, _ASM_PTR __START_KERNEL_map) /* Map the p2m table to a 512GB-aligned user address. */ - ELFNOTE(Xen, XEN_ELFNOTE_INIT_P2M, .quad PGDIR_SIZE) + ELFNOTE(Xen, XEN_ELFNOTE_INIT_P2M, .quad (PUD_SIZE * PTRS_PER_PUD)) #endif #ifdef CONFIG_XEN_PV ELFNOTE(Xen, XEN_ELFNOTE_ENTRY, _ASM_PTR startup_xen) diff --git a/arch/x86/xen/xen-ops.h b/arch/x86/xen/xen-ops.h index 0d5004477db6..c8a6d224f7ed 100644 --- a/arch/x86/xen/xen-ops.h +++ b/arch/x86/xen/xen-ops.h @@ -129,23 +129,15 @@ static inline void __init xen_efi_init(void) } #endif -/* Declare an asm function, along with symbols needed to make it - inlineable */ -#define DECL_ASM(ret, name, ...) \ - __visible ret name(__VA_ARGS__); \ - extern char name##_end[] __visible; \ - extern char name##_reloc[] __visible - -DECL_ASM(void, xen_irq_enable_direct, void); -DECL_ASM(void, xen_irq_disable_direct, void); -DECL_ASM(unsigned long, xen_save_fl_direct, void); -DECL_ASM(void, xen_restore_fl_direct, unsigned long); +__visible void xen_irq_enable_direct(void); +__visible void xen_irq_disable_direct(void); +__visible unsigned long xen_save_fl_direct(void); +__visible void xen_restore_fl_direct(unsigned long); /* These are not functions, and cannot be called normally */ __visible void xen_iret(void); __visible void xen_sysret32(void); __visible void xen_sysret64(void); -__visible void xen_adjust_exception_frame(void); extern int xen_panic_handler_init(void); |