Diffstat (limited to 'arch/x86/xen')
-rw-r--r--  arch/x86/xen/Kconfig     |   2
-rw-r--r--  arch/x86/xen/enlighten.c | 108
-rw-r--r--  arch/x86/xen/mmu.c       |  20
-rw-r--r--  arch/x86/xen/smp.c       |  33
-rw-r--r--  arch/x86/xen/smp.h       |   1
-rw-r--r--  arch/x86/xen/spinlock.c  |  25
-rw-r--r--  arch/x86/xen/time.c      |  13
7 files changed, 171 insertions, 31 deletions
diff --git a/arch/x86/xen/Kconfig b/arch/x86/xen/Kconfig
index 131dacd2748a..1a3c76505649 100644
--- a/arch/x86/xen/Kconfig
+++ b/arch/x86/xen/Kconfig
@@ -4,7 +4,7 @@
 
 config XEN
         bool "Xen guest support"
-        select PARAVIRT
+        depends on PARAVIRT
         select PARAVIRT_CLOCK
         select XEN_HAVE_PVMMU
         depends on X86_64 || (X86_32 && X86_PAE && !X86_VISWS)
diff --git a/arch/x86/xen/enlighten.c b/arch/x86/xen/enlighten.c
index c8e1c7b95c3b..a492be2635ac 100644
--- a/arch/x86/xen/enlighten.c
+++ b/arch/x86/xen/enlighten.c
@@ -31,6 +31,7 @@
 #include <linux/pci.h>
 #include <linux/gfp.h>
 #include <linux/memblock.h>
+#include <linux/edd.h>
 
 #include <xen/xen.h>
 #include <xen/events.h>
@@ -84,7 +85,29 @@
 EXPORT_SYMBOL_GPL(hypercall_page);
 
+/*
+ * Pointer to the xen_vcpu_info structure or
+ * &HYPERVISOR_shared_info->vcpu_info[cpu]. See xen_hvm_init_shared_info
+ * and xen_vcpu_setup for details. By default it points to shared_info->vcpu_info
+ * but if the hypervisor supports VCPUOP_register_vcpu_info then it can point
+ * to xen_vcpu_info. The pointer is used in __xen_evtchn_do_upcall to
+ * acknowledge pending events.
+ * Also, more subtly, it is used by the patched versions of irq enable/disable,
+ * e.g. xen_irq_enable_direct and xen_iret in PV mode.
+ *
+ * The desire to be able to do those mask/unmask operations as a single
+ * instruction, by using the per-cpu offset held in %gs, is the real reason
+ * vcpu info is in a per-cpu pointer and the original reason for this
+ * hypercall.
+ */
 DEFINE_PER_CPU(struct vcpu_info *, xen_vcpu);
+
+/*
+ * Per-CPU pages used if the hypervisor supports the VCPUOP_register_vcpu_info
+ * hypercall. This can be used both in PV and PVHVM mode. The structure
+ * overrides the default per_cpu(xen_vcpu, cpu) value.
+ */
 DEFINE_PER_CPU(struct vcpu_info, xen_vcpu_info);
 
 enum xen_domain_type xen_domain_type = XEN_NATIVE;
@@ -156,6 +179,21 @@ static void xen_vcpu_setup(int cpu)
 
         BUG_ON(HYPERVISOR_shared_info == &xen_dummy_shared_info);
 
+        /*
+         * This path is called twice on PVHVM - first during bootup via
+         * smp_init -> xen_hvm_cpu_notify, and then if the VCPU is being
+         * hotplugged: cpu_up -> xen_hvm_cpu_notify.
+         * As we can only do the VCPUOP_register_vcpu_info once, let's
+         * not overwrite its result.
+         *
+         * For PV it is called during restore (xen_vcpu_restore) and bootup
+         * (xen_setup_vcpu_info_placement). The hotplug mechanism does not
+         * use this function.
+         */
+        if (xen_hvm_domain()) {
+                if (per_cpu(xen_vcpu, cpu) == &per_cpu(xen_vcpu_info, cpu))
+                        return;
+        }
         if (cpu < MAX_VIRT_CPUS)
                 per_cpu(xen_vcpu, cpu) = &HYPERVISOR_shared_info->vcpu_info[cpu];
@@ -171,7 +209,12 @@
         /* Check to see if the hypervisor will put the vcpu_info
            structure where we want it, which allows direct access via
-           a percpu-variable. */
+           a percpu-variable.
+           N.B. This hypercall can _only_ be called once per CPU. Subsequent
+           calls will error out with -EINVAL. This is due to the fact that
+           the hypervisor has no unregister variant and this hypercall does
+           not allow overwriting info.mfn and info.offset.
+         */
         err = HYPERVISOR_vcpu_op(VCPUOP_register_vcpu_info, cpu, &info);
 
         if (err) {
@@ -386,6 +429,9 @@ static void __init xen_init_cpuid_mask(void)
                 cpuid_leaf1_edx_mask &=
                         ~((1 << X86_FEATURE_APIC) |  /* disable local APIC */
                           (1 << X86_FEATURE_ACPI));  /* disable ACPI */
+
+        cpuid_leaf1_ecx_mask &= ~(1 << (X86_FEATURE_X2APIC % 32));
+
         ax = 1;
         cx = 0;
         xen_cpuid(&ax, &bx, &cx, &dx);
@@ -1220,7 +1266,6 @@ static const struct pv_cpu_ops xen_cpu_ops __initconst = {
         .alloc_ldt = xen_alloc_ldt,
         .free_ldt = xen_free_ldt,
 
-        .store_gdt = native_store_gdt,
         .store_idt = native_store_idt,
         .store_tr = xen_store_tr,
@@ -1306,6 +1351,55 @@ static const struct machine_ops xen_machine_ops __initconst = {
         .emergency_restart = xen_emergency_restart,
 };
 
+static void __init xen_boot_params_init_edd(void)
+{
+#if IS_ENABLED(CONFIG_EDD)
+        struct xen_platform_op op;
+        struct edd_info *edd_info;
+        u32 *mbr_signature;
+        unsigned nr;
+        int ret;
+
+        edd_info = boot_params.eddbuf;
+        mbr_signature = boot_params.edd_mbr_sig_buffer;
+
+        op.cmd = XENPF_firmware_info;
+
+        op.u.firmware_info.type = XEN_FW_DISK_INFO;
+        for (nr = 0; nr < EDDMAXNR; nr++) {
+                struct edd_info *info = edd_info + nr;
+
+                op.u.firmware_info.index = nr;
+                info->params.length = sizeof(info->params);
+                set_xen_guest_handle(op.u.firmware_info.u.disk_info.edd_params,
+                                     &info->params);
+                ret = HYPERVISOR_dom0_op(&op);
+                if (ret)
+                        break;
+
+#define C(x) info->x = op.u.firmware_info.u.disk_info.x
+                C(device);
+                C(version);
+                C(interface_support);
+                C(legacy_max_cylinder);
+                C(legacy_max_head);
+                C(legacy_sectors_per_track);
+#undef C
+        }
+        boot_params.eddbuf_entries = nr;
+
+        op.u.firmware_info.type = XEN_FW_DISK_MBR_SIGNATURE;
+        for (nr = 0; nr < EDD_MBR_SIG_MAX; nr++) {
+                op.u.firmware_info.index = nr;
+                ret = HYPERVISOR_dom0_op(&op);
+                if (ret)
+                        break;
+                mbr_signature[nr] = op.u.firmware_info.u.disk_mbr_signature.mbr_signature;
+        }
+        boot_params.edd_mbr_sig_buf_entries = nr;
+#endif
+}
+
 /*
  * Set up the GDT and segment registers for -fstack-protector. Until
  * we do this, we have to be careful not to call any stack-protected
@@ -1508,6 +1602,8 @@ asmlinkage void __init xen_start_kernel(void)
                 /* Avoid searching for BIOS MP tables */
                 x86_init.mpparse.find_smp_config = x86_init_noop;
                 x86_init.mpparse.get_smp_config = x86_init_uint_noop;
+
+                xen_boot_params_init_edd();
         }
 #ifdef CONFIG_PCI
         /* PCI BIOS service won't work from a PV guest. */
@@ -1552,6 +1648,9 @@ void __ref xen_hvm_init_shared_info(void)
          * online but xen_hvm_init_shared_info is run at resume time too and
          * in that case multiple vcpus might be online. */
         for_each_online_cpu(cpu) {
+                /* Leave it to be NULL. */
+                if (cpu >= MAX_VIRT_CPUS)
+                        continue;
                 per_cpu(xen_vcpu, cpu) = &HYPERVISOR_shared_info->vcpu_info[cpu];
         }
 }
@@ -1589,8 +1688,11 @@ static int __cpuinit xen_hvm_cpu_notify(struct notifier_block *self,
         switch (action) {
         case CPU_UP_PREPARE:
                 xen_vcpu_setup(cpu);
-                if (xen_have_vector_callback)
+                if (xen_have_vector_callback) {
                         xen_init_lock_cpu(cpu);
+                        if (xen_feature(XENFEAT_hvm_safe_pvclock))
+                                xen_setup_timer(cpu);
+                }
                 break;
         default:
                 break;
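For illustration, here is a standalone C sketch (not kernel code) of the register-once guard that xen_vcpu_setup() uses above: since the hypercall has no unregister variant, a second call for the same CPU must be skipped rather than retried. All names and values here are illustrative stand-ins, not the Xen ABI.

#include <stdio.h>

#define MAX_CPUS 4

struct vcpu_info { int dummy; };

static struct vcpu_info shared_vcpu_info[MAX_CPUS]; /* models shared_info->vcpu_info[] */
static struct vcpu_info percpu_vcpu_info[MAX_CPUS]; /* models per-cpu xen_vcpu_info */
static struct vcpu_info *xen_vcpu[MAX_CPUS];        /* models the per-cpu xen_vcpu pointer */

/* Models VCPUOP_register_vcpu_info: succeeds once, fails afterwards. */
static int register_vcpu_info(int cpu)
{
        static int registered[MAX_CPUS];

        if (registered[cpu])
                return -22; /* -EINVAL: no unregister, no overwrite */
        registered[cpu] = 1;
        return 0;
}

static void vcpu_setup(int cpu)
{
        /* Already switched to the per-cpu structure: nothing to redo. */
        if (xen_vcpu[cpu] == &percpu_vcpu_info[cpu])
                return;

        xen_vcpu[cpu] = &shared_vcpu_info[cpu]; /* safe default */
        if (register_vcpu_info(cpu) == 0)
                xen_vcpu[cpu] = &percpu_vcpu_info[cpu];
}

int main(void)
{
        vcpu_setup(1); /* boot path */
        vcpu_setup(1); /* hotplug path: early return, hypercall not repeated */
        printf("cpu1 uses %s copy\n",
               xen_vcpu[1] == &percpu_vcpu_info[1] ? "per-cpu" : "shared");
        return 0;
}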
diff --git a/arch/x86/xen/mmu.c b/arch/x86/xen/mmu.c
index e8e34938c57d..fdc3ba28ca38 100644
--- a/arch/x86/xen/mmu.c
+++ b/arch/x86/xen/mmu.c
@@ -1467,8 +1467,6 @@ static void __init xen_write_cr3_init(unsigned long cr3)
         __xen_write_cr3(true, cr3);
 
         xen_mc_issue(PARAVIRT_LAZY_CPU);  /* interrupts restored */
-
-        pv_mmu_ops.write_cr3 = &xen_write_cr3;
 }
 #endif
 
@@ -1750,14 +1748,18 @@ static void *m2v(phys_addr_t maddr)
 }
 
 /* Set the page permissions on an identity-mapped pages */
-static void set_page_prot(void *addr, pgprot_t prot)
+static void set_page_prot_flags(void *addr, pgprot_t prot, unsigned long flags)
 {
         unsigned long pfn = __pa(addr) >> PAGE_SHIFT;
         pte_t pte = pfn_pte(pfn, prot);
 
-        if (HYPERVISOR_update_va_mapping((unsigned long)addr, pte, 0))
+        if (HYPERVISOR_update_va_mapping((unsigned long)addr, pte, flags))
                 BUG();
 }
+static void set_page_prot(void *addr, pgprot_t prot)
+{
+        return set_page_prot_flags(addr, prot, UVMF_NONE);
+}
 #ifdef CONFIG_X86_32
 static void __init xen_map_identity_early(pmd_t *pmd, unsigned long max_pfn)
 {
@@ -1841,12 +1843,12 @@ static void __init check_pt_base(unsigned long *pt_base, unsigned long *pt_end,
                          unsigned long addr)
 {
         if (*pt_base == PFN_DOWN(__pa(addr))) {
-                set_page_prot((void *)addr, PAGE_KERNEL);
+                set_page_prot_flags((void *)addr, PAGE_KERNEL, UVMF_INVLPG);
                 clear_page((void *)addr);
                 (*pt_base)++;
         }
         if (*pt_end == PFN_DOWN(__pa(addr))) {
-                set_page_prot((void *)addr, PAGE_KERNEL);
+                set_page_prot_flags((void *)addr, PAGE_KERNEL, UVMF_INVLPG);
                 clear_page((void *)addr);
                 (*pt_end)--;
         }
@@ -2041,9 +2043,7 @@ static void xen_set_fixmap(unsigned idx, phys_addr_t phys, pgprot_t prot)
 
         switch (idx) {
         case FIX_BTMAP_END ... FIX_BTMAP_BEGIN:
-#ifdef CONFIG_X86_F00F_BUG
-        case FIX_F00F_IDT:
-#endif
+        case FIX_RO_IDT:
 #ifdef CONFIG_X86_32
         case FIX_WP_TEST:
         case FIX_VDSO:
@@ -2122,6 +2122,7 @@ static void __init xen_post_allocator_init(void)
 #endif
 
 #ifdef CONFIG_X86_64
+        pv_mmu_ops.write_cr3 = &xen_write_cr3;
         SetPagePinned(virt_to_page(level3_user_vsyscall));
 #endif
         xen_mark_init_mm_pinned();
@@ -2197,6 +2198,7 @@ static const struct pv_mmu_ops xen_mmu_ops __initconst = {
         .lazy_mode = {
                 .enter = paravirt_enter_lazy_mmu,
                 .leave = xen_leave_lazy_mmu,
+                .flush = paravirt_flush_lazy_mmu,
         },
 
         .set_fixmap = xen_set_fixmap,
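The mmu.c change above is a classic "add a _flags variant" refactor: a new entry point carries the extra argument (so check_pt_base() can request a TLB flush with UVMF_INVLPG), while the old name stays as a thin wrapper passing the previous default. A minimal userspace C sketch of the same pattern, with illustrative names and flag values rather than the real Xen ABI:

#include <stdio.h>

#define UVMF_NONE   0UL
#define UVMF_INVLPG 2UL /* illustrative: "flush this VA from the TLB" */

/* Stand-in for HYPERVISOR_update_va_mapping(). */
static int update_va_mapping(unsigned long va, unsigned long pte,
                             unsigned long flags)
{
        printf("map va=%#lx pte=%#lx flags=%#lx\n", va, pte, flags);
        return 0;
}

/* New entry point: callers choose the flush behavior. */
static void set_page_prot_flags(unsigned long va, unsigned long prot,
                                unsigned long flags)
{
        if (update_va_mapping(va, prot, flags))
                __builtin_trap(); /* BUG() in the kernel */
}

/* Old entry point: existing call sites keep their behavior. */
static void set_page_prot(unsigned long va, unsigned long prot)
{
        set_page_prot_flags(va, prot, UVMF_NONE);
}

int main(void)
{
        set_page_prot(0x1000, 0x7);                    /* legacy call site */
        set_page_prot_flags(0x2000, 0x7, UVMF_INVLPG); /* new call site */
        return 0;
}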
diff --git a/arch/x86/xen/smp.c b/arch/x86/xen/smp.c
index 09ea61d2e02f..fb44426fe931 100644
--- a/arch/x86/xen/smp.c
+++ b/arch/x86/xen/smp.c
@@ -95,7 +95,7 @@ static void __cpuinit cpu_bringup(void)
 static void __cpuinit cpu_bringup_and_idle(void)
 {
         cpu_bringup();
-        cpu_idle();
+        cpu_startup_entry(CPUHP_ONLINE);
 }
 
 static int xen_smp_intr_init(unsigned int cpu)
@@ -144,6 +144,13 @@ static int xen_smp_intr_init(unsigned int cpu)
                 goto fail;
         per_cpu(xen_callfuncsingle_irq, cpu) = rc;
 
+        /*
+         * The IRQ worker on PVHVM goes through the native path and uses the
+         * IPI mechanism.
+         */
+        if (xen_hvm_domain())
+                return 0;
+
         callfunc_name = kasprintf(GFP_KERNEL, "irqwork%d", cpu);
         rc = bind_ipi_to_irqhandler(XEN_IRQ_WORK_VECTOR,
                                     cpu,
@@ -167,6 +174,9 @@ static int xen_smp_intr_init(unsigned int cpu)
         if (per_cpu(xen_callfuncsingle_irq, cpu) >= 0)
                 unbind_from_irqhandler(per_cpu(xen_callfuncsingle_irq, cpu),
                                        NULL);
+        if (xen_hvm_domain())
+                return rc;
+
         if (per_cpu(xen_irq_work, cpu) >= 0)
                 unbind_from_irqhandler(per_cpu(xen_irq_work, cpu), NULL);
 
@@ -418,7 +428,7 @@ static int xen_cpu_disable(void)
 
 static void xen_cpu_die(unsigned int cpu)
 {
-        while (HYPERVISOR_vcpu_op(VCPUOP_is_up, cpu, NULL)) {
+        while (xen_pv_domain() && HYPERVISOR_vcpu_op(VCPUOP_is_up, cpu, NULL)) {
                 current->state = TASK_UNINTERRUPTIBLE;
                 schedule_timeout(HZ/10);
         }
@@ -426,7 +436,8 @@ static void xen_cpu_die(unsigned int cpu)
         unbind_from_irqhandler(per_cpu(xen_callfunc_irq, cpu), NULL);
         unbind_from_irqhandler(per_cpu(xen_debug_irq, cpu), NULL);
         unbind_from_irqhandler(per_cpu(xen_callfuncsingle_irq, cpu), NULL);
-        unbind_from_irqhandler(per_cpu(xen_irq_work, cpu), NULL);
+        if (!xen_hvm_domain())
+                unbind_from_irqhandler(per_cpu(xen_irq_work, cpu), NULL);
         xen_uninit_lock_cpu(cpu);
         xen_teardown_timer(cpu);
 }
@@ -565,24 +576,22 @@ void xen_send_IPI_mask_allbutself(const struct cpumask *mask,
 {
         unsigned cpu;
         unsigned int this_cpu = smp_processor_id();
+        int xen_vector = xen_map_vector(vector);
 
-        if (!(num_online_cpus() > 1))
+        if (!(num_online_cpus() > 1) || (xen_vector < 0))
                 return;
 
         for_each_cpu_and(cpu, mask, cpu_online_mask) {
                 if (this_cpu == cpu)
                         continue;
 
-                xen_smp_send_call_function_single_ipi(cpu);
+                xen_send_IPI_one(cpu, xen_vector);
         }
 }
 
 void xen_send_IPI_allbutself(int vector)
 {
-        int xen_vector = xen_map_vector(vector);
-
-        if (xen_vector >= 0)
-                xen_send_IPI_mask_allbutself(cpu_online_mask, xen_vector);
+        xen_send_IPI_mask_allbutself(cpu_online_mask, vector);
 }
 
 static irqreturn_t xen_call_function_interrupt(int irq, void *dev_id)
@@ -657,11 +666,7 @@ static int __cpuinit xen_hvm_cpu_up(unsigned int cpu, struct task_struct *tidle)
 
 static void xen_hvm_cpu_die(unsigned int cpu)
 {
-        unbind_from_irqhandler(per_cpu(xen_resched_irq, cpu), NULL);
-        unbind_from_irqhandler(per_cpu(xen_callfunc_irq, cpu), NULL);
-        unbind_from_irqhandler(per_cpu(xen_debug_irq, cpu), NULL);
-        unbind_from_irqhandler(per_cpu(xen_callfuncsingle_irq, cpu), NULL);
-        unbind_from_irqhandler(per_cpu(xen_irq_work, cpu), NULL);
+        xen_cpu_die(cpu);
         native_cpu_die(cpu);
 }
diff --git a/arch/x86/xen/smp.h b/arch/x86/xen/smp.h
index 8981a76d081a..c7c2d89efd76 100644
--- a/arch/x86/xen/smp.h
+++ b/arch/x86/xen/smp.h
@@ -5,7 +5,6 @@ extern void xen_send_IPI_mask(const struct cpumask *mask,
 extern void xen_send_IPI_mask_allbutself(const struct cpumask *mask,
                                 int vector);
 extern void xen_send_IPI_allbutself(int vector);
-extern void physflat_send_IPI_allbutself(int vector);
 extern void xen_send_IPI_all(int vector);
 extern void xen_send_IPI_self(int vector);
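The smp.c IPI change above moves the vector translation into xen_send_IPI_mask_allbutself() itself: the generic vector is mapped to a Xen vector once, the function bails out if there is no mapping, and every target then receives the correctly mapped vector instead of the hard-coded call-function-single IPI. A standalone C sketch of that shape, with illustrative constants standing in for the kernel's vector numbers and cpumask machinery:

#include <stdio.h>

#define NR_CPUS 4
#define CALL_FUNCTION_VECTOR     1
#define XEN_CALL_FUNCTION_VECTOR 101

static int online[NR_CPUS] = { 1, 1, 1, 1 };

/* Stand-in for xen_map_vector(): translate or report "no equivalent". */
static int xen_map_vector(int vector)
{
        switch (vector) {
        case CALL_FUNCTION_VECTOR:
                return XEN_CALL_FUNCTION_VECTOR;
        default:
                return -1;
        }
}

static void xen_send_ipi_one(int cpu, int xen_vector)
{
        printf("IPI %d -> cpu%d\n", xen_vector, cpu);
}

static void send_ipi_allbutself(int this_cpu, int vector)
{
        int xen_vector = xen_map_vector(vector); /* mapped once, up front */
        int cpu;

        if (xen_vector < 0)
                return; /* previously only one caller checked this */

        for (cpu = 0; cpu < NR_CPUS; cpu++) {
                if (cpu == this_cpu || !online[cpu])
                        continue;
                xen_send_ipi_one(cpu, xen_vector);
        }
}

int main(void)
{
        send_ipi_allbutself(0, CALL_FUNCTION_VECTOR);
        return 0;
}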
diff --git a/arch/x86/xen/spinlock.c b/arch/x86/xen/spinlock.c
index f7a080ef0354..3002ec1bb71a 100644
--- a/arch/x86/xen/spinlock.c
+++ b/arch/x86/xen/spinlock.c
@@ -364,6 +364,16 @@ void __cpuinit xen_init_lock_cpu(int cpu)
         int irq;
         const char *name;
 
+        WARN(per_cpu(lock_kicker_irq, cpu) >= 0, "spinlock on CPU%d exists on IRQ%d!\n",
+             cpu, per_cpu(lock_kicker_irq, cpu));
+
+        /*
+         * See git commit f10cd522c5fbfec9ae3cc01967868c9c2401ed23
+         * (xen: disable PV spinlocks on HVM)
+         */
+        if (xen_hvm_domain())
+                return;
+
         name = kasprintf(GFP_KERNEL, "spinlock%d", cpu);
         irq = bind_ipi_to_irqhandler(XEN_SPIN_UNLOCK_VECTOR,
                                      cpu,
@@ -382,11 +392,26 @@ void __cpuinit xen_init_lock_cpu(int cpu)
 
 void xen_uninit_lock_cpu(int cpu)
 {
+        /*
+         * See git commit f10cd522c5fbfec9ae3cc01967868c9c2401ed23
+         * (xen: disable PV spinlocks on HVM)
+         */
+        if (xen_hvm_domain())
+                return;
+
         unbind_from_irqhandler(per_cpu(lock_kicker_irq, cpu), NULL);
+        per_cpu(lock_kicker_irq, cpu) = -1;
 }
 
 void __init xen_init_spinlocks(void)
 {
+        /*
+         * See git commit f10cd522c5fbfec9ae3cc01967868c9c2401ed23
+         * (xen: disable PV spinlocks on HVM)
+         */
+        if (xen_hvm_domain())
+                return;
+
         BUILD_BUG_ON(sizeof(struct xen_spinlock) > sizeof(arch_spinlock_t));
 
         pv_lock_ops.spin_is_locked = xen_spin_is_locked;
diff --git a/arch/x86/xen/time.c b/arch/x86/xen/time.c
index 0296a9522501..3d88bfdf9e1c 100644
--- a/arch/x86/xen/time.c
+++ b/arch/x86/xen/time.c
@@ -377,7 +377,7 @@ static const struct clock_event_device xen_vcpuop_clockevent = {
 
 static const struct clock_event_device *xen_clockevent =
         &xen_timerop_clockevent;
-static DEFINE_PER_CPU(struct clock_event_device, xen_clock_events);
+static DEFINE_PER_CPU(struct clock_event_device, xen_clock_events) = { .irq = -1 };
 
 static irqreturn_t xen_timer_interrupt(int irq, void *dev_id)
 {
@@ -401,6 +401,9 @@ void xen_setup_timer(int cpu)
         struct clock_event_device *evt;
         int irq;
 
+        evt = &per_cpu(xen_clock_events, cpu);
+        WARN(evt->irq >= 0, "IRQ%d for CPU%d is already allocated\n", evt->irq, cpu);
+
         printk(KERN_INFO "installing Xen timer for CPU %d\n", cpu);
 
         name = kasprintf(GFP_KERNEL, "timer%d", cpu);
@@ -413,7 +416,6 @@ void xen_setup_timer(int cpu)
                                       IRQF_FORCE_RESUME,
                                       name, NULL);
 
-        evt = &per_cpu(xen_clock_events, cpu);
         memcpy(evt, xen_clockevent, sizeof(*evt));
 
         evt->cpumask = cpumask_of(cpu);
@@ -426,6 +428,7 @@ void xen_teardown_timer(int cpu)
         BUG_ON(cpu == 0);
         evt = &per_cpu(xen_clock_events, cpu);
         unbind_from_irqhandler(evt->irq, NULL);
+        evt->irq = -1;
 }
 
 void xen_setup_cpu_clockevents(void)
@@ -497,7 +500,11 @@ static void xen_hvm_setup_cpu_clockevents(void)
 {
         int cpu = smp_processor_id();
         xen_setup_runstate_info(cpu);
-        xen_setup_timer(cpu);
+        /*
+         * xen_setup_timer(cpu) - snprintf is bad in atomic context. Hence
+         * doing it in xen_hvm_cpu_notify (which gets called by smp_init during
+         * early bootup and also during CPU hotplug events).
+         */
         xen_setup_cpu_clockevents();
 }
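The time.c changes above introduce a lifecycle sentinel for the per-cpu clock event: the irq field starts at -1 ("not allocated"), setup warns if it finds the slot already live, and teardown restores -1 so a later CPU hotplug cycle can run setup again cleanly. A purely illustrative standalone C sketch of that pattern, with all names invented for the example:

#include <stdio.h>

#define NR_CPUS 2

struct clock_event { int irq; };

/* Static initializer mirrors DEFINE_PER_CPU(...) = { .irq = -1 }. */
static struct clock_event events[NR_CPUS] = { { .irq = -1 }, { .irq = -1 } };
static int next_irq = 16;

static void setup_timer(int cpu)
{
        struct clock_event *evt = &events[cpu];

        /* A live IRQ here means setup ran twice without teardown. */
        if (evt->irq >= 0)
                fprintf(stderr, "WARN: IRQ%d for CPU%d is already allocated\n",
                        evt->irq, cpu);
        evt->irq = next_irq++;
        printf("cpu%d timer on IRQ%d\n", cpu, evt->irq);
}

static void teardown_timer(int cpu)
{
        struct clock_event *evt = &events[cpu];

        /* Release the handler, then mark the slot free again. */
        evt->irq = -1;
}

int main(void)
{
        setup_timer(1);
        teardown_timer(1); /* CPU offlined */
        setup_timer(1);    /* CPU onlined again: no warning */
        return 0;
}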