path: root/arch/x86/xen
Diffstat (limited to 'arch/x86/xen')
-rw-r--r--   arch/x86/xen/Kconfig      |   2
-rw-r--r--   arch/x86/xen/enlighten.c  | 108
-rw-r--r--   arch/x86/xen/mmu.c        |  20
-rw-r--r--   arch/x86/xen/smp.c        |  33
-rw-r--r--   arch/x86/xen/smp.h        |   1
-rw-r--r--   arch/x86/xen/spinlock.c   |  25
-rw-r--r--   arch/x86/xen/time.c       |  13
7 files changed, 171 insertions(+), 31 deletions(-)
diff --git a/arch/x86/xen/Kconfig b/arch/x86/xen/Kconfig
index 131dacd2748a..1a3c76505649 100644
--- a/arch/x86/xen/Kconfig
+++ b/arch/x86/xen/Kconfig
@@ -4,7 +4,7 @@
config XEN
bool "Xen guest support"
- select PARAVIRT
+ depends on PARAVIRT
select PARAVIRT_CLOCK
select XEN_HAVE_PVMMU
depends on X86_64 || (X86_32 && X86_PAE && !X86_VISWS)
diff --git a/arch/x86/xen/enlighten.c b/arch/x86/xen/enlighten.c
index c8e1c7b95c3b..a492be2635ac 100644
--- a/arch/x86/xen/enlighten.c
+++ b/arch/x86/xen/enlighten.c
@@ -31,6 +31,7 @@
#include <linux/pci.h>
#include <linux/gfp.h>
#include <linux/memblock.h>
+#include <linux/edd.h>
#include <xen/xen.h>
#include <xen/events.h>
@@ -84,7 +85,29 @@
EXPORT_SYMBOL_GPL(hypercall_page);
+/*
+ * Pointer to the xen_vcpu_info structure or
+ * &HYPERVISOR_shared_info->vcpu_info[cpu]. See xen_hvm_init_shared_info
+ * and xen_vcpu_setup for details. By default it points to
+ * shared_info->vcpu_info, but if the hypervisor supports
+ * VCPUOP_register_vcpu_info then it can point to xen_vcpu_info. The
+ * pointer is used in __xen_evtchn_do_upcall to acknowledge pending events.
+ * More subtly, it is also used by the patched versions of irq
+ * enable/disable, e.g. xen_irq_enable_direct and xen_iret, in PV mode.
+ *
+ * The desire to be able to do those mask/unmask operations as a single
+ * instruction, by using the per-cpu offset held in %gs, is the real reason
+ * vcpu info is in a per-cpu pointer and the original reason for this
+ * hypercall.
+ */
DEFINE_PER_CPU(struct vcpu_info *, xen_vcpu);
+
+/*
+ * Per CPU pages used if hypervisor supports VCPUOP_register_vcpu_info
+ * hypercall. This can be used both in PV and PVHVM mode. The structure
+ * overrides the default per_cpu(xen_vcpu, cpu) value.
+ */
DEFINE_PER_CPU(struct vcpu_info, xen_vcpu_info);
enum xen_domain_type xen_domain_type = XEN_NATIVE;
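The comment above explains why xen_vcpu is a per-cpu pointer: the patched irq enable/disable fast paths want event masking to be a single per-cpu access. A minimal C sketch of that data flow, assuming the per-cpu accessors used elsewhere in arch/x86/xen (the real direct variants are hand-written assembly in xen-asm.S, so this is illustrative only, not part of this patch):

    /* Illustrative only: mask/unmask event delivery through the per-cpu
     * vcpu_info pointer; the "direct" fast paths do the same in assembly. */
    static inline void xen_irq_disable_sketch(void)
    {
            this_cpu_read(xen_vcpu)->evtchn_upcall_mask = 1;  /* mask events */
    }

    static inline void xen_irq_enable_sketch(void)
    {
            struct vcpu_info *vcpu = this_cpu_read(xen_vcpu);

            vcpu->evtchn_upcall_mask = 0;        /* unmask */
            barrier();                           /* unmask before checking pending */
            if (vcpu->evtchn_upcall_pending)     /* events arrived while masked? */
                    xen_force_evtchn_callback();
    }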
@@ -156,6 +179,21 @@ static void xen_vcpu_setup(int cpu)
BUG_ON(HYPERVISOR_shared_info == &xen_dummy_shared_info);
+ /*
+ * This path is called twice on PVHVM - first during bootup via
+ * smp_init -> xen_hvm_cpu_notify, and then if the VCPU is being
+ * hotplugged: cpu_up -> xen_hvm_cpu_notify.
+ * As we can only do the VCPUOP_register_vcpu_info hypercall once,
+ * let's not overwrite its result.
+ *
+ * For PV it is called during restore (xen_vcpu_restore) and bootup
+ * (xen_setup_vcpu_info_placement). The hotplug mechanism does not
+ * use this function.
+ */
+ if (xen_hvm_domain()) {
+ if (per_cpu(xen_vcpu, cpu) == &per_cpu(xen_vcpu_info, cpu))
+ return;
+ }
if (cpu < MAX_VIRT_CPUS)
per_cpu(xen_vcpu, cpu) = &HYPERVISOR_shared_info->vcpu_info[cpu];
@@ -171,7 +209,12 @@ static void xen_vcpu_setup(int cpu)
/* Check to see if the hypervisor will put the vcpu_info
structure where we want it, which allows direct access via
- a percpu-variable. */
+ a percpu-variable.
+ N.B. This hypercall can _only_ be called once per CPU. Subsequent
+ calls will error out with -EINVAL. This is because the hypervisor
+ has no unregister variant and this hypercall does not allow
+ overwriting info.mfn and info.offset.
+ */
err = HYPERVISOR_vcpu_op(VCPUOP_register_vcpu_info, cpu, &info);
if (err) {
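For context, the registration step this hunk's comment refers to (partly elided from the hunk) follows roughly the pattern below. Field and helper names are those used by xen_vcpu_setup; treat this as a hedged sketch rather than the exact patch content:

    /* Sketch of the one-shot registration: tell the hypervisor where this
     * CPU's vcpu_info lives so it can be reached via the per-cpu area. */
    struct vcpu_info *vcpup = &per_cpu(xen_vcpu_info, cpu);
    struct vcpu_register_vcpu_info info;
    int err;

    info.mfn    = arbitrary_virt_to_mfn(vcpup);  /* machine frame holding the structure */
    info.offset = offset_in_page(vcpup);         /* offset within that frame */

    err = HYPERVISOR_vcpu_op(VCPUOP_register_vcpu_info, cpu, &info);
    if (err == 0)
            per_cpu(xen_vcpu, cpu) = vcpup;      /* direct per-cpu access from now on */
    /* On error, keep the shared_info->vcpu_info[cpu] pointer set earlier. */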
@@ -386,6 +429,9 @@ static void __init xen_init_cpuid_mask(void)
cpuid_leaf1_edx_mask &=
~((1 << X86_FEATURE_APIC) | /* disable local APIC */
(1 << X86_FEATURE_ACPI)); /* disable ACPI */
+
+ cpuid_leaf1_ecx_mask &= ~(1 << (X86_FEATURE_X2APIC % 32));
+
ax = 1;
cx = 0;
xen_cpuid(&ax, &bx, &cx, &dx);
@@ -1220,7 +1266,6 @@ static const struct pv_cpu_ops xen_cpu_ops __initconst = {
.alloc_ldt = xen_alloc_ldt,
.free_ldt = xen_free_ldt,
- .store_gdt = native_store_gdt,
.store_idt = native_store_idt,
.store_tr = xen_store_tr,
@@ -1306,6 +1351,55 @@ static const struct machine_ops xen_machine_ops __initconst = {
.emergency_restart = xen_emergency_restart,
};
+static void __init xen_boot_params_init_edd(void)
+{
+#if IS_ENABLED(CONFIG_EDD)
+ struct xen_platform_op op;
+ struct edd_info *edd_info;
+ u32 *mbr_signature;
+ unsigned nr;
+ int ret;
+
+ edd_info = boot_params.eddbuf;
+ mbr_signature = boot_params.edd_mbr_sig_buffer;
+
+ op.cmd = XENPF_firmware_info;
+
+ op.u.firmware_info.type = XEN_FW_DISK_INFO;
+ for (nr = 0; nr < EDDMAXNR; nr++) {
+ struct edd_info *info = edd_info + nr;
+
+ op.u.firmware_info.index = nr;
+ info->params.length = sizeof(info->params);
+ set_xen_guest_handle(op.u.firmware_info.u.disk_info.edd_params,
+ &info->params);
+ ret = HYPERVISOR_dom0_op(&op);
+ if (ret)
+ break;
+
+#define C(x) info->x = op.u.firmware_info.u.disk_info.x
+ C(device);
+ C(version);
+ C(interface_support);
+ C(legacy_max_cylinder);
+ C(legacy_max_head);
+ C(legacy_sectors_per_track);
+#undef C
+ }
+ boot_params.eddbuf_entries = nr;
+
+ op.u.firmware_info.type = XEN_FW_DISK_MBR_SIGNATURE;
+ for (nr = 0; nr < EDD_MBR_SIG_MAX; nr++) {
+ op.u.firmware_info.index = nr;
+ ret = HYPERVISOR_dom0_op(&op);
+ if (ret)
+ break;
+ mbr_signature[nr] = op.u.firmware_info.u.disk_mbr_signature.mbr_signature;
+ }
+ boot_params.edd_mbr_sig_buf_entries = nr;
+#endif
+}
+
/*
* Set up the GDT and segment registers for -fstack-protector. Until
* we do this, we have to be careful not to call any stack-protected
@@ -1508,6 +1602,8 @@ asmlinkage void __init xen_start_kernel(void)
/* Avoid searching for BIOS MP tables */
x86_init.mpparse.find_smp_config = x86_init_noop;
x86_init.mpparse.get_smp_config = x86_init_uint_noop;
+
+ xen_boot_params_init_edd();
}
#ifdef CONFIG_PCI
/* PCI BIOS service won't work from a PV guest. */
@@ -1552,6 +1648,9 @@ void __ref xen_hvm_init_shared_info(void)
* online but xen_hvm_init_shared_info is run at resume time too and
* in that case multiple vcpus might be online. */
for_each_online_cpu(cpu) {
+ /* Leave it as NULL. */
+ if (cpu >= MAX_VIRT_CPUS)
+ continue;
per_cpu(xen_vcpu, cpu) = &HYPERVISOR_shared_info->vcpu_info[cpu];
}
}
@@ -1589,8 +1688,11 @@ static int __cpuinit xen_hvm_cpu_notify(struct notifier_block *self,
switch (action) {
case CPU_UP_PREPARE:
xen_vcpu_setup(cpu);
- if (xen_have_vector_callback)
+ if (xen_have_vector_callback) {
xen_init_lock_cpu(cpu);
+ if (xen_feature(XENFEAT_hvm_safe_pvclock))
+ xen_setup_timer(cpu);
+ }
break;
default:
break;
diff --git a/arch/x86/xen/mmu.c b/arch/x86/xen/mmu.c
index e8e34938c57d..fdc3ba28ca38 100644
--- a/arch/x86/xen/mmu.c
+++ b/arch/x86/xen/mmu.c
@@ -1467,8 +1467,6 @@ static void __init xen_write_cr3_init(unsigned long cr3)
__xen_write_cr3(true, cr3);
xen_mc_issue(PARAVIRT_LAZY_CPU); /* interrupts restored */
-
- pv_mmu_ops.write_cr3 = &xen_write_cr3;
}
#endif
@@ -1750,14 +1748,18 @@ static void *m2v(phys_addr_t maddr)
}
/* Set the page permissions on an identity-mapped pages */
-static void set_page_prot(void *addr, pgprot_t prot)
+static void set_page_prot_flags(void *addr, pgprot_t prot, unsigned long flags)
{
unsigned long pfn = __pa(addr) >> PAGE_SHIFT;
pte_t pte = pfn_pte(pfn, prot);
- if (HYPERVISOR_update_va_mapping((unsigned long)addr, pte, 0))
+ if (HYPERVISOR_update_va_mapping((unsigned long)addr, pte, flags))
BUG();
}
+static void set_page_prot(void *addr, pgprot_t prot)
+{
+ return set_page_prot_flags(addr, prot, UVMF_NONE);
+}
#ifdef CONFIG_X86_32
static void __init xen_map_identity_early(pmd_t *pmd, unsigned long max_pfn)
{
@@ -1841,12 +1843,12 @@ static void __init check_pt_base(unsigned long *pt_base, unsigned long *pt_end,
unsigned long addr)
{
if (*pt_base == PFN_DOWN(__pa(addr))) {
- set_page_prot((void *)addr, PAGE_KERNEL);
+ set_page_prot_flags((void *)addr, PAGE_KERNEL, UVMF_INVLPG);
clear_page((void *)addr);
(*pt_base)++;
}
if (*pt_end == PFN_DOWN(__pa(addr))) {
- set_page_prot((void *)addr, PAGE_KERNEL);
+ set_page_prot_flags((void *)addr, PAGE_KERNEL, UVMF_INVLPG);
clear_page((void *)addr);
(*pt_end)--;
}
@@ -2041,9 +2043,7 @@ static void xen_set_fixmap(unsigned idx, phys_addr_t phys, pgprot_t prot)
switch (idx) {
case FIX_BTMAP_END ... FIX_BTMAP_BEGIN:
-#ifdef CONFIG_X86_F00F_BUG
- case FIX_F00F_IDT:
-#endif
+ case FIX_RO_IDT:
#ifdef CONFIG_X86_32
case FIX_WP_TEST:
case FIX_VDSO:
@@ -2122,6 +2122,7 @@ static void __init xen_post_allocator_init(void)
#endif
#ifdef CONFIG_X86_64
+ pv_mmu_ops.write_cr3 = &xen_write_cr3;
SetPagePinned(virt_to_page(level3_user_vsyscall));
#endif
xen_mark_init_mm_pinned();
@@ -2197,6 +2198,7 @@ static const struct pv_mmu_ops xen_mmu_ops __initconst = {
.lazy_mode = {
.enter = paravirt_enter_lazy_mmu,
.leave = xen_leave_lazy_mmu,
+ .flush = paravirt_flush_lazy_mmu,
},
.set_fixmap = xen_set_fixmap,
diff --git a/arch/x86/xen/smp.c b/arch/x86/xen/smp.c
index 09ea61d2e02f..fb44426fe931 100644
--- a/arch/x86/xen/smp.c
+++ b/arch/x86/xen/smp.c
@@ -95,7 +95,7 @@ static void __cpuinit cpu_bringup(void)
static void __cpuinit cpu_bringup_and_idle(void)
{
cpu_bringup();
- cpu_idle();
+ cpu_startup_entry(CPUHP_ONLINE);
}
static int xen_smp_intr_init(unsigned int cpu)
@@ -144,6 +144,13 @@ static int xen_smp_intr_init(unsigned int cpu)
goto fail;
per_cpu(xen_callfuncsingle_irq, cpu) = rc;
+ /*
+ * The IRQ worker on PVHVM goes through the native path and uses the
+ * IPI mechanism.
+ */
+ if (xen_hvm_domain())
+ return 0;
+
callfunc_name = kasprintf(GFP_KERNEL, "irqwork%d", cpu);
rc = bind_ipi_to_irqhandler(XEN_IRQ_WORK_VECTOR,
cpu,
@@ -167,6 +174,9 @@ static int xen_smp_intr_init(unsigned int cpu)
if (per_cpu(xen_callfuncsingle_irq, cpu) >= 0)
unbind_from_irqhandler(per_cpu(xen_callfuncsingle_irq, cpu),
NULL);
+ if (xen_hvm_domain())
+ return rc;
+
if (per_cpu(xen_irq_work, cpu) >= 0)
unbind_from_irqhandler(per_cpu(xen_irq_work, cpu), NULL);
@@ -418,7 +428,7 @@ static int xen_cpu_disable(void)
static void xen_cpu_die(unsigned int cpu)
{
- while (HYPERVISOR_vcpu_op(VCPUOP_is_up, cpu, NULL)) {
+ while (xen_pv_domain() && HYPERVISOR_vcpu_op(VCPUOP_is_up, cpu, NULL)) {
current->state = TASK_UNINTERRUPTIBLE;
schedule_timeout(HZ/10);
}
@@ -426,7 +436,8 @@ static void xen_cpu_die(unsigned int cpu)
unbind_from_irqhandler(per_cpu(xen_callfunc_irq, cpu), NULL);
unbind_from_irqhandler(per_cpu(xen_debug_irq, cpu), NULL);
unbind_from_irqhandler(per_cpu(xen_callfuncsingle_irq, cpu), NULL);
- unbind_from_irqhandler(per_cpu(xen_irq_work, cpu), NULL);
+ if (!xen_hvm_domain())
+ unbind_from_irqhandler(per_cpu(xen_irq_work, cpu), NULL);
xen_uninit_lock_cpu(cpu);
xen_teardown_timer(cpu);
}
@@ -565,24 +576,22 @@ void xen_send_IPI_mask_allbutself(const struct cpumask *mask,
{
unsigned cpu;
unsigned int this_cpu = smp_processor_id();
+ int xen_vector = xen_map_vector(vector);
- if (!(num_online_cpus() > 1))
+ if (!(num_online_cpus() > 1) || (xen_vector < 0))
return;
for_each_cpu_and(cpu, mask, cpu_online_mask) {
if (this_cpu == cpu)
continue;
- xen_smp_send_call_function_single_ipi(cpu);
+ xen_send_IPI_one(cpu, xen_vector);
}
}
void xen_send_IPI_allbutself(int vector)
{
- int xen_vector = xen_map_vector(vector);
-
- if (xen_vector >= 0)
- xen_send_IPI_mask_allbutself(cpu_online_mask, xen_vector);
+ xen_send_IPI_mask_allbutself(cpu_online_mask, vector);
}
static irqreturn_t xen_call_function_interrupt(int irq, void *dev_id)
@@ -657,11 +666,7 @@ static int __cpuinit xen_hvm_cpu_up(unsigned int cpu, struct task_struct *tidle)
static void xen_hvm_cpu_die(unsigned int cpu)
{
- unbind_from_irqhandler(per_cpu(xen_resched_irq, cpu), NULL);
- unbind_from_irqhandler(per_cpu(xen_callfunc_irq, cpu), NULL);
- unbind_from_irqhandler(per_cpu(xen_debug_irq, cpu), NULL);
- unbind_from_irqhandler(per_cpu(xen_callfuncsingle_irq, cpu), NULL);
- unbind_from_irqhandler(per_cpu(xen_irq_work, cpu), NULL);
+ xen_cpu_die(cpu);
native_cpu_die(cpu);
}
diff --git a/arch/x86/xen/smp.h b/arch/x86/xen/smp.h
index 8981a76d081a..c7c2d89efd76 100644
--- a/arch/x86/xen/smp.h
+++ b/arch/x86/xen/smp.h
@@ -5,7 +5,6 @@ extern void xen_send_IPI_mask(const struct cpumask *mask,
extern void xen_send_IPI_mask_allbutself(const struct cpumask *mask,
int vector);
extern void xen_send_IPI_allbutself(int vector);
-extern void physflat_send_IPI_allbutself(int vector);
extern void xen_send_IPI_all(int vector);
extern void xen_send_IPI_self(int vector);
diff --git a/arch/x86/xen/spinlock.c b/arch/x86/xen/spinlock.c
index f7a080ef0354..3002ec1bb71a 100644
--- a/arch/x86/xen/spinlock.c
+++ b/arch/x86/xen/spinlock.c
@@ -364,6 +364,16 @@ void __cpuinit xen_init_lock_cpu(int cpu)
int irq;
const char *name;
+ WARN(per_cpu(lock_kicker_irq, cpu) >= 0, "spinlock on CPU%d exists on IRQ%d!\n",
+ cpu, per_cpu(lock_kicker_irq, cpu));
+
+ /*
+ * See git commit f10cd522c5fbfec9ae3cc01967868c9c2401ed23
+ * (xen: disable PV spinlocks on HVM)
+ */
+ if (xen_hvm_domain())
+ return;
+
name = kasprintf(GFP_KERNEL, "spinlock%d", cpu);
irq = bind_ipi_to_irqhandler(XEN_SPIN_UNLOCK_VECTOR,
cpu,
@@ -382,11 +392,26 @@ void __cpuinit xen_init_lock_cpu(int cpu)
void xen_uninit_lock_cpu(int cpu)
{
+ /*
+ * See git commit f10cd522c5fbfec9ae3cc01967868c9c2401ed23
+ * (xen: disable PV spinlocks on HVM)
+ */
+ if (xen_hvm_domain())
+ return;
+
unbind_from_irqhandler(per_cpu(lock_kicker_irq, cpu), NULL);
+ per_cpu(lock_kicker_irq, cpu) = -1;
}
void __init xen_init_spinlocks(void)
{
+ /*
+ * See git commit f10cd522c5fbfec9ae3cc01967868c9c2401ed23
+ * (xen: disable PV spinlocks on HVM)
+ */
+ if (xen_hvm_domain())
+ return;
+
BUILD_BUG_ON(sizeof(struct xen_spinlock) > sizeof(arch_spinlock_t));
pv_lock_ops.spin_is_locked = xen_spin_is_locked;
diff --git a/arch/x86/xen/time.c b/arch/x86/xen/time.c
index 0296a9522501..3d88bfdf9e1c 100644
--- a/arch/x86/xen/time.c
+++ b/arch/x86/xen/time.c
@@ -377,7 +377,7 @@ static const struct clock_event_device xen_vcpuop_clockevent = {
static const struct clock_event_device *xen_clockevent =
&xen_timerop_clockevent;
-static DEFINE_PER_CPU(struct clock_event_device, xen_clock_events);
+static DEFINE_PER_CPU(struct clock_event_device, xen_clock_events) = { .irq = -1 };
static irqreturn_t xen_timer_interrupt(int irq, void *dev_id)
{
@@ -401,6 +401,9 @@ void xen_setup_timer(int cpu)
struct clock_event_device *evt;
int irq;
+ evt = &per_cpu(xen_clock_events, cpu);
+ WARN(evt->irq >= 0, "IRQ%d for CPU%d is already allocated\n", evt->irq, cpu);
+
printk(KERN_INFO "installing Xen timer for CPU %d\n", cpu);
name = kasprintf(GFP_KERNEL, "timer%d", cpu);
@@ -413,7 +416,6 @@ void xen_setup_timer(int cpu)
IRQF_FORCE_RESUME,
name, NULL);
- evt = &per_cpu(xen_clock_events, cpu);
memcpy(evt, xen_clockevent, sizeof(*evt));
evt->cpumask = cpumask_of(cpu);
@@ -426,6 +428,7 @@ void xen_teardown_timer(int cpu)
BUG_ON(cpu == 0);
evt = &per_cpu(xen_clock_events, cpu);
unbind_from_irqhandler(evt->irq, NULL);
+ evt->irq = -1;
}
void xen_setup_cpu_clockevents(void)
@@ -497,7 +500,11 @@ static void xen_hvm_setup_cpu_clockevents(void)
{
int cpu = smp_processor_id();
xen_setup_runstate_info(cpu);
- xen_setup_timer(cpu);
+ /*
+ * xen_setup_timer(cpu) - snprintf is bad in atomic context. Hence
+ * doing it in xen_hvm_cpu_notify (which gets called by smp_init during
+ * early bootup and also during CPU hotplug events).
+ */
xen_setup_cpu_clockevents();
}