summaryrefslogtreecommitdiffstats
path: root/drivers
diff options
context:
space:
mode:
Diffstat (limited to 'drivers')
-rw-r--r--drivers/block/xen-blkfront.c4
-rw-r--r--drivers/char/tpm/xen-tpmfront.c4
-rw-r--r--drivers/input/misc/xen-kbdfront.c4
-rw-r--r--drivers/net/xen-netfront.c2
-rw-r--r--drivers/pci/xen-pcifront.c4
-rw-r--r--drivers/video/xen-fbfront.c6
-rw-r--r--drivers/xen/Kconfig1
-rw-r--r--drivers/xen/Makefile3
-rw-r--r--drivers/xen/balloon.c9
-rw-r--r--drivers/xen/dbgp.c2
-rw-r--r--drivers/xen/events/Makefile5
-rw-r--r--drivers/xen/events/events_2l.c372
-rw-r--r--drivers/xen/events/events_base.c (renamed from drivers/xen/events.c)797
-rw-r--r--drivers/xen/events/events_fifo.c428
-rw-r--r--drivers/xen/events/events_internal.h150
-rw-r--r--drivers/xen/evtchn.c2
-rw-r--r--drivers/xen/gntdev.c2
-rw-r--r--drivers/xen/grant-table.c90
-rw-r--r--drivers/xen/pci.c2
-rw-r--r--drivers/xen/platform-pci.c11
-rw-r--r--drivers/xen/xenbus/xenbus_client.c3
-rw-r--r--drivers/xen/xenbus/xenbus_probe_frontend.c2
22 files changed, 1348 insertions, 555 deletions
diff --git a/drivers/block/xen-blkfront.c b/drivers/block/xen-blkfront.c
index c4a4c9006288..f9c43f91f03e 100644
--- a/drivers/block/xen-blkfront.c
+++ b/drivers/block/xen-blkfront.c
@@ -1356,7 +1356,7 @@ static int blkfront_probe(struct xenbus_device *dev,
char *type;
int len;
/* no unplug has been done: do not hook devices != xen vbds */
- if (xen_platform_pci_unplug & XEN_UNPLUG_UNNECESSARY) {
+ if (xen_has_pv_and_legacy_disk_devices()) {
int major;
if (!VDEV_IS_EXTENDED(vdevice))
@@ -2079,7 +2079,7 @@ static int __init xlblk_init(void)
if (!xen_domain())
return -ENODEV;
- if (xen_hvm_domain() && !xen_platform_pci_unplug)
+ if (!xen_has_pv_disk_devices())
return -ENODEV;
if (register_blkdev(XENVBD_MAJOR, DEV_NAME)) {
diff --git a/drivers/char/tpm/xen-tpmfront.c b/drivers/char/tpm/xen-tpmfront.c
index 92b097064df5..2064b4527040 100644
--- a/drivers/char/tpm/xen-tpmfront.c
+++ b/drivers/char/tpm/xen-tpmfront.c
@@ -17,6 +17,7 @@
#include <xen/xenbus.h>
#include <xen/page.h>
#include "tpm.h"
+#include <xen/platform_pci.h>
struct tpm_private {
struct tpm_chip *chip;
@@ -378,6 +379,9 @@ static int __init xen_tpmfront_init(void)
if (!xen_domain())
return -ENODEV;
+ if (!xen_has_pv_devices())
+ return -ENODEV;
+
return xenbus_register_frontend(&tpmfront_driver);
}
module_init(xen_tpmfront_init);
diff --git a/drivers/input/misc/xen-kbdfront.c b/drivers/input/misc/xen-kbdfront.c
index e21c1816a8f9..fbfdc10573be 100644
--- a/drivers/input/misc/xen-kbdfront.c
+++ b/drivers/input/misc/xen-kbdfront.c
@@ -29,6 +29,7 @@
#include <xen/interface/io/fbif.h>
#include <xen/interface/io/kbdif.h>
#include <xen/xenbus.h>
+#include <xen/platform_pci.h>
struct xenkbd_info {
struct input_dev *kbd;
@@ -380,6 +381,9 @@ static int __init xenkbd_init(void)
if (xen_initial_domain())
return -ENODEV;
+ if (!xen_has_pv_devices())
+ return -ENODEV;
+
return xenbus_register_frontend(&xenkbd_driver);
}
diff --git a/drivers/net/xen-netfront.c b/drivers/net/xen-netfront.c
index e59acb1daa23..2ab82fe75ede 100644
--- a/drivers/net/xen-netfront.c
+++ b/drivers/net/xen-netfront.c
@@ -2115,7 +2115,7 @@ static int __init netif_init(void)
if (!xen_domain())
return -ENODEV;
- if (xen_hvm_domain() && !xen_platform_pci_unplug)
+ if (!xen_has_pv_nic_devices())
return -ENODEV;
pr_info("Initialising Xen virtual ethernet driver\n");
diff --git a/drivers/pci/xen-pcifront.c b/drivers/pci/xen-pcifront.c
index d1cd60f51f87..179b8edc2262 100644
--- a/drivers/pci/xen-pcifront.c
+++ b/drivers/pci/xen-pcifront.c
@@ -20,6 +20,7 @@
#include <linux/workqueue.h>
#include <linux/bitops.h>
#include <linux/time.h>
+#include <xen/platform_pci.h>
#include <asm/xen/swiotlb-xen.h>
#define INVALID_GRANT_REF (0)
@@ -1146,6 +1147,9 @@ static int __init pcifront_init(void)
if (!xen_pv_domain() || xen_initial_domain())
return -ENODEV;
+ if (!xen_has_pv_devices())
+ return -ENODEV;
+
pci_frontend_registrar(1 /* enable */);
return xenbus_register_frontend(&xenpci_driver);
diff --git a/drivers/video/xen-fbfront.c b/drivers/video/xen-fbfront.c
index cd005c227a23..901014bbc821 100644
--- a/drivers/video/xen-fbfront.c
+++ b/drivers/video/xen-fbfront.c
@@ -35,6 +35,7 @@
#include <xen/interface/io/fbif.h>
#include <xen/interface/io/protocols.h>
#include <xen/xenbus.h>
+#include <xen/platform_pci.h>
struct xenfb_info {
unsigned char *fb;
@@ -692,13 +693,16 @@ static DEFINE_XENBUS_DRIVER(xenfb, ,
static int __init xenfb_init(void)
{
- if (!xen_pv_domain())
+ if (!xen_domain())
return -ENODEV;
/* Nothing to do if running in dom0. */
if (xen_initial_domain())
return -ENODEV;
+ if (!xen_has_pv_devices())
+ return -ENODEV;
+
return xenbus_register_frontend(&xenfb_driver);
}
diff --git a/drivers/xen/Kconfig b/drivers/xen/Kconfig
index 12ba6db65142..38fb36e1c592 100644
--- a/drivers/xen/Kconfig
+++ b/drivers/xen/Kconfig
@@ -3,7 +3,6 @@ menu "Xen driver support"
config XEN_BALLOON
bool "Xen memory balloon driver"
- depends on !ARM
default y
help
The balloon driver allows the Xen domain to request more memory from
diff --git a/drivers/xen/Makefile b/drivers/xen/Makefile
index 14fe79d8634a..d75c811bfa56 100644
--- a/drivers/xen/Makefile
+++ b/drivers/xen/Makefile
@@ -2,7 +2,8 @@ ifeq ($(filter y, $(CONFIG_ARM) $(CONFIG_ARM64)),)
obj-$(CONFIG_HOTPLUG_CPU) += cpu_hotplug.o
endif
obj-$(CONFIG_X86) += fallback.o
-obj-y += grant-table.o features.o events.o balloon.o manage.o
+obj-y += grant-table.o features.o balloon.o manage.o
+obj-y += events/
obj-y += xenbus/
nostackp := $(call cc-option, -fno-stack-protector)
diff --git a/drivers/xen/balloon.c b/drivers/xen/balloon.c
index 4c02e2b94103..37d06ea624aa 100644
--- a/drivers/xen/balloon.c
+++ b/drivers/xen/balloon.c
@@ -157,13 +157,6 @@ static struct page *balloon_retrieve(bool prefer_highmem)
return page;
}
-static struct page *balloon_first_page(void)
-{
- if (list_empty(&ballooned_pages))
- return NULL;
- return list_entry(ballooned_pages.next, struct page, lru);
-}
-
static struct page *balloon_next_page(struct page *page)
{
struct list_head *next = page->lru.next;
@@ -328,7 +321,7 @@ static enum bp_state increase_reservation(unsigned long nr_pages)
if (nr_pages > ARRAY_SIZE(frame_list))
nr_pages = ARRAY_SIZE(frame_list);
- page = balloon_first_page();
+ page = list_first_entry_or_null(&ballooned_pages, struct page, lru);
for (i = 0; i < nr_pages; i++) {
if (!page) {
nr_pages = i;
diff --git a/drivers/xen/dbgp.c b/drivers/xen/dbgp.c
index f3ccc80a455f..8145a59fd9f6 100644
--- a/drivers/xen/dbgp.c
+++ b/drivers/xen/dbgp.c
@@ -19,7 +19,7 @@ static int xen_dbgp_op(struct usb_hcd *hcd, int op)
dbgp.op = op;
#ifdef CONFIG_PCI
- if (ctrlr->bus == &pci_bus_type) {
+ if (dev_is_pci(ctrlr)) {
const struct pci_dev *pdev = to_pci_dev(ctrlr);
dbgp.u.pci.seg = pci_domain_nr(pdev->bus);
diff --git a/drivers/xen/events/Makefile b/drivers/xen/events/Makefile
new file mode 100644
index 000000000000..62be55cd981d
--- /dev/null
+++ b/drivers/xen/events/Makefile
@@ -0,0 +1,5 @@
+obj-y += events.o
+
+events-y += events_base.o
+events-y += events_2l.o
+events-y += events_fifo.o
diff --git a/drivers/xen/events/events_2l.c b/drivers/xen/events/events_2l.c
new file mode 100644
index 000000000000..d7ff91757307
--- /dev/null
+++ b/drivers/xen/events/events_2l.c
@@ -0,0 +1,372 @@
+/*
+ * Xen event channels (2-level ABI)
+ *
+ * Jeremy Fitzhardinge <jeremy@xensource.com>, XenSource Inc, 2007
+ */
+
+#define pr_fmt(fmt) "xen:" KBUILD_MODNAME ": " fmt
+
+#include <linux/linkage.h>
+#include <linux/interrupt.h>
+#include <linux/irq.h>
+#include <linux/module.h>
+
+#include <asm/sync_bitops.h>
+#include <asm/xen/hypercall.h>
+#include <asm/xen/hypervisor.h>
+
+#include <xen/xen.h>
+#include <xen/xen-ops.h>
+#include <xen/events.h>
+#include <xen/interface/xen.h>
+#include <xen/interface/event_channel.h>
+
+#include "events_internal.h"
+
+/*
+ * Note sizeof(xen_ulong_t) can be more than sizeof(unsigned long). Be
+ * careful to only use bitops which allow for this (e.g
+ * test_bit/find_first_bit and friends but not __ffs) and to pass
+ * BITS_PER_EVTCHN_WORD as the bitmask length.
+ */
+#define BITS_PER_EVTCHN_WORD (sizeof(xen_ulong_t)*8)
+/*
+ * Make a bitmask (i.e. unsigned long *) of a xen_ulong_t
+ * array. Primarily to avoid long lines (hence the terse name).
+ */
+#define BM(x) (unsigned long *)(x)
+/* Find the first set bit in a evtchn mask */
+#define EVTCHN_FIRST_BIT(w) find_first_bit(BM(&(w)), BITS_PER_EVTCHN_WORD)
+
+static DEFINE_PER_CPU(xen_ulong_t [EVTCHN_2L_NR_CHANNELS/BITS_PER_EVTCHN_WORD],
+ cpu_evtchn_mask);
+
+static unsigned evtchn_2l_max_channels(void)
+{
+ return EVTCHN_2L_NR_CHANNELS;
+}
+
+static void evtchn_2l_bind_to_cpu(struct irq_info *info, unsigned cpu)
+{
+ clear_bit(info->evtchn, BM(per_cpu(cpu_evtchn_mask, info->cpu)));
+ set_bit(info->evtchn, BM(per_cpu(cpu_evtchn_mask, cpu)));
+}
+
+static void evtchn_2l_clear_pending(unsigned port)
+{
+ struct shared_info *s = HYPERVISOR_shared_info;
+ sync_clear_bit(port, BM(&s->evtchn_pending[0]));
+}
+
+static void evtchn_2l_set_pending(unsigned port)
+{
+ struct shared_info *s = HYPERVISOR_shared_info;
+ sync_set_bit(port, BM(&s->evtchn_pending[0]));
+}
+
+static bool evtchn_2l_is_pending(unsigned port)
+{
+ struct shared_info *s = HYPERVISOR_shared_info;
+ return sync_test_bit(port, BM(&s->evtchn_pending[0]));
+}
+
+static bool evtchn_2l_test_and_set_mask(unsigned port)
+{
+ struct shared_info *s = HYPERVISOR_shared_info;
+ return sync_test_and_set_bit(port, BM(&s->evtchn_mask[0]));
+}
+
+static void evtchn_2l_mask(unsigned port)
+{
+ struct shared_info *s = HYPERVISOR_shared_info;
+ sync_set_bit(port, BM(&s->evtchn_mask[0]));
+}
+
+static void evtchn_2l_unmask(unsigned port)
+{
+ struct shared_info *s = HYPERVISOR_shared_info;
+ unsigned int cpu = get_cpu();
+ int do_hypercall = 0, evtchn_pending = 0;
+
+ BUG_ON(!irqs_disabled());
+
+ if (unlikely((cpu != cpu_from_evtchn(port))))
+ do_hypercall = 1;
+ else {
+ /*
+ * Need to clear the mask before checking pending to
+ * avoid a race with an event becoming pending.
+ *
+ * EVTCHNOP_unmask will only trigger an upcall if the
+ * mask bit was set, so if a hypercall is needed
+ * remask the event.
+ */
+ sync_clear_bit(port, BM(&s->evtchn_mask[0]));
+ evtchn_pending = sync_test_bit(port, BM(&s->evtchn_pending[0]));
+
+ if (unlikely(evtchn_pending && xen_hvm_domain())) {
+ sync_set_bit(port, BM(&s->evtchn_mask[0]));
+ do_hypercall = 1;
+ }
+ }
+
+ /* Slow path (hypercall) if this is a non-local port or if this is
+ * an hvm domain and an event is pending (hvm domains don't have
+ * their own implementation of irq_enable). */
+ if (do_hypercall) {
+ struct evtchn_unmask unmask = { .port = port };
+ (void)HYPERVISOR_event_channel_op(EVTCHNOP_unmask, &unmask);
+ } else {
+ struct vcpu_info *vcpu_info = __this_cpu_read(xen_vcpu);
+
+ /*
+ * The following is basically the equivalent of
+ * 'hw_resend_irq'. Just like a real IO-APIC we 'lose
+ * the interrupt edge' if the channel is masked.
+ */
+ if (evtchn_pending &&
+ !sync_test_and_set_bit(port / BITS_PER_EVTCHN_WORD,
+ BM(&vcpu_info->evtchn_pending_sel)))
+ vcpu_info->evtchn_upcall_pending = 1;
+ }
+
+ put_cpu();
+}
+
+static DEFINE_PER_CPU(unsigned int, current_word_idx);
+static DEFINE_PER_CPU(unsigned int, current_bit_idx);
+
+/*
+ * Mask out the i least significant bits of w
+ */
+#define MASK_LSBS(w, i) (w & ((~((xen_ulong_t)0UL)) << i))
+
+static inline xen_ulong_t active_evtchns(unsigned int cpu,
+ struct shared_info *sh,
+ unsigned int idx)
+{
+ return sh->evtchn_pending[idx] &
+ per_cpu(cpu_evtchn_mask, cpu)[idx] &
+ ~sh->evtchn_mask[idx];
+}
+
+/*
+ * Search the CPU's pending events bitmasks. For each one found, map
+ * the event number to an irq, and feed it into do_IRQ() for handling.
+ *
+ * Xen uses a two-level bitmap to speed searching. The first level is
+ * a bitset of words which contain pending event bits. The second
+ * level is a bitset of pending events themselves.
+ */
+static void evtchn_2l_handle_events(unsigned cpu)
+{
+ int irq;
+ xen_ulong_t pending_words;
+ xen_ulong_t pending_bits;
+ int start_word_idx, start_bit_idx;
+ int word_idx, bit_idx;
+ int i;
+ struct irq_desc *desc;
+ struct shared_info *s = HYPERVISOR_shared_info;
+ struct vcpu_info *vcpu_info = __this_cpu_read(xen_vcpu);
+
+ /* Timer interrupt has highest priority. */
+ irq = irq_from_virq(cpu, VIRQ_TIMER);
+ if (irq != -1) {
+ unsigned int evtchn = evtchn_from_irq(irq);
+ word_idx = evtchn / BITS_PER_LONG;
+ bit_idx = evtchn % BITS_PER_LONG;
+ if (active_evtchns(cpu, s, word_idx) & (1ULL << bit_idx)) {
+ desc = irq_to_desc(irq);
+ if (desc)
+ generic_handle_irq_desc(irq, desc);
+ }
+ }
+
+ /*
+ * Master flag must be cleared /before/ clearing
+ * selector flag. xchg_xen_ulong must contain an
+ * appropriate barrier.
+ */
+ pending_words = xchg_xen_ulong(&vcpu_info->evtchn_pending_sel, 0);
+
+ start_word_idx = __this_cpu_read(current_word_idx);
+ start_bit_idx = __this_cpu_read(current_bit_idx);
+
+ word_idx = start_word_idx;
+
+ for (i = 0; pending_words != 0; i++) {
+ xen_ulong_t words;
+
+ words = MASK_LSBS(pending_words, word_idx);
+
+ /*
+ * If we masked out all events, wrap to beginning.
+ */
+ if (words == 0) {
+ word_idx = 0;
+ bit_idx = 0;
+ continue;
+ }
+ word_idx = EVTCHN_FIRST_BIT(words);
+
+ pending_bits = active_evtchns(cpu, s, word_idx);
+ bit_idx = 0; /* usually scan entire word from start */
+ /*
+ * We scan the starting word in two parts.
+ *
+ * 1st time: start in the middle, scanning the
+ * upper bits.
+ *
+ * 2nd time: scan the whole word (not just the
+ * parts skipped in the first pass) -- if an
+ * event in the previously scanned bits is
+ * pending again it would just be scanned on
+ * the next loop anyway.
+ */
+ if (word_idx == start_word_idx) {
+ if (i == 0)
+ bit_idx = start_bit_idx;
+ }
+
+ do {
+ xen_ulong_t bits;
+ int port;
+
+ bits = MASK_LSBS(pending_bits, bit_idx);
+
+ /* If we masked out all events, move on. */
+ if (bits == 0)
+ break;
+
+ bit_idx = EVTCHN_FIRST_BIT(bits);
+
+ /* Process port. */
+ port = (word_idx * BITS_PER_EVTCHN_WORD) + bit_idx;
+ irq = get_evtchn_to_irq(port);
+
+ if (irq != -1) {
+ desc = irq_to_desc(irq);
+ if (desc)
+ generic_handle_irq_desc(irq, desc);
+ }
+
+ bit_idx = (bit_idx + 1) % BITS_PER_EVTCHN_WORD;
+
+ /* Next caller starts at last processed + 1 */
+ __this_cpu_write(current_word_idx,
+ bit_idx ? word_idx :
+ (word_idx+1) % BITS_PER_EVTCHN_WORD);
+ __this_cpu_write(current_bit_idx, bit_idx);
+ } while (bit_idx != 0);
+
+ /* Scan start_l1i twice; all others once. */
+ if ((word_idx != start_word_idx) || (i != 0))
+ pending_words &= ~(1UL << word_idx);
+
+ word_idx = (word_idx + 1) % BITS_PER_EVTCHN_WORD;
+ }
+}
+
+irqreturn_t xen_debug_interrupt(int irq, void *dev_id)
+{
+ struct shared_info *sh = HYPERVISOR_shared_info;
+ int cpu = smp_processor_id();
+ xen_ulong_t *cpu_evtchn = per_cpu(cpu_evtchn_mask, cpu);
+ int i;
+ unsigned long flags;
+ static DEFINE_SPINLOCK(debug_lock);
+ struct vcpu_info *v;
+
+ spin_lock_irqsave(&debug_lock, flags);
+
+ printk("\nvcpu %d\n ", cpu);
+
+ for_each_online_cpu(i) {
+ int pending;
+ v = per_cpu(xen_vcpu, i);
+ pending = (get_irq_regs() && i == cpu)
+ ? xen_irqs_disabled(get_irq_regs())
+ : v->evtchn_upcall_mask;
+ printk("%d: masked=%d pending=%d event_sel %0*"PRI_xen_ulong"\n ", i,
+ pending, v->evtchn_upcall_pending,
+ (int)(sizeof(v->evtchn_pending_sel)*2),
+ v->evtchn_pending_sel);
+ }
+ v = per_cpu(xen_vcpu, cpu);
+
+ printk("\npending:\n ");
+ for (i = ARRAY_SIZE(sh->evtchn_pending)-1; i >= 0; i--)
+ printk("%0*"PRI_xen_ulong"%s",
+ (int)sizeof(sh->evtchn_pending[0])*2,
+ sh->evtchn_pending[i],
+ i % 8 == 0 ? "\n " : " ");
+ printk("\nglobal mask:\n ");
+ for (i = ARRAY_SIZE(sh->evtchn_mask)-1; i >= 0; i--)
+ printk("%0*"PRI_xen_ulong"%s",
+ (int)(sizeof(sh->evtchn_mask[0])*2),
+ sh->evtchn_mask[i],
+ i % 8 == 0 ? "\n " : " ");
+
+ printk("\nglobally unmasked:\n ");
+ for (i = ARRAY_SIZE(sh->evtchn_mask)-1; i >= 0; i--)
+ printk("%0*"PRI_xen_ulong"%s",
+ (int)(sizeof(sh->evtchn_mask[0])*2),
+ sh->evtchn_pending[i] & ~sh->evtchn_mask[i],
+ i % 8 == 0 ? "\n " : " ");
+
+ printk("\nlocal cpu%d mask:\n ", cpu);
+ for (i = (EVTCHN_2L_NR_CHANNELS/BITS_PER_EVTCHN_WORD)-1; i >= 0; i--)
+ printk("%0*"PRI_xen_ulong"%s", (int)(sizeof(cpu_evtchn[0])*2),
+ cpu_evtchn[i],
+ i % 8 == 0 ? "\n " : " ");
+
+ printk("\nlocally unmasked:\n ");
+ for (i = ARRAY_SIZE(sh->evtchn_mask)-1; i >= 0; i--) {
+ xen_ulong_t pending = sh->evtchn_pending[i]
+ & ~sh->evtchn_mask[i]
+ & cpu_evtchn[i];
+ printk("%0*"PRI_xen_ulong"%s",
+ (int)(sizeof(sh->evtchn_mask[0])*2),
+ pending, i % 8 == 0 ? "\n " : " ");
+ }
+
+ printk("\npending list:\n");
+ for (i = 0; i < EVTCHN_2L_NR_CHANNELS; i++) {
+ if (sync_test_bit(i, BM(sh->evtchn_pending))) {
+ int word_idx = i / BITS_PER_EVTCHN_WORD;
+ printk(" %d: event %d -> irq %d%s%s%s\n",
+ cpu_from_evtchn(i), i,
+ get_evtchn_to_irq(i),
+ sync_test_bit(word_idx, BM(&v->evtchn_pending_sel))
+ ? "" : " l2-clear",
+ !sync_test_bit(i, BM(sh->evtchn_mask))
+ ? "" : " globally-masked",
+ sync_test_bit(i, BM(cpu_evtchn))
+ ? "" : " locally-masked");
+ }
+ }
+
+ spin_unlock_irqrestore(&debug_lock, flags);
+
+ return IRQ_HANDLED;
+}
+
+static const struct evtchn_ops evtchn_ops_2l = {
+ .max_channels = evtchn_2l_max_channels,
+ .nr_channels = evtchn_2l_max_channels,
+ .bind_to_cpu = evtchn_2l_bind_to_cpu,
+ .clear_pending = evtchn_2l_clear_pending,
+ .set_pending = evtchn_2l_set_pending,
+ .is_pending = evtchn_2l_is_pending,
+ .test_and_set_mask = evtchn_2l_test_and_set_mask,
+ .mask = evtchn_2l_mask,
+ .unmask = evtchn_2l_unmask,
+ .handle_events = evtchn_2l_handle_events,
+};
+
+void __init xen_evtchn_2l_init(void)
+{
+ pr_info("Using 2-level ABI\n");
+ evtchn_ops = &evtchn_ops_2l;
+}
diff --git a/drivers/xen/events.c b/drivers/xen/events/events_base.c
index 4035e833ea26..4672e003c0ad 100644
--- a/drivers/xen/events.c
+++ b/drivers/xen/events/events_base.c
@@ -59,6 +59,10 @@
#include <xen/interface/vcpu.h>
#include <asm/hw_irq.h>
+#include "events_internal.h"
+
+const struct evtchn_ops *evtchn_ops;
+
/*
* This lock protects updates to the following mapping and reference-count
* arrays. The lock does not need to be acquired to read the mapping tables.
@@ -73,71 +77,15 @@ static DEFINE_PER_CPU(int [NR_VIRQS], virq_to_irq) = {[0 ... NR_VIRQS-1] = -1};
/* IRQ <-> IPI mapping */
static DEFINE_PER_CPU(int [XEN_NR_IPIS], ipi_to_irq) = {[0 ... XEN_NR_IPIS-1] = -1};
-/* Interrupt types. */
-enum xen_irq_type {
- IRQT_UNBOUND = 0,
- IRQT_PIRQ,
- IRQT_VIRQ,
- IRQT_IPI,
- IRQT_EVTCHN
-};
-
-/*
- * Packed IRQ information:
- * type - enum xen_irq_type
- * event channel - irq->event channel mapping
- * cpu - cpu this event channel is bound to
- * index - type-specific information:
- * PIRQ - physical IRQ, GSI, flags, and owner domain
- * VIRQ - virq number
- * IPI - IPI vector
- * EVTCHN -
- */
-struct irq_info {
- struct list_head list;
- int refcnt;
- enum xen_irq_type type; /* type */
- unsigned irq;
- unsigned short evtchn; /* event channel */
- unsigned short cpu; /* cpu bound */
-
- union {
- unsigned short virq;
- enum ipi_vector ipi;
- struct {
- unsigned short pirq;
- unsigned short gsi;
- unsigned char flags;
- uint16_t domid;
- } pirq;
- } u;
-};
-#define PIRQ_NEEDS_EOI (1 << 0)
-#define PIRQ_SHAREABLE (1 << 1)
-
-static int *evtchn_to_irq;
+int **evtchn_to_irq;
#ifdef CONFIG_X86
static unsigned long *pirq_eoi_map;
#endif
static bool (*pirq_needs_eoi)(unsigned irq);
-/*
- * Note sizeof(xen_ulong_t) can be more than sizeof(unsigned long). Be
- * careful to only use bitops which allow for this (e.g
- * test_bit/find_first_bit and friends but not __ffs) and to pass
- * BITS_PER_EVTCHN_WORD as the bitmask length.
- */
-#define BITS_PER_EVTCHN_WORD (sizeof(xen_ulong_t)*8)
-/*
- * Make a bitmask (i.e. unsigned long *) of a xen_ulong_t
- * array. Primarily to avoid long lines (hence the terse name).
- */
-#define BM(x) (unsigned long *)(x)
-/* Find the first set bit in a evtchn mask */
-#define EVTCHN_FIRST_BIT(w) find_first_bit(BM(&(w)), BITS_PER_EVTCHN_WORD)
-
-static DEFINE_PER_CPU(xen_ulong_t [NR_EVENT_CHANNELS/BITS_PER_EVTCHN_WORD],
- cpu_evtchn_mask);
+#define EVTCHN_ROW(e) (e / (PAGE_SIZE/sizeof(**evtchn_to_irq)))
+#define EVTCHN_COL(e) (e % (PAGE_SIZE/sizeof(**evtchn_to_irq)))
+#define EVTCHN_PER_ROW (PAGE_SIZE / sizeof(**evtchn_to_irq))
/* Xen will never allocate port zero for any purpose. */
#define VALID_EVTCHN(chn) ((chn) != 0)
@@ -148,19 +96,75 @@ static struct irq_chip xen_pirq_chip;
static void enable_dynirq(struct irq_data *data);
static void disable_dynirq(struct irq_data *data);
+static void clear_evtchn_to_irq_row(unsigned row)
+{
+ unsigned col;
+
+ for (col = 0; col < EVTCHN_PER_ROW; col++)
+ evtchn_to_irq[row][col] = -1;
+}
+
+static void clear_evtchn_to_irq_all(void)
+{
+ unsigned row;
+
+ for (row = 0; row < EVTCHN_ROW(xen_evtchn_max_channels()); row++) {
+ if (evtchn_to_irq[row] == NULL)
+ continue;
+ clear_evtchn_to_irq_row(row);
+ }
+}
+
+static int set_evtchn_to_irq(unsigned evtchn, unsigned irq)
+{
+ unsigned row;
+ unsigned col;
+
+ if (evtchn >= xen_evtchn_max_channels())
+ return -EINVAL;
+
+ row = EVTCHN_ROW(evtchn);
+ col = EVTCHN_COL(evtchn);
+
+ if (evtchn_to_irq[row] == NULL) {
+ /* Unallocated irq entries return -1 anyway */
+ if (irq == -1)
+ return 0;
+
+ evtchn_to_irq[row] = (int *)get_zeroed_page(GFP_KERNEL);
+ if (evtchn_to_irq[row] == NULL)
+ return -ENOMEM;
+
+ clear_evtchn_to_irq_row(row);
+ }
+
+ evtchn_to_irq[EVTCHN_ROW(evtchn)][EVTCHN_COL(evtchn)] = irq;
+ return 0;
+}
+
+int get_evtchn_to_irq(unsigned evtchn)
+{
+ if (evtchn >= xen_evtchn_max_channels())
+ return -1;
+ if (evtchn_to_irq[EVTCHN_ROW(evtchn)] == NULL)
+ return -1;
+ return evtchn_to_irq[EVTCHN_ROW(evtchn)][EVTCHN_COL(evtchn)];
+}
+
/* Get info for IRQ */
-static struct irq_info *info_for_irq(unsigned irq)
+struct irq_info *info_for_irq(unsigned irq)
{
return irq_get_handler_data(irq);
}
/* Constructors for packed IRQ information. */
-static void xen_irq_info_common_init(struct irq_info *info,
+static int xen_irq_info_common_setup(struct irq_info *info,
unsigned irq,
enum xen_irq_type type,
- unsigned short evtchn,
+ unsigned evtchn,
unsigned short cpu)
{
+ int ret;
BUG_ON(info->type != IRQT_UNBOUND && info->type != type);
@@ -169,68 +173,78 @@ static void xen_irq_info_common_init(struct irq_info *info,
info->evtchn = evtchn;
info->cpu = cpu;
- evtchn_to_irq[evtchn] = irq;
+ ret = set_evtchn_to_irq(evtchn, irq);
+ if (ret < 0)
+ return ret;
irq_clear_status_flags(irq, IRQ_NOREQUEST|IRQ_NOAUTOEN);
+
+ return xen_evtchn_port_setup(info);
}
-static void xen_irq_info_evtchn_init(unsigned irq,
- unsigned short evtchn)
+static int xen_irq_info_evtchn_setup(unsigned irq,
+ unsigned evtchn)
{
struct irq_info *info = info_for_irq(irq);
- xen_irq_info_common_init(info, irq, IRQT_EVTCHN, evtchn, 0);
+ return xen_irq_info_common_setup(info, irq, IRQT_EVTCHN, evtchn, 0);
}
-static void xen_irq_info_ipi_init(unsigned cpu,
+static int xen_irq_info_ipi_setup(unsigned cpu,
unsigned irq,
- unsigned short evtchn,
+ unsigned evtchn,
enum ipi_vector ipi)
{
struct irq_info *info = info_for_irq(irq);
- xen_irq_info_common_init(info, irq, IRQT_IPI, evtchn, 0);
-
info->u.ipi = ipi;
per_cpu(ipi_to_irq, cpu)[ipi] = irq;
+
+ return xen_irq_info_common_setup(info, irq, IRQT_IPI, evtchn, 0);
}
-static void xen_irq_info_virq_init(unsigned cpu,
+static int xen_irq_info_virq_setup(unsigned cpu,
unsigned irq,
- unsigned short evtchn,
- unsigned short virq)
+ unsigned evtchn,
+ unsigned virq)
{
struct irq_info *info = info_for_irq(irq);
- xen_irq_info_common_init(info, irq, IRQT_VIRQ, evtchn, 0);
-
info->u.virq = virq;
per_cpu(virq_to_irq, cpu)[virq] = irq;
+
+ return xen_irq_info_common_setup(info, irq, IRQT_VIRQ, evtchn, 0);
}
-static void xen_irq_info_pirq_init(unsigned irq,
- unsigned short evtchn,
- unsigned short pirq,
- unsigned short gsi,
+static int xen_irq_info_pirq_setup(unsigned irq,
+ unsigned evtchn,
+ unsigned pirq,
+ unsigned gsi,
uint16_t domid,
unsigned char flags)
{
struct irq_info *info = info_for_irq(irq);
- xen_irq_info_common_init(info, irq, IRQT_PIRQ, evtchn, 0);
-
info->u.pirq.pirq = pirq;
info->u.pirq.gsi = gsi;
info->u.pirq.domid = domid;
info->u.pirq.flags = flags;
+
+ return xen_irq_info_common_setup(info, irq, IRQT_PIRQ, evtchn, 0);
+}
+
+static void xen_irq_info_cleanup(struct irq_info *info)
+{
+ set_evtchn_to_irq(info->evtchn, -1);
+ info->evtchn = 0;
}
/*
* Accessors for packed IRQ information.
*/
-static unsigned int evtchn_from_irq(unsigned irq)
+unsigned int evtchn_from_irq(unsigned irq)
{
if (unlikely(WARN(irq < 0 || irq >= nr_irqs, "Invalid irq %d!\n", irq)))
return 0;
@@ -240,10 +254,15 @@ static unsigned int evtchn_from_irq(unsigned irq)
unsigned irq_from_evtchn(unsigned int evtchn)
{
- return evtchn_to_irq[evtchn];
+ return get_evtchn_to_irq(evtchn);
}
EXPORT_SYMBOL_GPL(irq_from_evtchn);
+int irq_from_virq(unsigned int cpu, unsigned int virq)
+{
+ return per_cpu(virq_to_irq, cpu)[virq];
+}
+
static enum ipi_vector ipi_from_irq(unsigned irq)
{
struct irq_info *info = info_for_irq(irq);
@@ -279,14 +298,14 @@ static enum xen_irq_type type_from_irq(unsigned irq)
return info_for_irq(irq)->type;
}
-static unsigned cpu_from_irq(unsigned irq)
+unsigned cpu_from_irq(unsigned irq)
{
return info_for_irq(irq)->cpu;
}
-static unsigned int cpu_from_evtchn(unsigned int evtchn)
+unsigned int cpu_from_evtchn(unsigned int evtchn)
{
- int irq = evtchn_to_irq[evtchn];
+ int irq = get_evtchn_to_irq(evtchn);
unsigned ret = 0;
if (irq != -1)
@@ -310,67 +329,29 @@ static bool pirq_needs_eoi_flag(unsigned irq)
return info->u.pirq.flags & PIRQ_NEEDS_EOI;
}
-static inline xen_ulong_t active_evtchns(unsigned int cpu,
- struct shared_info *sh,
- unsigned int idx)
-{
- return sh->evtchn_pending[idx] &
- per_cpu(cpu_evtchn_mask, cpu)[idx] &
- ~sh->evtchn_mask[idx];
-}
-
static void bind_evtchn_to_cpu(unsigned int chn, unsigned int cpu)
{
- int irq = evtchn_to_irq[chn];
+ int irq = get_evtchn_to_irq(chn);
+ struct irq_info *info = info_for_irq(irq);
BUG_ON(irq == -1);
#ifdef CONFIG_SMP
cpumask_copy(irq_to_desc(irq)->irq_data.affinity, cpumask_of(cpu));
#endif
- clear_bit(chn, BM(per_cpu(cpu_evtchn_mask, cpu_from_irq(irq))));
- set_bit(chn, BM(per_cpu(cpu_evtchn_mask, cpu)));
-
- info_for_irq(irq)->cpu = cpu;
-}
-
-static void init_evtchn_cpu_bindings(void)
-{
- int i;
-#ifdef CONFIG_SMP
- struct irq_info *info;
-
- /* By default all event channels notify CPU#0. */
- list_for_each_entry(info, &xen_irq_list_head, list) {
- struct irq_desc *desc = irq_to_desc(info->irq);
- cpumask_copy(desc->irq_data.affinity, cpumask_of(0));
- }
-#endif
-
- for_each_possible_cpu(i)
- memset(per_cpu(cpu_evtchn_mask, i),
- (i == 0) ? ~0 : 0, NR_EVENT_CHANNELS/8);
-}
+ xen_evtchn_port_bind_to_cpu(info, cpu);
-static inline void clear_evtchn(int port)
-{
- struct shared_info *s = HYPERVISOR_shared_info;
- sync_clear_bit(port, BM(&s->evtchn_pending[0]));
+ info->cpu = cpu;
}
-static inline void set_evtchn(int port)
+static void xen_evtchn_mask_all(void)
{
- struct shared_info *s = HYPERVISOR_shared_info;
- sync_set_bit(port, BM(&s->evtchn_pending[0]));
-}
+ unsigned int evtchn;
-static inline int test_evtchn(int port)
-{
- struct shared_info *s = HYPERVISOR_shared_info;
- return sync_test_bit(port, BM(&s->evtchn_pending[0]));
+ for (evtchn = 0; evtchn < xen_evtchn_nr_channels(); evtchn++)
+ mask_evtchn(evtchn);
}
-
/**
* notify_remote_via_irq - send event to remote end of event channel via irq
* @irq: irq of event channel to send event to
@@ -388,63 +369,6 @@ void notify_remote_via_irq(int irq)
}
EXPORT_SYMBOL_GPL(notify_remote_via_irq);
-static void mask_evtchn(int port)
-{
- struct shared_info *s = HYPERVISOR_shared_info;
- sync_set_bit(port, BM(&s->evtchn_mask[0]));
-}
-
-static void unmask_evtchn(int port)
-{
- struct shared_info *s = HYPERVISOR_shared_info;
- unsigned int cpu = get_cpu();
- int do_hypercall = 0, evtchn_pending = 0;
-
- BUG_ON(!irqs_disabled());
-
- if (unlikely((cpu != cpu_from_evtchn(port))))
- do_hypercall = 1;
- else {
- /*
- * Need to clear the mask before checking pending to
- * avoid a race with an event becoming pending.
- *
- * EVTCHNOP_unmask will only trigger an upcall if the
- * mask bit was set, so if a hypercall is needed
- * remask the event.
- */
- sync_clear_bit(port, BM(&s->evtchn_mask[0]));
- evtchn_pending = sync_test_bit(port, BM(&s->evtchn_pending[0]));
-
- if (unlikely(evtchn_pending && xen_hvm_domain())) {
- sync_set_bit(port, BM(&s->evtchn_mask[0]));
- do_hypercall = 1;
- }
- }
-
- /* Slow path (hypercall) if this is a non-local port or if this is
- * an hvm domain and an event is pending (hvm domains don't have
- * their own implementation of irq_enable). */
- if (do_hypercall) {
- struct evtchn_unmask unmask = { .port = port };
- (void)HYPERVISOR_event_channel_op(EVTCHNOP_unmask, &unmask);
- } else {
- struct vcpu_info *vcpu_info = __this_cpu_read(xen_vcpu);
-
- /*
- * The following is basically the equivalent of
- * 'hw_resend_irq'. Just like a real IO-APIC we 'lose
- * the interrupt edge' if the channel is masked.
- */
- if (evtchn_pending &&
- !sync_test_and_set_bit(port / BITS_PER_EVTCHN_WORD,
- BM(&vcpu_info->evtchn_pending_sel)))
- vcpu_info->evtchn_upcall_pending = 1;
- }
-
- put_cpu();
-}
-
static void xen_irq_init(unsigned irq)
{
struct irq_info *info;
@@ -538,6 +462,18 @@ static void xen_free_irq(unsigned irq)
irq_free_desc(irq);
}
+static void xen_evtchn_close(unsigned int port)
+{
+ struct evtchn_close close;
+
+ close.port = port;
+ if (HYPERVISOR_event_channel_op(EVTCHNOP_close, &close) != 0)
+ BUG();
+
+ /* Closed ports are implicitly re-bound to VCPU0. */
+ bind_evtchn_to_cpu(port, 0);
+}
+
static void pirq_query_unmask(int irq)
{
struct physdev_irq_status_query irq_status;
@@ -610,7 +546,13 @@ static unsigned int __startup_pirq(unsigned int irq)
pirq_query_unmask(irq);
- evtchn_to_irq[evtchn] = irq;
+ rc = set_evtchn_to_irq(evtchn, irq);
+ if (rc != 0) {
+ pr_err("irq%d: Failed to set port to irq mapping (%d)\n",
+ irq, rc);
+ xen_evtchn_close(evtchn);
+ return 0;
+ }
bind_evtchn_to_cpu(evtchn, 0);
info->evtchn = evtchn;
@@ -628,10 +570,9 @@ static unsigned int startup_pirq(struct irq_data *data)
static void shutdown_pirq(struct irq_data *data)
{
- struct evtchn_close close;
unsigned int irq = data->irq;
struct irq_info *info = info_for_irq(irq);
- int evtchn = evtchn_from_irq(irq);
+ unsigned evtchn = evtchn_from_irq(irq);
BUG_ON(info->type != IRQT_PIRQ);
@@ -639,14 +580,8 @@ static void shutdown_pirq(struct irq_data *data)
return;
mask_evtchn(evtchn);
-
- close.port = evtchn;
- if (HYPERVISOR_event_channel_op(EVTCHNOP_close, &close) != 0)
- BUG();
-
- bind_evtchn_to_cpu(evtchn, 0);
- evtchn_to_irq[evtchn] = -1;
- info->evtchn = 0;
+ xen_evtchn_close(evtchn);
+ xen_irq_info_cleanup(info);
}
static void enable_pirq(struct irq_data *data)
@@ -675,6 +610,41 @@ int xen_irq_from_gsi(unsigned gsi)
}
EXPORT_SYMBOL_GPL(xen_irq_from_gsi);
+static void __unbind_from_irq(unsigned int irq)
+{
+ int evtchn = evtchn_from_irq(irq);
+ struct irq_info *info = irq_get_handler_data(irq);
+
+ if (info->refcnt > 0) {
+ info->refcnt--;
+ if (info->refcnt != 0)
+ return;
+ }
+
+ if (VALID_EVTCHN(evtchn)) {
+ unsigned int cpu = cpu_from_irq(irq);
+
+ xen_evtchn_close(evtchn);
+
+ switch (type_from_irq(irq)) {
+ case IRQT_VIRQ:
+ per_cpu(virq_to_irq, cpu)[virq_from_irq(irq)] = -1;
+ break;
+ case IRQT_IPI:
+ per_cpu(ipi_to_irq, cpu)[ipi_from_irq(irq)] = -1;
+ break;
+ default:
+ break;
+ }
+
+ xen_irq_info_cleanup(info);
+ }
+
+ BUG_ON(info_for_irq(irq)->type == IRQT_UNBOUND);
+
+ xen_free_irq(irq);
+}
+
/*
* Do not make any assumptions regarding the relationship between the
* IRQ number returned here and the Xen pirq argument.
@@ -690,6 +660,7 @@ int xen_bind_pirq_gsi_to_irq(unsigned gsi,
{
int irq = -1;
struct physdev_irq irq_op;
+ int ret;
mutex_lock(&irq_mapping_update_lock);
@@ -717,8 +688,13 @@ int xen_bind_pirq_gsi_to_irq(unsigned gsi,
goto out;
}
- xen_irq_info_pirq_init(irq, 0, pirq, gsi, DOMID_SELF,
+ ret = xen_irq_info_pirq_setup(irq, 0, pirq, gsi, DOMID_SELF,
shareable ? PIRQ_SHAREABLE : 0);
+ if (ret < 0) {
+ __unbind_from_irq(irq);
+ irq = ret;
+ goto out;
+ }
pirq_query_unmask(irq);
/* We try to use the handler with the appropriate semantic for the
@@ -778,7 +754,9 @@ int xen_bind_pirq_msi_to_irq(struct pci_dev *dev, struct msi_desc *msidesc,
irq_set_chip_and_handler_name(irq, &xen_pirq_chip, handle_edge_irq,
name);
- xen_irq_info_pirq_init(irq, 0, pirq, 0, domid, 0);
+ ret = xen_irq_info_pirq_setup(irq, 0, pirq, 0, domid, 0);
+ if (ret < 0)
+ goto error_irq;
ret = irq_set_msi_desc(irq, msidesc);
if (ret < 0)
goto error_irq;
@@ -786,8 +764,8 @@ out:
mutex_unlock(&irq_mapping_update_lock);
return irq;
error_irq:
+ __unbind_from_irq(irq);
mutex_unlock(&irq_mapping_update_lock);
- xen_free_irq(irq);
return ret;
}
#endif
@@ -857,13 +835,18 @@ int xen_pirq_from_irq(unsigned irq)
return pirq_from_irq(irq);
}
EXPORT_SYMBOL_GPL(xen_pirq_from_irq);
+
int bind_evtchn_to_irq(unsigned int evtchn)
{
int irq;
+ int ret;
+
+ if (evtchn >= xen_evtchn_max_channels())
+ return -ENOMEM;
mutex_lock(&irq_mapping_update_lock);
- irq = evtchn_to_irq[evtchn];
+ irq = get_evtchn_to_irq(evtchn);
if (irq == -1) {
irq = xen_allocate_irq_dynamic();
@@ -873,7 +856,12 @@ int bind_evtchn_to_irq(unsigned int evtchn)
irq_set_chip_and_handler_name(irq, &xen_dynamic_chip,
handle_edge_irq, "event");
- xen_irq_info_evtchn_init(irq, evtchn);
+ ret = xen_irq_info_evtchn_setup(irq, evtchn);
+ if (ret < 0) {
+ __unbind_from_irq(irq);
+ irq = ret;
+ goto out;
+ }
} else {
struct irq_info *info = info_for_irq(irq);
WARN_ON(info == NULL || info->type != IRQT_EVTCHN);
@@ -890,6 +878,7 @@ static int bind_ipi_to_irq(unsigned int ipi, unsigned int cpu)
{
struct evtchn_bind_ipi bind_ipi;
int evtchn, irq;
+ int ret;
mutex_lock(&irq_mapping_update_lock);
@@ -909,8 +898,12 @@ static int bind_ipi_to_irq(unsigned int ipi, unsigned int cpu)
BUG();
evtchn = bind_ipi.port;
- xen_irq_info_ipi_init(cpu, irq, evtchn, ipi);
-
+ ret = xen_irq_info_ipi_setup(cpu, irq, evtchn, ipi);
+ if (ret < 0) {
+ __unbind_from_irq(irq);
+ irq = ret;
+ goto out;
+ }
bind_evtchn_to_cpu(evtchn, cpu);
} else {
struct irq_info *info = info_for_irq(irq);
@@ -943,7 +936,7 @@ static int find_virq(unsigned int virq, unsigned int cpu)
int port, rc = -ENOENT;
memset(&status, 0, sizeof(status));
- for (port = 0; port <= NR_EVENT_CHANNELS; port++) {
+ for (port = 0; port < xen_evtchn_max_channels(); port++) {
status.dom = DOMID_SELF;
status.port = port;
rc = HYPERVISOR_event_channel_op(EVTCHNOP_status, &status);
@@ -959,6 +952,19 @@ static int find_virq(unsigned int virq, unsigned int cpu)
return rc;
}
+/**
+ * xen_evtchn_nr_channels - number of usable event channel ports
+ *
+ * This may be less than the maximum supported by the current
+ * hypervisor ABI. Use xen_evtchn_max_channels() for the maximum
+ * supported.
+ */
+unsigned xen_evtchn_nr_channels(void)
+{
+ return evtchn_ops->nr_channels();
+}
+EXPORT_SYMBOL_GPL(xen_evtchn_nr_channels);
+
int bind_virq_to_irq(unsigned int virq, unsigned int cpu)
{
struct evtchn_bind_virq bind_virq;
@@ -989,7 +995,12 @@ int bind_virq_to_irq(unsigned int virq, unsigned int cpu)
evtchn = ret;
}
- xen_irq_info_virq_init(cpu, irq, evtchn, virq);
+ ret = xen_irq_info_virq_setup(cpu, irq, evtchn, virq);
+ if (ret < 0) {
+ __unbind_from_irq(irq);
+ irq = ret;
+ goto out;
+ }
bind_evtchn_to_cpu(evtchn, cpu);
} else {
@@ -1005,50 +1016,8 @@ out:
static void unbind_from_irq(unsigned int irq)
{
- struct evtchn_close close;
- int evtchn = evtchn_from_irq(irq);
- struct irq_info *info = irq_get_handler_data(irq);
-
- if (WARN_ON(!info))
- return;
-
mutex_lock(&irq_mapping_update_lock);
-
- if (info->refcnt > 0) {
- info->refcnt--;
- if (info->refcnt != 0)
- goto done;
- }
-
- if (VALID_EVTCHN(evtchn)) {
- close.port = evtchn;
- if (HYPERVISOR_event_channel_op(EVTCHNOP_close, &close) != 0)
- BUG();
-
- switch (type_from_irq(irq)) {
- case IRQT_VIRQ:
- per_cpu(virq_to_irq, cpu_from_evtchn(evtchn))
- [virq_from_irq(irq)] = -1;
- break;
- case IRQT_IPI:
- per_cpu(ipi_to_irq, cpu_from_evtchn(evtchn))
- [ipi_from_irq(irq)] = -1;
- break;
- default:
- break;
- }
-
- /* Closed ports are implicitly re-bound to VCPU0. */
- bind_evtchn_to_cpu(evtchn, 0);
-
- evtchn_to_irq[evtchn] = -1;
- }
-
- BUG_ON(info_for_irq(irq)->type == IRQT_UNBOUND);
-
- xen_free_irq(irq);
-
- done:
+ __unbind_from_irq(irq);
mutex_unlock(&irq_mapping_update_lock);
}
@@ -1148,9 +1117,26 @@ void unbind_from_irqhandler(unsigned int irq, void *dev_id)
}
EXPORT_SYMBOL_GPL(unbind_from_irqhandler);
+/**
+ * xen_set_irq_priority() - set an event channel priority.
+ * @irq:irq bound to an event channel.
+ * @priority: priority between XEN_IRQ_PRIORITY_MAX and XEN_IRQ_PRIORITY_MIN.
+ */
+int xen_set_irq_priority(unsigned irq, unsigned priority)
+{
+ struct evtchn_set_priority set_priority;
+
+ set_priority.port = evtchn_from_irq(irq);
+ set_priority.priority = priority;
+
+ return HYPERVISOR_event_channel_op(EVTCHNOP_set_priority,
+ &set_priority);
+}
+EXPORT_SYMBOL_GPL(xen_set_irq_priority);
+
int evtchn_make_refcounted(unsigned int evtchn)
{
- int irq = evtchn_to_irq[evtchn];
+ int irq = get_evtchn_to_irq(evtchn);
struct irq_info *info;
if (irq == -1)
@@ -1175,12 +1161,12 @@ int evtchn_get(unsigned int evtchn)
struct irq_info *info;
int err = -ENOENT;
- if (evtchn >= NR_EVENT_CHANNELS)
+ if (evtchn >= xen_evtchn_max_channels())
return -EINVAL;
mutex_lock(&irq_mapping_update_lock);
- irq = evtchn_to_irq[evtchn];
+ irq = get_evtchn_to_irq(evtchn);
if (irq == -1)
goto done;
@@ -1204,7 +1190,7 @@ EXPORT_SYMBOL_GPL(evtchn_get);
void evtchn_put(unsigned int evtchn)
{
- int irq = evtchn_to_irq[evtchn];
+ int irq = get_evtchn_to_irq(evtchn);
if (WARN_ON(irq == -1))
return;
unbind_from_irq(irq);
@@ -1228,222 +1214,21 @@ void xen_send_IPI_one(unsigned int cpu, enum ipi_vector vector)
notify_remote_via_irq(irq);
}
-irqreturn_t xen_debug_interrupt(int irq, void *dev_id)
-{
- struct shared_info *sh = HYPERVISOR_shared_info;
- int cpu = smp_processor_id();
- xen_ulong_t *cpu_evtchn = per_cpu(cpu_evtchn_mask, cpu);
- int i;
- unsigned long flags;
- static DEFINE_SPINLOCK(debug_lock);
- struct vcpu_info *v;
-
- spin_lock_irqsave(&debug_lock, flags);
-
- printk("\nvcpu %d\n ", cpu);
-
- for_each_online_cpu(i) {
- int pending;
- v = per_cpu(xen_vcpu, i);
- pending = (get_irq_regs() && i == cpu)
- ? xen_irqs_disabled(get_irq_regs())
- : v->evtchn_upcall_mask;
- printk("%d: masked=%d pending=%d event_sel %0*"PRI_xen_ulong"\n ", i,
- pending, v->evtchn_upcall_pending,
- (int)(sizeof(v->evtchn_pending_sel)*2),
- v->evtchn_pending_sel);
- }
- v = per_cpu(xen_vcpu, cpu);
-
- printk("\npending:\n ");
- for (i = ARRAY_SIZE(sh->evtchn_pending)-1; i >= 0; i--)
- printk("%0*"PRI_xen_ulong"%s",
- (int)sizeof(sh->evtchn_pending[0])*2,
- sh->evtchn_pending[i],
- i % 8 == 0 ? "\n " : " ");
- printk("\nglobal mask:\n ");
- for (i = ARRAY_SIZE(sh->evtchn_mask)-1; i >= 0; i--)
- printk("%0*"PRI_xen_ulong"%s",
- (int)(sizeof(sh->evtchn_mask[0])*2),
- sh->evtchn_mask[i],
- i % 8 == 0 ? "\n " : " ");
-
- printk("\nglobally unmasked:\n ");
- for (i = ARRAY_SIZE(sh->evtchn_mask)-1; i >= 0; i--)
- printk("%0*"PRI_xen_ulong"%s",
- (int)(sizeof(sh->evtchn_mask[0])*2),
- sh->evtchn_pending[i] & ~sh->evtchn_mask[i],
- i % 8 == 0 ? "\n " : " ");
-
- printk("\nlocal cpu%d mask:\n ", cpu);
- for (i = (NR_EVENT_CHANNELS/BITS_PER_EVTCHN_WORD)-1; i >= 0; i--)
- printk("%0*"PRI_xen_ulong"%s", (int)(sizeof(cpu_evtchn[0])*2),
- cpu_evtchn[i],
- i % 8 == 0 ? "\n " : " ");
-
- printk("\nlocally unmasked:\n ");
- for (i = ARRAY_SIZE(sh->evtchn_mask)-1; i >= 0; i--) {
- xen_ulong_t pending = sh->evtchn_pending[i]
- & ~sh->evtchn_mask[i]
- & cpu_evtchn[i];
- printk("%0*"PRI_xen_ulong"%s",
- (int)(sizeof(sh->evtchn_mask[0])*2),
- pending, i % 8 == 0 ? "\n " : " ");
- }
-
- printk("\npending list:\n");
- for (i = 0; i < NR_EVENT_CHANNELS; i++) {
- if (sync_test_bit(i, BM(sh->evtchn_pending))) {
- int word_idx = i / BITS_PER_EVTCHN_WORD;
- printk(" %d: event %d -> irq %d%s%s%s\n",
- cpu_from_evtchn(i), i,
- evtchn_to_irq[i],
- sync_test_bit(word_idx, BM(&v->evtchn_pending_sel))
- ? "" : " l2-clear",
- !sync_test_bit(i, BM(sh->evtchn_mask))
- ? "" : " globally-masked",
- sync_test_bit(i, BM(cpu_evtchn))
- ? "" : " locally-masked");
- }
- }
-
- spin_unlock_irqrestore(&debug_lock, flags);
-
- return IRQ_HANDLED;
-}
-
static DEFINE_PER_CPU(unsigned, xed_nesting_count);
-static DEFINE_PER_CPU(unsigned int, current_word_idx);
-static DEFINE_PER_CPU(unsigned int, current_bit_idx);
-
-/*
- * Mask out the i least significant bits of w
- */
-#define MASK_LSBS(w, i) (w & ((~((xen_ulong_t)0UL)) << i))
-/*
- * Search the CPUs pending events bitmasks. For each one found, map
- * the event number to an irq, and feed it into do_IRQ() for
- * handling.
- *
- * Xen uses a two-level bitmap to speed searching. The first level is
- * a bitset of words which contain pending event bits. The second
- * level is a bitset of pending events themselves.
- */
static void __xen_evtchn_do_upcall(void)
{
- int start_word_idx, start_bit_idx;
- int word_idx, bit_idx;
- int i, irq;
- int cpu = get_cpu();
- struct shared_info *s = HYPERVISOR_shared_info;
struct vcpu_info *vcpu_info = __this_cpu_read(xen_vcpu);
+ int cpu = get_cpu();
unsigned count;
do {
- xen_ulong_t pending_words;
- xen_ulong_t pending_bits;
- struct irq_desc *desc;
-
vcpu_info->evtchn_upcall_pending = 0;
if (__this_cpu_inc_return(xed_nesting_count) - 1)
goto out;
- /*
- * Master flag must be cleared /before/ clearing
- * selector flag. xchg_xen_ulong must contain an
- * appropriate barrier.
- */
- if ((irq = per_cpu(virq_to_irq, cpu)[VIRQ_TIMER]) != -1) {
- int evtchn = evtchn_from_irq(irq);
- word_idx = evtchn / BITS_PER_LONG;
- pending_bits = evtchn % BITS_PER_LONG;
- if (active_evtchns(cpu, s, word_idx) & (1ULL << pending_bits)) {
- desc = irq_to_desc(irq);
- if (desc)
- generic_handle_irq_desc(irq, desc);
- }
- }
-
- pending_words = xchg_xen_ulong(&vcpu_info->evtchn_pending_sel, 0);
-
- start_word_idx = __this_cpu_read(current_word_idx);
- start_bit_idx = __this_cpu_read(current_bit_idx);
-
- word_idx = start_word_idx;
-
- for (i = 0; pending_words != 0; i++) {
- xen_ulong_t words;
-
- words = MASK_LSBS(pending_words, word_idx);
-
- /*
- * If we masked out all events, wrap to beginning.
- */
- if (words == 0) {
- word_idx = 0;
- bit_idx = 0;
- continue;
- }
- word_idx = EVTCHN_FIRST_BIT(words);
-
- pending_bits = active_evtchns(cpu, s, word_idx);
- bit_idx = 0; /* usually scan entire word from start */
- /*
- * We scan the starting word in two parts.
- *
- * 1st time: start in the middle, scanning the
- * upper bits.
- *
- * 2nd time: scan the whole word (not just the
- * parts skipped in the first pass) -- if an
- * event in the previously scanned bits is
- * pending again it would just be scanned on
- * the next loop anyway.
- */
- if (word_idx == start_word_idx) {
- if (i == 0)
- bit_idx = start_bit_idx;
- }
-
- do {
- xen_ulong_t bits;
- int port;
-
- bits = MASK_LSBS(pending_bits, bit_idx);
-
- /* If we masked out all events, move on. */
- if (bits == 0)
- break;
-
- bit_idx = EVTCHN_FIRST_BIT(bits);
-
- /* Process port. */
- port = (word_idx * BITS_PER_EVTCHN_WORD) + bit_idx;
- irq = evtchn_to_irq[port];
-
- if (irq != -1) {
- desc = irq_to_desc(irq);
- if (desc)
- generic_handle_irq_desc(irq, desc);
- }
-
- bit_idx = (bit_idx + 1) % BITS_PER_EVTCHN_WORD;
-
- /* Next caller starts at last processed + 1 */
- __this_cpu_write(current_word_idx,
- bit_idx ? word_idx :
- (word_idx+1) % BITS_PER_EVTCHN_WORD);
- __this_cpu_write(current_bit_idx, bit_idx);
- } while (bit_idx != 0);
-
- /* Scan start_l1i twice; all others once. */
- if ((word_idx != start_word_idx) || (i != 0))
- pending_words &= ~(1UL << word_idx);
-
- word_idx = (word_idx + 1) % BITS_PER_EVTCHN_WORD;
- }
+ xen_evtchn_handle_events(cpu);
BUG_ON(!irqs_disabled());
@@ -1492,12 +1277,12 @@ void rebind_evtchn_irq(int evtchn, int irq)
mutex_lock(&irq_mapping_update_lock);
/* After resume the irq<->evtchn mappings are all cleared out */
- BUG_ON(evtchn_to_irq[evtchn] != -1);
+ BUG_ON(get_evtchn_to_irq(evtchn) != -1);
/* Expect irq to have been bound before,
so there should be a proper type */
BUG_ON(info->type == IRQT_UNBOUND);
- xen_irq_info_evtchn_init(irq, evtchn);
+ (void)xen_irq_info_evtchn_setup(irq, evtchn);
mutex_unlock(&irq_mapping_update_lock);
@@ -1511,7 +1296,6 @@ void rebind_evtchn_irq(int evtchn, int irq)
/* Rebind an evtchn so that it gets delivered to a specific cpu */
static int rebind_irq_to_cpu(unsigned irq, unsigned tcpu)
{
- struct shared_info *s = HYPERVISOR_shared_info;
struct evtchn_bind_vcpu bind_vcpu;
int evtchn = evtchn_from_irq(irq);
int masked;
@@ -1534,7 +1318,7 @@ static int rebind_irq_to_cpu(unsigned irq, unsigned tcpu)
* Mask the event while changing the VCPU binding to prevent
* it being delivered on an unexpected VCPU.
*/
- masked = sync_test_and_set_bit(evtchn, BM(s->evtchn_mask));
+ masked = test_and_set_mask(evtchn);
/*
* If this fails, it usually just indicates that we're dealing with a
@@ -1558,22 +1342,26 @@ static int set_affinity_irq(struct irq_data *data, const struct cpumask *dest,
return rebind_irq_to_cpu(data->irq, tcpu);
}
-int resend_irq_on_evtchn(unsigned int irq)
+static int retrigger_evtchn(int evtchn)
{
- int masked, evtchn = evtchn_from_irq(irq);
- struct shared_info *s = HYPERVISOR_shared_info;
+ int masked;
if (!VALID_EVTCHN(evtchn))
- return 1;
+ return 0;
- masked = sync_test_and_set_bit(evtchn, BM(s->evtchn_mask));
- sync_set_bit(evtchn, BM(s->evtchn_pending));
+ masked = test_and_set_mask(evtchn);
+ set_evtchn(evtchn);
if (!masked)
unmask_evtchn(evtchn);
return 1;
}
+int resend_irq_on_evtchn(unsigned int irq)
+{
+ return retrigger_evtchn(evtchn_from_irq(irq));
+}
+
static void enable_dynirq(struct irq_data *data)
{
int evtchn = evtchn_from_irq(data->irq);
@@ -1608,21 +1396,7 @@ static void mask_ack_dynirq(struct irq_data *data)
static int retrigger_dynirq(struct irq_data *data)
{
- int evtchn = evtchn_from_irq(data->irq);
- struct shared_info *sh = HYPERVISOR_shared_info;
- int ret = 0;
-
- if (VALID_EVTCHN(evtchn)) {
- int masked;
-
- masked = sync_test_and_set_bit(evtchn, BM(sh->evtchn_mask));
- sync_set_bit(evtchn, BM(sh->evtchn_pending));
- if (!masked)
- unmask_evtchn(evtchn);
- ret = 1;
- }
-
- return ret;
+ return retrigger_evtchn(evtchn_from_irq(data->irq));
}
static void restore_pirqs(void)
@@ -1683,7 +1457,7 @@ static void restore_cpu_virqs(unsigned int cpu)
evtchn = bind_virq.port;
/* Record the new mapping. */
- xen_irq_info_virq_init(cpu, irq, evtchn, virq);
+ (void)xen_irq_info_virq_setup(cpu, irq, evtchn, virq);
bind_evtchn_to_cpu(evtchn, cpu);
}
}
@@ -1707,7 +1481,7 @@ static void restore_cpu_ipis(unsigned int cpu)
evtchn = bind_ipi.port;
/* Record the new mapping. */
- xen_irq_info_ipi_init(cpu, irq, evtchn, ipi);
+ (void)xen_irq_info_ipi_setup(cpu, irq, evtchn, ipi);
bind_evtchn_to_cpu(evtchn, cpu);
}
}
@@ -1784,21 +1558,18 @@ EXPORT_SYMBOL_GPL(xen_test_irq_shared);
void xen_irq_resume(void)
{
- unsigned int cpu, evtchn;
+ unsigned int cpu;
struct irq_info *info;
- init_evtchn_cpu_bindings();
-
/* New event-channel space is not 'live' yet. */
- for (evtchn = 0; evtchn < NR_EVENT_CHANNELS; evtchn++)
- mask_evtchn(evtchn);
+ xen_evtchn_mask_all();
+ xen_evtchn_resume();
/* No IRQ <-> event-channel mappings. */
list_for_each_entry(info, &xen_irq_list_head, list)
info->evtchn = 0; /* zap event-channel binding */
- for (evtchn = 0; evtchn < NR_EVENT_CHANNELS; evtchn++)
- evtchn_to_irq[evtchn] = -1;
+ clear_evtchn_to_irq_all();
for_each_possible_cpu(cpu) {
restore_cpu_virqs(cpu);
@@ -1889,27 +1660,40 @@ void xen_callback_vector(void)
void xen_callback_vector(void) {}
#endif
+#undef MODULE_PARAM_PREFIX
+#define MODULE_PARAM_PREFIX "xen."
+
+static bool fifo_events = true;
+module_param(fifo_events, bool, 0);
+
void __init xen_init_IRQ(void)
{
- int i;
+ int ret = -EINVAL;
- evtchn_to_irq = kcalloc(NR_EVENT_CHANNELS, sizeof(*evtchn_to_irq),
- GFP_KERNEL);
- BUG_ON(!evtchn_to_irq);
- for (i = 0; i < NR_EVENT_CHANNELS; i++)
- evtchn_to_irq[i] = -1;
+ if (fifo_events)
+ ret = xen_evtchn_fifo_init();
+ if (ret < 0)
+ xen_evtchn_2l_init();
- init_evtchn_cpu_bindings();
+ evtchn_to_irq = kcalloc(EVTCHN_ROW(xen_evtchn_max_channels()),
+ sizeof(*evtchn_to_irq), GFP_KERNEL);
+ BUG_ON(!evtchn_to_irq);
/* No event channels are 'live' right now. */
- for (i = 0; i < NR_EVENT_CHANNELS; i++)
- mask_evtchn(i);
+ xen_evtchn_mask_all();
pirq_needs_eoi = pirq_needs_eoi_flag;
#ifdef CONFIG_X86
- if (xen_hvm_domain()) {
+ if (xen_pv_domain()) {
+ irq_ctx_init(smp_processor_id());
+ if (xen_initial_domain())
+ pci_xen_initial_domain();
+ }
+ if (xen_feature(XENFEAT_hvm_callback_vector))
xen_callback_vector();
+
+ if (xen_hvm_domain()) {
native_init_IRQ();
/* pci_xen_hvm_init must be called after native_init_IRQ so that
* __acpi_register_gsi can point at the right function */
@@ -1918,13 +1702,10 @@ void __init xen_init_IRQ(void)
int rc;
struct physdev_pirq_eoi_gmfn eoi_gmfn;
- irq_ctx_init(smp_processor_id());
- if (xen_initial_domain())
- pci_xen_initial_domain();
-
pirq_eoi_map = (void *)__get_free_page(GFP_KERNEL|__GFP_ZERO);
eoi_gmfn.gmfn = virt_to_mfn(pirq_eoi_map);
rc = HYPERVISOR_physdev_op(PHYSDEVOP_pirq_eoi_gmfn_v2, &eoi_gmfn);
+ /* TODO: No PVH support for PIRQ EOI */
if (rc != 0) {
free_page((unsigned long) pirq_eoi_map);
pirq_eoi_map = NULL;
diff --git a/drivers/xen/events/events_fifo.c b/drivers/xen/events/events_fifo.c
new file mode 100644
index 000000000000..1de2a191b395
--- /dev/null
+++ b/drivers/xen/events/events_fifo.c
@@ -0,0 +1,428 @@
+/*
+ * Xen event channels (FIFO-based ABI)
+ *
+ * Copyright (C) 2013 Citrix Systems R&D ltd.
+ *
+ * This source code is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License as
+ * published by the Free Software Foundation; either version 2 of the
+ * License, or (at your option) any later version.
+ *
+ * Or, when distributed separately from the Linux kernel or
+ * incorporated into other software packages, subject to the following
+ * license:
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this source file (the "Software"), to deal in the Software without
+ * restriction, including without limitation the rights to use, copy, modify,
+ * merge, publish, distribute, sublicense, and/or sell copies of the Software,
+ * and to permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
+ * IN THE SOFTWARE.
+ */
+
+#define pr_fmt(fmt) "xen:" KBUILD_MODNAME ": " fmt
+
+#include <linux/linkage.h>
+#include <linux/interrupt.h>
+#include <linux/irq.h>
+#include <linux/module.h>
+#include <linux/smp.h>
+#include <linux/percpu.h>
+#include <linux/cpu.h>
+
+#include <asm/sync_bitops.h>
+#include <asm/xen/hypercall.h>
+#include <asm/xen/hypervisor.h>
+#include <asm/xen/page.h>
+
+#include <xen/xen.h>
+#include <xen/xen-ops.h>
+#include <xen/events.h>
+#include <xen/interface/xen.h>
+#include <xen/interface/event_channel.h>
+
+#include "events_internal.h"
+
+#define EVENT_WORDS_PER_PAGE (PAGE_SIZE / sizeof(event_word_t))
+#define MAX_EVENT_ARRAY_PAGES (EVTCHN_FIFO_NR_CHANNELS / EVENT_WORDS_PER_PAGE)
+
+struct evtchn_fifo_queue {
+ uint32_t head[EVTCHN_FIFO_MAX_QUEUES];
+};
+
+static DEFINE_PER_CPU(struct evtchn_fifo_control_block *, cpu_control_block);
+static DEFINE_PER_CPU(struct evtchn_fifo_queue, cpu_queue);
+static event_word_t *event_array[MAX_EVENT_ARRAY_PAGES] __read_mostly;
+static unsigned event_array_pages __read_mostly;
+
+#define BM(w) ((unsigned long *)(w))
+
+static inline event_word_t *event_word_from_port(unsigned port)
+{
+ unsigned i = port / EVENT_WORDS_PER_PAGE;
+
+ return event_array[i] + port % EVENT_WORDS_PER_PAGE;
+}
+
+static unsigned evtchn_fifo_max_channels(void)
+{
+ return EVTCHN_FIFO_NR_CHANNELS;
+}
+
+static unsigned evtchn_fifo_nr_channels(void)
+{
+ return event_array_pages * EVENT_WORDS_PER_PAGE;
+}
+
+static void free_unused_array_pages(void)
+{
+ unsigned i;
+
+ for (i = event_array_pages; i < MAX_EVENT_ARRAY_PAGES; i++) {
+ if (!event_array[i])
+ break;
+ free_page((unsigned long)event_array[i]);
+ event_array[i] = NULL;
+ }
+}
+
+static void init_array_page(event_word_t *array_page)
+{
+ unsigned i;
+
+ for (i = 0; i < EVENT_WORDS_PER_PAGE; i++)
+ array_page[i] = 1 << EVTCHN_FIFO_MASKED;
+}
+
+static int evtchn_fifo_setup(struct irq_info *info)
+{
+ unsigned port = info->evtchn;
+ unsigned new_array_pages;
+ int ret;
+
+ new_array_pages = port / EVENT_WORDS_PER_PAGE + 1;
+
+ if (new_array_pages > MAX_EVENT_ARRAY_PAGES)
+ return -EINVAL;
+
+ while (event_array_pages < new_array_pages) {
+ void *array_page;
+ struct evtchn_expand_array expand_array;
+
+ /* Might already have a page if we've resumed. */
+ array_page = event_array[event_array_pages];
+ if (!array_page) {
+ array_page = (void *)__get_free_page(GFP_KERNEL);
+ if (array_page == NULL) {
+ ret = -ENOMEM;
+ goto error;
+ }
+ event_array[event_array_pages] = array_page;
+ }
+
+ /* Mask all events in this page before adding it. */
+ init_array_page(array_page);
+
+ expand_array.array_gfn = virt_to_mfn(array_page);
+
+ ret = HYPERVISOR_event_channel_op(EVTCHNOP_expand_array, &expand_array);
+ if (ret < 0)
+ goto error;
+
+ event_array_pages++;
+ }
+ return 0;
+
+ error:
+ if (event_array_pages == 0)
+ panic("xen: unable to expand event array with initial page (%d)\n", ret);
+ else
+ pr_err("unable to expand event array (%d)\n", ret);
+ free_unused_array_pages();
+ return ret;
+}
+
+static void evtchn_fifo_bind_to_cpu(struct irq_info *info, unsigned cpu)
+{
+ /* no-op */
+}
+
+static void evtchn_fifo_clear_pending(unsigned port)
+{
+ event_word_t *word = event_word_from_port(port);
+ sync_clear_bit(EVTCHN_FIFO_PENDING, BM(word));
+}
+
+static void evtchn_fifo_set_pending(unsigned port)
+{
+ event_word_t *word = event_word_from_port(port);
+ sync_set_bit(EVTCHN_FIFO_PENDING, BM(word));
+}
+
+static bool evtchn_fifo_is_pending(unsigned port)
+{
+ event_word_t *word = event_word_from_port(port);
+ return sync_test_bit(EVTCHN_FIFO_PENDING, BM(word));
+}
+
+static bool evtchn_fifo_test_and_set_mask(unsigned port)
+{
+ event_word_t *word = event_word_from_port(port);
+ return sync_test_and_set_bit(EVTCHN_FIFO_MASKED, BM(word));
+}
+
+static void evtchn_fifo_mask(unsigned port)
+{
+ event_word_t *word = event_word_from_port(port);
+ sync_set_bit(EVTCHN_FIFO_MASKED, BM(word));
+}
+
+/*
+ * Clear MASKED, spinning if BUSY is set.
+ */
+static void clear_masked(volatile event_word_t *word)
+{
+ event_word_t new, old, w;
+
+ w = *word;
+
+ do {
+ old = w & ~(1 << EVTCHN_FIFO_BUSY);
+ new = old & ~(1 << EVTCHN_FIFO_MASKED);
+ w = sync_cmpxchg(word, old, new);
+ } while (w != old);
+}
+
+static void evtchn_fifo_unmask(unsigned port)
+{
+ event_word_t *word = event_word_from_port(port);
+
+ BUG_ON(!irqs_disabled());
+
+ clear_masked(word);
+ if (sync_test_bit(EVTCHN_FIFO_PENDING, BM(word))) {
+ struct evtchn_unmask unmask = { .port = port };
+ (void)HYPERVISOR_event_channel_op(EVTCHNOP_unmask, &unmask);
+ }
+}
+
+static uint32_t clear_linked(volatile event_word_t *word)
+{
+ event_word_t new, old, w;
+
+ w = *word;
+
+ do {
+ old = w;
+ new = (w & ~((1 << EVTCHN_FIFO_LINKED)
+ | EVTCHN_FIFO_LINK_MASK));
+ } while ((w = sync_cmpxchg(word, old, new)) != old);
+
+ return w & EVTCHN_FIFO_LINK_MASK;
+}
+
+static void handle_irq_for_port(unsigned port)
+{
+ int irq;
+ struct irq_desc *desc;
+
+ irq = get_evtchn_to_irq(port);
+ if (irq != -1) {
+ desc = irq_to_desc(irq);
+ if (desc)
+ generic_handle_irq_desc(irq, desc);
+ }
+}
+
+static void consume_one_event(unsigned cpu,
+ struct evtchn_fifo_control_block *control_block,
+ unsigned priority, uint32_t *ready)
+{
+ struct evtchn_fifo_queue *q = &per_cpu(cpu_queue, cpu);
+ uint32_t head;
+ unsigned port;
+ event_word_t *word;
+
+ head = q->head[priority];
+
+ /*
+ * Reached the tail last time? Read the new HEAD from the
+ * control block.
+ */
+ if (head == 0) {
+ rmb(); /* Ensure word is up-to-date before reading head. */
+ head = control_block->head[priority];
+ }
+
+ port = head;
+ word = event_word_from_port(port);
+ head = clear_linked(word);
+
+ /*
+ * If the link is non-zero, there are more events in the
+ * queue, otherwise the queue is empty.
+ *
+ * If the queue is empty, clear this priority from our local
+ * copy of the ready word.
+ */
+ if (head == 0)
+ clear_bit(priority, BM(ready));
+
+ if (sync_test_bit(EVTCHN_FIFO_PENDING, BM(word))
+ && !sync_test_bit(EVTCHN_FIFO_MASKED, BM(word)))
+ handle_irq_for_port(port);
+
+ q->head[priority] = head;
+}
+
+static void evtchn_fifo_handle_events(unsigned cpu)
+{
+ struct evtchn_fifo_control_block *control_block;
+ uint32_t ready;
+ unsigned q;
+
+ control_block = per_cpu(cpu_control_block, cpu);
+
+ ready = xchg(&control_block->ready, 0);
+
+ while (ready) {
+ q = find_first_bit(BM(&ready), EVTCHN_FIFO_MAX_QUEUES);
+ consume_one_event(cpu, control_block, q, &ready);
+ ready |= xchg(&control_block->ready, 0);
+ }
+}
+
+static void evtchn_fifo_resume(void)
+{
+ unsigned cpu;
+
+ for_each_possible_cpu(cpu) {
+ void *control_block = per_cpu(cpu_control_block, cpu);
+ struct evtchn_init_control init_control;
+ int ret;
+
+ if (!control_block)
+ continue;
+
+ /*
+ * If this CPU is offline, take the opportunity to
+ * free the control block while it is not being
+ * used.
+ */
+ if (!cpu_online(cpu)) {
+ free_page((unsigned long)control_block);
+ per_cpu(cpu_control_block, cpu) = NULL;
+ continue;
+ }
+
+ init_control.control_gfn = virt_to_mfn(control_block);
+ init_control.offset = 0;
+ init_control.vcpu = cpu;
+
+ ret = HYPERVISOR_event_channel_op(EVTCHNOP_init_control,
+ &init_control);
+ if (ret < 0)
+ BUG();
+ }
+
+ /*
+ * The event array starts out as empty again and is extended
+ * as normal when events are bound. The existing pages will
+ * be reused.
+ */
+ event_array_pages = 0;
+}
+
+static const struct evtchn_ops evtchn_ops_fifo = {
+ .max_channels = evtchn_fifo_max_channels,
+ .nr_channels = evtchn_fifo_nr_channels,
+ .setup = evtchn_fifo_setup,
+ .bind_to_cpu = evtchn_fifo_bind_to_cpu,
+ .clear_pending = evtchn_fifo_clear_pending,
+ .set_pending = evtchn_fifo_set_pending,
+ .is_pending = evtchn_fifo_is_pending,
+ .test_and_set_mask = evtchn_fifo_test_and_set_mask,
+ .mask = evtchn_fifo_mask,
+ .unmask = evtchn_fifo_unmask,
+ .handle_events = evtchn_fifo_handle_events,
+ .resume = evtchn_fifo_resume,
+};
+
+static int evtchn_fifo_init_control_block(unsigned cpu)
+{
+ struct page *control_block = NULL;
+ struct evtchn_init_control init_control;
+ int ret = -ENOMEM;
+
+ control_block = alloc_page(GFP_KERNEL|__GFP_ZERO);
+ if (control_block == NULL)
+ goto error;
+
+ init_control.control_gfn = virt_to_mfn(page_address(control_block));
+ init_control.offset = 0;
+ init_control.vcpu = cpu;
+
+ ret = HYPERVISOR_event_channel_op(EVTCHNOP_init_control, &init_control);
+ if (ret < 0)
+ goto error;
+
+ per_cpu(cpu_control_block, cpu) = page_address(control_block);
+
+ return 0;
+
+ error:
+ __free_page(control_block);
+ return ret;
+}
+
+static int evtchn_fifo_cpu_notification(struct notifier_block *self,
+ unsigned long action,
+ void *hcpu)
+{
+ int cpu = (long)hcpu;
+ int ret = 0;
+
+ switch (action) {
+ case CPU_UP_PREPARE:
+ if (!per_cpu(cpu_control_block, cpu))
+ ret = evtchn_fifo_init_control_block(cpu);
+ break;
+ default:
+ break;
+ }
+ return ret < 0 ? NOTIFY_BAD : NOTIFY_OK;
+}
+
+static struct notifier_block evtchn_fifo_cpu_notifier = {
+ .notifier_call = evtchn_fifo_cpu_notification,
+};
+
+int __init xen_evtchn_fifo_init(void)
+{
+ int cpu = get_cpu();
+ int ret;
+
+ ret = evtchn_fifo_init_control_block(cpu);
+ if (ret < 0)
+ goto out;
+
+ pr_info("Using FIFO-based ABI\n");
+
+ evtchn_ops = &evtchn_ops_fifo;
+
+ register_cpu_notifier(&evtchn_fifo_cpu_notifier);
+out:
+ put_cpu();
+ return ret;
+}
diff --git a/drivers/xen/events/events_internal.h b/drivers/xen/events/events_internal.h
new file mode 100644
index 000000000000..677f41a0fff9
--- /dev/null
+++ b/drivers/xen/events/events_internal.h
@@ -0,0 +1,150 @@
+/*
+ * Xen Event Channels (internal header)
+ *
+ * Copyright (C) 2013 Citrix Systems R&D Ltd.
+ *
+ * This source code is licensed under the GNU General Public License,
+ * Version 2 or later. See the file COPYING for more details.
+ */
+#ifndef __EVENTS_INTERNAL_H__
+#define __EVENTS_INTERNAL_H__
+
+/* Interrupt types. */
+enum xen_irq_type {
+ IRQT_UNBOUND = 0,
+ IRQT_PIRQ,
+ IRQT_VIRQ,
+ IRQT_IPI,
+ IRQT_EVTCHN
+};
+
+/*
+ * Packed IRQ information:
+ * type - enum xen_irq_type
+ * event channel - irq->event channel mapping
+ * cpu - cpu this event channel is bound to
+ * index - type-specific information:
+ * PIRQ - vector, with MSB being "needs EIO", or physical IRQ of the HVM
+ * guest, or GSI (real passthrough IRQ) of the device.
+ * VIRQ - virq number
+ * IPI - IPI vector
+ * EVTCHN -
+ */
+struct irq_info {
+ struct list_head list;
+ int refcnt;
+ enum xen_irq_type type; /* type */
+ unsigned irq;
+ unsigned int evtchn; /* event channel */
+ unsigned short cpu; /* cpu bound */
+
+ union {
+ unsigned short virq;
+ enum ipi_vector ipi;
+ struct {
+ unsigned short pirq;
+ unsigned short gsi;
+ unsigned char vector;
+ unsigned char flags;
+ uint16_t domid;
+ } pirq;
+ } u;
+};
+
+#define PIRQ_NEEDS_EOI (1 << 0)
+#define PIRQ_SHAREABLE (1 << 1)
+
+struct evtchn_ops {
+ unsigned (*max_channels)(void);
+ unsigned (*nr_channels)(void);
+
+ int (*setup)(struct irq_info *info);
+ void (*bind_to_cpu)(struct irq_info *info, unsigned cpu);
+
+ void (*clear_pending)(unsigned port);
+ void (*set_pending)(unsigned port);
+ bool (*is_pending)(unsigned port);
+ bool (*test_and_set_mask)(unsigned port);
+ void (*mask)(unsigned port);
+ void (*unmask)(unsigned port);
+
+ void (*handle_events)(unsigned cpu);
+ void (*resume)(void);
+};
+
+extern const struct evtchn_ops *evtchn_ops;
+
+extern int **evtchn_to_irq;
+int get_evtchn_to_irq(unsigned int evtchn);
+
+struct irq_info *info_for_irq(unsigned irq);
+unsigned cpu_from_irq(unsigned irq);
+unsigned cpu_from_evtchn(unsigned int evtchn);
+
+static inline unsigned xen_evtchn_max_channels(void)
+{
+ return evtchn_ops->max_channels();
+}
+
+/*
+ * Do any ABI specific setup for a bound event channel before it can
+ * be unmasked and used.
+ */
+static inline int xen_evtchn_port_setup(struct irq_info *info)
+{
+ if (evtchn_ops->setup)
+ return evtchn_ops->setup(info);
+ return 0;
+}
+
+static inline void xen_evtchn_port_bind_to_cpu(struct irq_info *info,
+ unsigned cpu)
+{
+ evtchn_ops->bind_to_cpu(info, cpu);
+}
+
+static inline void clear_evtchn(unsigned port)
+{
+ evtchn_ops->clear_pending(port);
+}
+
+static inline void set_evtchn(unsigned port)
+{
+ evtchn_ops->set_pending(port);
+}
+
+static inline bool test_evtchn(unsigned port)
+{
+ return evtchn_ops->is_pending(port);
+}
+
+static inline bool test_and_set_mask(unsigned port)
+{
+ return evtchn_ops->test_and_set_mask(port);
+}
+
+static inline void mask_evtchn(unsigned port)
+{
+ return evtchn_ops->mask(port);
+}
+
+static inline void unmask_evtchn(unsigned port)
+{
+ return evtchn_ops->unmask(port);
+}
+
+static inline void xen_evtchn_handle_events(unsigned cpu)
+{
+ return evtchn_ops->handle_events(cpu);
+}
+
+static inline void xen_evtchn_resume(void)
+{
+ if (evtchn_ops->resume)
+ evtchn_ops->resume();
+}
+
+void xen_evtchn_2l_init(void);
+int xen_evtchn_fifo_init(void);
+
+#endif /* #ifndef __EVENTS_INTERNAL_H__ */
diff --git a/drivers/xen/evtchn.c b/drivers/xen/evtchn.c
index 5de2063e16d3..00f40f051d95 100644
--- a/drivers/xen/evtchn.c
+++ b/drivers/xen/evtchn.c
@@ -417,7 +417,7 @@ static long evtchn_ioctl(struct file *file,
break;
rc = -EINVAL;
- if (unbind.port >= NR_EVENT_CHANNELS)
+ if (unbind.port >= xen_evtchn_nr_channels())
break;
rc = -ENOTCONN;
diff --git a/drivers/xen/gntdev.c b/drivers/xen/gntdev.c
index e41c79c986ea..073b4a19a8b0 100644
--- a/drivers/xen/gntdev.c
+++ b/drivers/xen/gntdev.c
@@ -846,7 +846,7 @@ static int __init gntdev_init(void)
if (!xen_domain())
return -ENODEV;
- use_ptemod = xen_pv_domain();
+ use_ptemod = !xen_feature(XENFEAT_auto_translated_physmap);
err = misc_register(&gntdev_miscdev);
if (err != 0) {
diff --git a/drivers/xen/grant-table.c b/drivers/xen/grant-table.c
index aa846a48f400..1ce1c40331f3 100644
--- a/drivers/xen/grant-table.c
+++ b/drivers/xen/grant-table.c
@@ -62,12 +62,10 @@
static grant_ref_t **gnttab_list;
static unsigned int nr_grant_frames;
-static unsigned int boot_max_nr_grant_frames;
static int gnttab_free_count;
static grant_ref_t gnttab_free_head;
static DEFINE_SPINLOCK(gnttab_list_lock);
-unsigned long xen_hvm_resume_frames;
-EXPORT_SYMBOL_GPL(xen_hvm_resume_frames);
+struct grant_frames xen_auto_xlat_grant_frames;
static union {
struct grant_entry_v1 *v1;
@@ -827,6 +825,11 @@ static unsigned int __max_nr_grant_frames(void)
unsigned int gnttab_max_grant_frames(void)
{
unsigned int xen_max = __max_nr_grant_frames();
+ static unsigned int boot_max_nr_grant_frames;
+
+ /* First time, initialize it properly. */
+ if (!boot_max_nr_grant_frames)
+ boot_max_nr_grant_frames = __max_nr_grant_frames();
if (xen_max > boot_max_nr_grant_frames)
return boot_max_nr_grant_frames;
@@ -834,6 +837,51 @@ unsigned int gnttab_max_grant_frames(void)
}
EXPORT_SYMBOL_GPL(gnttab_max_grant_frames);
+int gnttab_setup_auto_xlat_frames(unsigned long addr)
+{
+ xen_pfn_t *pfn;
+ unsigned int max_nr_gframes = __max_nr_grant_frames();
+ unsigned int i;
+ void *vaddr;
+
+ if (xen_auto_xlat_grant_frames.count)
+ return -EINVAL;
+
+ vaddr = xen_remap(addr, PAGE_SIZE * max_nr_gframes);
+ if (vaddr == NULL) {
+ pr_warn("Failed to ioremap gnttab share frames (addr=0x%08lx)!\n",
+ addr);
+ return -ENOMEM;
+ }
+ pfn = kcalloc(max_nr_gframes, sizeof(pfn[0]), GFP_KERNEL);
+ if (!pfn) {
+ xen_unmap(vaddr);
+ return -ENOMEM;
+ }
+ for (i = 0; i < max_nr_gframes; i++)
+ pfn[i] = PFN_DOWN(addr) + i;
+
+ xen_auto_xlat_grant_frames.vaddr = vaddr;
+ xen_auto_xlat_grant_frames.pfn = pfn;
+ xen_auto_xlat_grant_frames.count = max_nr_gframes;
+
+ return 0;
+}
+EXPORT_SYMBOL_GPL(gnttab_setup_auto_xlat_frames);
+
+void gnttab_free_auto_xlat_frames(void)
+{
+ if (!xen_auto_xlat_grant_frames.count)
+ return;
+ kfree(xen_auto_xlat_grant_frames.pfn);
+ xen_unmap(xen_auto_xlat_grant_frames.vaddr);
+
+ xen_auto_xlat_grant_frames.pfn = NULL;
+ xen_auto_xlat_grant_frames.count = 0;
+ xen_auto_xlat_grant_frames.vaddr = NULL;
+}
+EXPORT_SYMBOL_GPL(gnttab_free_auto_xlat_frames);
+
/* Handling of paged out grant targets (GNTST_eagain) */
#define MAX_DELAY 256
static inline void
@@ -1060,10 +1108,11 @@ static int gnttab_map(unsigned int start_idx, unsigned int end_idx)
unsigned int nr_gframes = end_idx + 1;
int rc;
- if (xen_hvm_domain()) {
+ if (xen_feature(XENFEAT_auto_translated_physmap)) {
struct xen_add_to_physmap xatp;
unsigned int i = end_idx;
rc = 0;
+ BUG_ON(xen_auto_xlat_grant_frames.count < nr_gframes);
/*
* Loop backwards, so that the first hypercall has the largest
* index, ensuring that the table will grow only once.
@@ -1072,7 +1121,7 @@ static int gnttab_map(unsigned int start_idx, unsigned int end_idx)
xatp.domid = DOMID_SELF;
xatp.idx = i;
xatp.space = XENMAPSPACE_grant_table;
- xatp.gpfn = (xen_hvm_resume_frames >> PAGE_SHIFT) + i;
+ xatp.gpfn = xen_auto_xlat_grant_frames.pfn[i];
rc = HYPERVISOR_memory_op(XENMEM_add_to_physmap, &xatp);
if (rc != 0) {
pr_warn("grant table add_to_physmap failed, err=%d\n",
@@ -1135,10 +1184,8 @@ static void gnttab_request_version(void)
int rc;
struct gnttab_set_version gsv;
- if (xen_hvm_domain())
- gsv.version = 1;
- else
- gsv.version = 2;
+ gsv.version = 1;
+
rc = HYPERVISOR_grant_table_op(GNTTABOP_set_version, &gsv, 1);
if (rc == 0 && gsv.version == 2) {
grant_table_version = 2;
@@ -1169,22 +1216,15 @@ static int gnttab_setup(void)
if (max_nr_gframes < nr_grant_frames)
return -ENOSYS;
- if (xen_pv_domain())
- return gnttab_map(0, nr_grant_frames - 1);
-
- if (gnttab_shared.addr == NULL) {
- gnttab_shared.addr = xen_remap(xen_hvm_resume_frames,
- PAGE_SIZE * max_nr_gframes);
+ if (xen_feature(XENFEAT_auto_translated_physmap) && gnttab_shared.addr == NULL) {
+ gnttab_shared.addr = xen_auto_xlat_grant_frames.vaddr;
if (gnttab_shared.addr == NULL) {
- pr_warn("Failed to ioremap gnttab share frames (addr=0x%08lx)!\n",
- xen_hvm_resume_frames);
+ pr_warn("gnttab share frames (addr=0x%08lx) is not mapped!\n",
+ (unsigned long)xen_auto_xlat_grant_frames.vaddr);
return -ENOMEM;
}
}
-
- gnttab_map(0, nr_grant_frames - 1);
-
- return 0;
+ return gnttab_map(0, nr_grant_frames - 1);
}
int gnttab_resume(void)
@@ -1227,13 +1267,12 @@ int gnttab_init(void)
gnttab_request_version();
nr_grant_frames = 1;
- boot_max_nr_grant_frames = __max_nr_grant_frames();
/* Determine the maximum number of frames required for the
* grant reference free list on the current hypervisor.
*/
BUG_ON(grefs_per_grant_frame == 0);
- max_nr_glist_frames = (boot_max_nr_grant_frames *
+ max_nr_glist_frames = (gnttab_max_grant_frames() *
grefs_per_grant_frame / RPP);
gnttab_list = kmalloc(max_nr_glist_frames * sizeof(grant_ref_t *),
@@ -1286,5 +1325,6 @@ static int __gnttab_init(void)
return gnttab_init();
}
-
-core_initcall(__gnttab_init);
+/* Starts after core_initcall so that xen_pvh_gnttab_setup can be called
+ * beforehand to initialize xen_auto_xlat_grant_frames. */
+core_initcall_sync(__gnttab_init);
diff --git a/drivers/xen/pci.c b/drivers/xen/pci.c
index 188825122aae..dd9c249ea311 100644
--- a/drivers/xen/pci.c
+++ b/drivers/xen/pci.c
@@ -26,7 +26,9 @@
#include <asm/xen/hypervisor.h>
#include <asm/xen/hypercall.h>
#include "../pci/pci.h"
+#ifdef CONFIG_PCI_MMCONFIG
#include <asm/pci_x86.h>
+#endif
static bool __read_mostly pci_seg_supported = true;
diff --git a/drivers/xen/platform-pci.c b/drivers/xen/platform-pci.c
index 2f3528e93cb9..a1361c312c06 100644
--- a/drivers/xen/platform-pci.c
+++ b/drivers/xen/platform-pci.c
@@ -108,6 +108,7 @@ static int platform_pci_init(struct pci_dev *pdev,
long ioaddr;
long mmio_addr, mmio_len;
unsigned int max_nr_gframes;
+ unsigned long grant_frames;
if (!xen_domain())
return -ENODEV;
@@ -154,13 +155,17 @@ static int platform_pci_init(struct pci_dev *pdev,
}
max_nr_gframes = gnttab_max_grant_frames();
- xen_hvm_resume_frames = alloc_xen_mmio(PAGE_SIZE * max_nr_gframes);
- ret = gnttab_init();
+ grant_frames = alloc_xen_mmio(PAGE_SIZE * max_nr_gframes);
+ ret = gnttab_setup_auto_xlat_frames(grant_frames);
if (ret)
goto out;
+ ret = gnttab_init();
+ if (ret)
+ goto grant_out;
xenbus_probe(NULL);
return 0;
-
+grant_out:
+ gnttab_free_auto_xlat_frames();
out:
pci_release_region(pdev, 0);
mem_out:
diff --git a/drivers/xen/xenbus/xenbus_client.c b/drivers/xen/xenbus/xenbus_client.c
index ec097d6f964d..01d59e66565d 100644
--- a/drivers/xen/xenbus/xenbus_client.c
+++ b/drivers/xen/xenbus/xenbus_client.c
@@ -45,6 +45,7 @@
#include <xen/grant_table.h>
#include <xen/xenbus.h>
#include <xen/xen.h>
+#include <xen/features.h>
#include "xenbus_probe.h"
@@ -743,7 +744,7 @@ static const struct xenbus_ring_ops ring_ops_hvm = {
void __init xenbus_ring_ops_init(void)
{
- if (xen_pv_domain())
+ if (!xen_feature(XENFEAT_auto_translated_physmap))
ring_ops = &ring_ops_pv;
else
ring_ops = &ring_ops_hvm;
diff --git a/drivers/xen/xenbus/xenbus_probe_frontend.c b/drivers/xen/xenbus/xenbus_probe_frontend.c
index 129bf84c19ec..cb385c10d2b1 100644
--- a/drivers/xen/xenbus/xenbus_probe_frontend.c
+++ b/drivers/xen/xenbus/xenbus_probe_frontend.c
@@ -496,7 +496,7 @@ subsys_initcall(xenbus_probe_frontend_init);
#ifndef MODULE
static int __init boot_wait_for_devices(void)
{
- if (xen_hvm_domain() && !xen_platform_pci_unplug)
+ if (!xen_has_pv_devices())
return -ENODEV;
ready_to_wait_for_devices = 1;
OpenPOWER on IntegriCloud