summaryrefslogtreecommitdiffstats
path: root/drivers/xen
diff options
context:
space:
mode:
Diffstat (limited to 'drivers/xen')
-rw-r--r--drivers/xen/Kconfig63
-rw-r--r--drivers/xen/balloon.c28
-rw-r--r--drivers/xen/efi.c84
-rw-r--r--drivers/xen/events/events_base.c18
-rw-r--r--drivers/xen/gntdev-common.h10
-rw-r--r--drivers/xen/gntdev-dmabuf.c34
-rw-r--r--drivers/xen/gntdev.c256
-rw-r--r--drivers/xen/grant-table.c7
-rw-r--r--drivers/xen/mcelog.c14
-rw-r--r--drivers/xen/pci.c21
-rw-r--r--drivers/xen/platform-pci.c14
-rw-r--r--drivers/xen/preempt.c4
-rw-r--r--drivers/xen/pvcalls-back.c2
-rw-r--r--drivers/xen/swiotlb-xen.c101
-rw-r--r--drivers/xen/xen-balloon.c2
-rw-r--r--drivers/xen/xen-pciback/conf_space.c37
-rw-r--r--drivers/xen/xen-pciback/conf_space.h7
-rw-r--r--drivers/xen/xen-pciback/conf_space_capability.c89
-rw-r--r--drivers/xen/xen-pciback/conf_space_header.c19
-rw-r--r--drivers/xen/xen-pciback/pci_stub.c66
-rw-r--r--drivers/xen/xen-pciback/pciback.h1
-rw-r--r--drivers/xen/xenbus/xenbus.h2
-rw-r--r--drivers/xen/xenbus/xenbus_dev_frontend.c20
-rw-r--r--drivers/xen/xenbus/xenbus_probe.c56
-rw-r--r--drivers/xen/xenbus/xenbus_probe_backend.c39
-rw-r--r--drivers/xen/xenbus/xenbus_probe_frontend.c24
26 files changed, 564 insertions, 454 deletions
diff --git a/drivers/xen/Kconfig b/drivers/xen/Kconfig
index 79cc75096f42..61212fc7f0c7 100644
--- a/drivers/xen/Kconfig
+++ b/drivers/xen/Kconfig
@@ -106,27 +106,27 @@ config XENFS
If in doubt, say yes.
config XEN_COMPAT_XENFS
- bool "Create compatibility mount point /proc/xen"
- depends on XENFS
- default y
- help
- The old xenstore userspace tools expect to find "xenbus"
- under /proc/xen, but "xenbus" is now found at the root of the
- xenfs filesystem. Selecting this causes the kernel to create
- the compatibility mount point /proc/xen if it is running on
- a xen platform.
- If in doubt, say yes.
+ bool "Create compatibility mount point /proc/xen"
+ depends on XENFS
+ default y
+ help
+ The old xenstore userspace tools expect to find "xenbus"
+ under /proc/xen, but "xenbus" is now found at the root of the
+ xenfs filesystem. Selecting this causes the kernel to create
+ the compatibility mount point /proc/xen if it is running on
+ a xen platform.
+ If in doubt, say yes.
config XEN_SYS_HYPERVISOR
- bool "Create xen entries under /sys/hypervisor"
- depends on SYSFS
- select SYS_HYPERVISOR
- default y
- help
- Create entries under /sys/hypervisor describing the Xen
- hypervisor environment. When running native or in another
- virtual environment, /sys/hypervisor will still be present,
- but will have no xen contents.
+ bool "Create xen entries under /sys/hypervisor"
+ depends on SYSFS
+ select SYS_HYPERVISOR
+ default y
+ help
+ Create entries under /sys/hypervisor describing the Xen
+ hypervisor environment. When running native or in another
+ virtual environment, /sys/hypervisor will still be present,
+ but will have no xen contents.
config XEN_XENBUS_FRONTEND
tristate
@@ -141,7 +141,8 @@ config XEN_GNTDEV
config XEN_GNTDEV_DMABUF
bool "Add support for dma-buf grant access device driver extension"
- depends on XEN_GNTDEV && XEN_GRANT_DMA_ALLOC && DMA_SHARED_BUFFER
+ depends on XEN_GNTDEV && XEN_GRANT_DMA_ALLOC
+ select DMA_SHARED_BUFFER
help
Allows userspace processes and kernel modules to use Xen backed
dma-buf implementation. With this extension grant references to
@@ -270,7 +271,7 @@ config XEN_ACPI_PROCESSOR
depends on XEN && XEN_DOM0 && X86 && ACPI_PROCESSOR && CPU_FREQ
default m
help
- This ACPI processor uploads Power Management information to the Xen
+ This ACPI processor uploads Power Management information to the Xen
hypervisor.
To do that the driver parses the Power Management data and uploads
@@ -279,19 +280,19 @@ config XEN_ACPI_PROCESSOR
SMM so that other drivers (such as ACPI cpufreq scaling driver) will
not load.
- To compile this driver as a module, choose M here: the module will be
+ To compile this driver as a module, choose M here: the module will be
called xen_acpi_processor If you do not know what to choose, select
M here. If the CPUFREQ drivers are built in, select Y here.
config XEN_MCE_LOG
bool "Xen platform mcelog"
- depends on XEN_DOM0 && X86_64 && X86_MCE
+ depends on XEN_DOM0 && X86_MCE
help
Allow kernel fetching MCE error from Xen platform and
converting it into Linux mcelog format for mcelog tools
config XEN_HAVE_PVMMU
- bool
+ bool
config XEN_EFI
def_bool y
@@ -308,15 +309,15 @@ config XEN_ACPI
depends on X86 && ACPI
config XEN_SYMS
- bool "Xen symbols"
- depends on X86 && XEN_DOM0 && XENFS
- default y if KALLSYMS
- help
- Exports hypervisor symbols (along with their types and addresses) via
- /proc/xen/xensyms file, similar to /proc/kallsyms
+ bool "Xen symbols"
+ depends on X86 && XEN_DOM0 && XENFS
+ default y if KALLSYMS
+ help
+ Exports hypervisor symbols (along with their types and addresses) via
+ /proc/xen/xensyms file, similar to /proc/kallsyms
config XEN_HAVE_VPMU
- bool
+ bool
config XEN_FRONT_PGDIR_SHBUF
tristate
diff --git a/drivers/xen/balloon.c b/drivers/xen/balloon.c
index 4e11de6cde81..0c142bcab79d 100644
--- a/drivers/xen/balloon.c
+++ b/drivers/xen/balloon.c
@@ -156,8 +156,10 @@ static DECLARE_DELAYED_WORK(balloon_worker, balloon_process);
(GFP_HIGHUSER | __GFP_NOWARN | __GFP_NORETRY | __GFP_NOMEMALLOC)
/* balloon_append: add the given page to the balloon. */
-static void __balloon_append(struct page *page)
+static void balloon_append(struct page *page)
{
+ __SetPageOffline(page);
+
/* Lowmem is re-populated first, so highmem pages go at list tail. */
if (PageHighMem(page)) {
list_add_tail(&page->lru, &ballooned_pages);
@@ -169,11 +171,6 @@ static void __balloon_append(struct page *page)
wake_up(&balloon_wq);
}
-static void balloon_append(struct page *page)
-{
- __balloon_append(page);
-}
-
/* balloon_retrieve: rescue a page from the balloon, if it is not empty. */
static struct page *balloon_retrieve(bool require_lowmem)
{
@@ -192,6 +189,7 @@ static struct page *balloon_retrieve(bool require_lowmem)
else
balloon_stats.balloon_low--;
+ __ClearPageOffline(page);
return page;
}
@@ -376,9 +374,7 @@ static void xen_online_page(struct page *page, unsigned int order)
mutex_lock(&balloon_mutex);
for (i = 0; i < size; i++) {
p = pfn_to_page(start_pfn + i);
- __online_page_set_limits(p);
- __SetPageOffline(p);
- __balloon_append(p);
+ balloon_append(p);
}
mutex_unlock(&balloon_mutex);
}
@@ -398,7 +394,8 @@ static struct notifier_block xen_memory_nb = {
#else
static enum bp_state reserve_additional_memory(void)
{
- balloon_stats.target_pages = balloon_stats.current_pages;
+ balloon_stats.target_pages = balloon_stats.current_pages +
+ balloon_stats.target_unpopulated;
return BP_ECANCELED;
}
#endif /* CONFIG_XEN_BALLOON_MEMORY_HOTPLUG */
@@ -444,7 +441,6 @@ static enum bp_state increase_reservation(unsigned long nr_pages)
xenmem_reservation_va_mapping_update(1, &page, &frame_list[i]);
/* Relinquish the page back to the allocator. */
- __ClearPageOffline(page);
free_reserved_page(page);
}
@@ -471,7 +467,6 @@ static enum bp_state decrease_reservation(unsigned long nr_pages, gfp_t gfp)
state = BP_EAGAIN;
break;
}
- __SetPageOffline(page);
adjust_managed_page_count(page, -1);
xenmem_reservation_scrub_page(page);
list_add(&page->lru, &pages);
@@ -611,7 +606,6 @@ int alloc_xenballooned_pages(int nr_pages, struct page **pages)
while (pgno < nr_pages) {
page = balloon_retrieve(true);
if (page) {
- __ClearPageOffline(page);
pages[pgno++] = page;
#ifdef CONFIG_XEN_HAVE_PVMMU
/*
@@ -653,10 +647,8 @@ void free_xenballooned_pages(int nr_pages, struct page **pages)
mutex_lock(&balloon_mutex);
for (i = 0; i < nr_pages; i++) {
- if (pages[i]) {
- __SetPageOffline(pages[i]);
+ if (pages[i])
balloon_append(pages[i]);
- }
}
balloon_stats.target_unpopulated -= nr_pages;
@@ -674,7 +666,6 @@ static void __init balloon_add_region(unsigned long start_pfn,
unsigned long pages)
{
unsigned long pfn, extra_pfn_end;
- struct page *page;
/*
* If the amount of usable memory has been limited (e.g., with
@@ -684,11 +675,10 @@ static void __init balloon_add_region(unsigned long start_pfn,
extra_pfn_end = min(max_pfn, start_pfn + pages);
for (pfn = start_pfn; pfn < extra_pfn_end; pfn++) {
- page = pfn_to_page(pfn);
/* totalram_pages and totalhigh_pages do not
include the boot-time balloon extension, so
don't subtract from it. */
- __balloon_append(page);
+ balloon_append(pfn_to_page(pfn));
}
balloon_stats.total_pages += extra_pfn_end - start_pfn;
diff --git a/drivers/xen/efi.c b/drivers/xen/efi.c
index 89d60f8e3c18..d1ff2186ebb4 100644
--- a/drivers/xen/efi.c
+++ b/drivers/xen/efi.c
@@ -40,7 +40,7 @@
#define efi_data(op) (op.u.efi_runtime_call)
-efi_status_t xen_efi_get_time(efi_time_t *tm, efi_time_cap_t *tc)
+static efi_status_t xen_efi_get_time(efi_time_t *tm, efi_time_cap_t *tc)
{
struct xen_platform_op op = INIT_EFI_OP(get_time);
@@ -61,9 +61,8 @@ efi_status_t xen_efi_get_time(efi_time_t *tm, efi_time_cap_t *tc)
return efi_data(op).status;
}
-EXPORT_SYMBOL_GPL(xen_efi_get_time);
-efi_status_t xen_efi_set_time(efi_time_t *tm)
+static efi_status_t xen_efi_set_time(efi_time_t *tm)
{
struct xen_platform_op op = INIT_EFI_OP(set_time);
@@ -75,10 +74,10 @@ efi_status_t xen_efi_set_time(efi_time_t *tm)
return efi_data(op).status;
}
-EXPORT_SYMBOL_GPL(xen_efi_set_time);
-efi_status_t xen_efi_get_wakeup_time(efi_bool_t *enabled, efi_bool_t *pending,
- efi_time_t *tm)
+static efi_status_t xen_efi_get_wakeup_time(efi_bool_t *enabled,
+ efi_bool_t *pending,
+ efi_time_t *tm)
{
struct xen_platform_op op = INIT_EFI_OP(get_wakeup_time);
@@ -98,9 +97,8 @@ efi_status_t xen_efi_get_wakeup_time(efi_bool_t *enabled, efi_bool_t *pending,
return efi_data(op).status;
}
-EXPORT_SYMBOL_GPL(xen_efi_get_wakeup_time);
-efi_status_t xen_efi_set_wakeup_time(efi_bool_t enabled, efi_time_t *tm)
+static efi_status_t xen_efi_set_wakeup_time(efi_bool_t enabled, efi_time_t *tm)
{
struct xen_platform_op op = INIT_EFI_OP(set_wakeup_time);
@@ -117,11 +115,10 @@ efi_status_t xen_efi_set_wakeup_time(efi_bool_t enabled, efi_time_t *tm)
return efi_data(op).status;
}
-EXPORT_SYMBOL_GPL(xen_efi_set_wakeup_time);
-efi_status_t xen_efi_get_variable(efi_char16_t *name, efi_guid_t *vendor,
- u32 *attr, unsigned long *data_size,
- void *data)
+static efi_status_t xen_efi_get_variable(efi_char16_t *name, efi_guid_t *vendor,
+ u32 *attr, unsigned long *data_size,
+ void *data)
{
struct xen_platform_op op = INIT_EFI_OP(get_variable);
@@ -141,11 +138,10 @@ efi_status_t xen_efi_get_variable(efi_char16_t *name, efi_guid_t *vendor,
return efi_data(op).status;
}
-EXPORT_SYMBOL_GPL(xen_efi_get_variable);
-efi_status_t xen_efi_get_next_variable(unsigned long *name_size,
- efi_char16_t *name,
- efi_guid_t *vendor)
+static efi_status_t xen_efi_get_next_variable(unsigned long *name_size,
+ efi_char16_t *name,
+ efi_guid_t *vendor)
{
struct xen_platform_op op = INIT_EFI_OP(get_next_variable_name);
@@ -165,11 +161,10 @@ efi_status_t xen_efi_get_next_variable(unsigned long *name_size,
return efi_data(op).status;
}
-EXPORT_SYMBOL_GPL(xen_efi_get_next_variable);
-efi_status_t xen_efi_set_variable(efi_char16_t *name, efi_guid_t *vendor,
- u32 attr, unsigned long data_size,
- void *data)
+static efi_status_t xen_efi_set_variable(efi_char16_t *name, efi_guid_t *vendor,
+ u32 attr, unsigned long data_size,
+ void *data)
{
struct xen_platform_op op = INIT_EFI_OP(set_variable);
@@ -186,11 +181,10 @@ efi_status_t xen_efi_set_variable(efi_char16_t *name, efi_guid_t *vendor,
return efi_data(op).status;
}
-EXPORT_SYMBOL_GPL(xen_efi_set_variable);
-efi_status_t xen_efi_query_variable_info(u32 attr, u64 *storage_space,
- u64 *remaining_space,
- u64 *max_variable_size)
+static efi_status_t xen_efi_query_variable_info(u32 attr, u64 *storage_space,
+ u64 *remaining_space,
+ u64 *max_variable_size)
{
struct xen_platform_op op = INIT_EFI_OP(query_variable_info);
@@ -208,9 +202,8 @@ efi_status_t xen_efi_query_variable_info(u32 attr, u64 *storage_space,
return efi_data(op).status;
}
-EXPORT_SYMBOL_GPL(xen_efi_query_variable_info);
-efi_status_t xen_efi_get_next_high_mono_count(u32 *count)
+static efi_status_t xen_efi_get_next_high_mono_count(u32 *count)
{
struct xen_platform_op op = INIT_EFI_OP(get_next_high_monotonic_count);
@@ -221,10 +214,9 @@ efi_status_t xen_efi_get_next_high_mono_count(u32 *count)
return efi_data(op).status;
}
-EXPORT_SYMBOL_GPL(xen_efi_get_next_high_mono_count);
-efi_status_t xen_efi_update_capsule(efi_capsule_header_t **capsules,
- unsigned long count, unsigned long sg_list)
+static efi_status_t xen_efi_update_capsule(efi_capsule_header_t **capsules,
+ unsigned long count, unsigned long sg_list)
{
struct xen_platform_op op = INIT_EFI_OP(update_capsule);
@@ -241,11 +233,9 @@ efi_status_t xen_efi_update_capsule(efi_capsule_header_t **capsules,
return efi_data(op).status;
}
-EXPORT_SYMBOL_GPL(xen_efi_update_capsule);
-efi_status_t xen_efi_query_capsule_caps(efi_capsule_header_t **capsules,
- unsigned long count, u64 *max_size,
- int *reset_type)
+static efi_status_t xen_efi_query_capsule_caps(efi_capsule_header_t **capsules,
+ unsigned long count, u64 *max_size, int *reset_type)
{
struct xen_platform_op op = INIT_EFI_OP(query_capsule_capabilities);
@@ -264,10 +254,9 @@ efi_status_t xen_efi_query_capsule_caps(efi_capsule_header_t **capsules,
return efi_data(op).status;
}
-EXPORT_SYMBOL_GPL(xen_efi_query_capsule_caps);
-void xen_efi_reset_system(int reset_type, efi_status_t status,
- unsigned long data_size, efi_char16_t *data)
+static void xen_efi_reset_system(int reset_type, efi_status_t status,
+ unsigned long data_size, efi_char16_t *data)
{
switch (reset_type) {
case EFI_RESET_COLD:
@@ -281,4 +270,25 @@ void xen_efi_reset_system(int reset_type, efi_status_t status,
BUG();
}
}
-EXPORT_SYMBOL_GPL(xen_efi_reset_system);
+
+/*
+ * Set XEN EFI runtime services function pointers. Other fields of struct efi,
+ * e.g. efi.systab, will be set like normal EFI.
+ */
+void __init xen_efi_runtime_setup(void)
+{
+ efi.get_time = xen_efi_get_time;
+ efi.set_time = xen_efi_set_time;
+ efi.get_wakeup_time = xen_efi_get_wakeup_time;
+ efi.set_wakeup_time = xen_efi_set_wakeup_time;
+ efi.get_variable = xen_efi_get_variable;
+ efi.get_next_variable = xen_efi_get_next_variable;
+ efi.set_variable = xen_efi_set_variable;
+ efi.set_variable_nonblocking = xen_efi_set_variable;
+ efi.query_variable_info = xen_efi_query_variable_info;
+ efi.query_variable_info_nonblocking = xen_efi_query_variable_info;
+ efi.update_capsule = xen_efi_update_capsule;
+ efi.query_capsule_caps = xen_efi_query_capsule_caps;
+ efi.get_next_high_mono_count = xen_efi_get_next_high_mono_count;
+ efi.reset_system = xen_efi_reset_system;
+}
diff --git a/drivers/xen/events/events_base.c b/drivers/xen/events/events_base.c
index 2e8570c09789..499eff7d3f65 100644
--- a/drivers/xen/events/events_base.c
+++ b/drivers/xen/events/events_base.c
@@ -247,7 +247,7 @@ static void xen_irq_info_cleanup(struct irq_info *info)
*/
unsigned int evtchn_from_irq(unsigned irq)
{
- if (unlikely(WARN(irq >= nr_irqs, "Invalid irq %d!\n", irq)))
+ if (WARN(irq >= nr_irqs, "Invalid irq %d!\n", irq))
return 0;
return info_for_irq(irq)->evtchn;
@@ -1213,31 +1213,21 @@ void xen_send_IPI_one(unsigned int cpu, enum ipi_vector vector)
notify_remote_via_irq(irq);
}
-static DEFINE_PER_CPU(unsigned, xed_nesting_count);
-
static void __xen_evtchn_do_upcall(void)
{
struct vcpu_info *vcpu_info = __this_cpu_read(xen_vcpu);
- int cpu = get_cpu();
- unsigned count;
+ int cpu = smp_processor_id();
do {
vcpu_info->evtchn_upcall_pending = 0;
- if (__this_cpu_inc_return(xed_nesting_count) - 1)
- goto out;
-
xen_evtchn_handle_events(cpu);
BUG_ON(!irqs_disabled());
- count = __this_cpu_read(xed_nesting_count);
- __this_cpu_write(xed_nesting_count, 0);
- } while (count != 1 || vcpu_info->evtchn_upcall_pending);
-
-out:
+ virt_rmb(); /* Hypervisor can set upcall pending. */
- put_cpu();
+ } while (vcpu_info->evtchn_upcall_pending);
}
void xen_evtchn_do_upcall(struct pt_regs *regs)
diff --git a/drivers/xen/gntdev-common.h b/drivers/xen/gntdev-common.h
index 2f8b949c3eeb..9a3960ecff6c 100644
--- a/drivers/xen/gntdev-common.h
+++ b/drivers/xen/gntdev-common.h
@@ -21,15 +21,8 @@ struct gntdev_dmabuf_priv;
struct gntdev_priv {
/* Maps with visible offsets in the file descriptor. */
struct list_head maps;
- /*
- * Maps that are not visible; will be freed on munmap.
- * Only populated if populate_freeable_maps == 1
- */
- struct list_head freeable_maps;
/* lock protects maps and freeable_maps. */
struct mutex lock;
- struct mm_struct *mm;
- struct mmu_notifier mn;
#ifdef CONFIG_XEN_GRANT_DMA_ALLOC
/* Device for which DMA memory is allocated. */
@@ -49,6 +42,7 @@ struct gntdev_unmap_notify {
};
struct gntdev_grant_map {
+ struct mmu_interval_notifier notifier;
struct list_head next;
struct vm_area_struct *vma;
int index;
@@ -87,7 +81,7 @@ void gntdev_add_map(struct gntdev_priv *priv, struct gntdev_grant_map *add);
void gntdev_put_map(struct gntdev_priv *priv, struct gntdev_grant_map *map);
-bool gntdev_account_mapped_pages(int count);
+bool gntdev_test_page_count(unsigned int count);
int gntdev_map_grant_pages(struct gntdev_grant_map *map);
diff --git a/drivers/xen/gntdev-dmabuf.c b/drivers/xen/gntdev-dmabuf.c
index 2c4f324f8626..75d3bb948bf3 100644
--- a/drivers/xen/gntdev-dmabuf.c
+++ b/drivers/xen/gntdev-dmabuf.c
@@ -342,35 +342,12 @@ static void dmabuf_exp_ops_release(struct dma_buf *dma_buf)
mutex_unlock(&priv->lock);
}
-static void *dmabuf_exp_ops_kmap(struct dma_buf *dma_buf,
- unsigned long page_num)
-{
- /* Not implemented. */
- return NULL;
-}
-
-static void dmabuf_exp_ops_kunmap(struct dma_buf *dma_buf,
- unsigned long page_num, void *addr)
-{
- /* Not implemented. */
-}
-
-static int dmabuf_exp_ops_mmap(struct dma_buf *dma_buf,
- struct vm_area_struct *vma)
-{
- /* Not implemented. */
- return 0;
-}
-
static const struct dma_buf_ops dmabuf_exp_ops = {
.attach = dmabuf_exp_ops_attach,
.detach = dmabuf_exp_ops_detach,
.map_dma_buf = dmabuf_exp_ops_map_dma_buf,
.unmap_dma_buf = dmabuf_exp_ops_unmap_dma_buf,
.release = dmabuf_exp_ops_release,
- .map = dmabuf_exp_ops_kmap,
- .unmap = dmabuf_exp_ops_kunmap,
- .mmap = dmabuf_exp_ops_mmap,
};
struct gntdev_dmabuf_export_args {
@@ -446,7 +423,7 @@ dmabuf_exp_alloc_backing_storage(struct gntdev_priv *priv, int dmabuf_flags,
{
struct gntdev_grant_map *map;
- if (unlikely(count <= 0))
+ if (unlikely(gntdev_test_page_count(count)))
return ERR_PTR(-EINVAL);
if ((dmabuf_flags & GNTDEV_DMA_FLAG_WC) &&
@@ -459,11 +436,6 @@ dmabuf_exp_alloc_backing_storage(struct gntdev_priv *priv, int dmabuf_flags,
if (!map)
return ERR_PTR(-ENOMEM);
- if (unlikely(gntdev_account_mapped_pages(count))) {
- pr_debug("can't map %d pages: over limit\n", count);
- gntdev_put_map(NULL, map);
- return ERR_PTR(-ENOMEM);
- }
return map;
}
@@ -771,7 +743,7 @@ long gntdev_ioctl_dmabuf_exp_from_refs(struct gntdev_priv *priv, int use_ptemod,
if (copy_from_user(&op, u, sizeof(op)) != 0)
return -EFAULT;
- if (unlikely(op.count <= 0))
+ if (unlikely(gntdev_test_page_count(op.count)))
return -EINVAL;
refs = kcalloc(op.count, sizeof(*refs), GFP_KERNEL);
@@ -818,7 +790,7 @@ long gntdev_ioctl_dmabuf_imp_to_refs(struct gntdev_priv *priv,
if (copy_from_user(&op, u, sizeof(op)) != 0)
return -EFAULT;
- if (unlikely(op.count <= 0))
+ if (unlikely(gntdev_test_page_count(op.count)))
return -EINVAL;
gntdev_dmabuf = dmabuf_imp_to_refs(priv->dmabuf_priv,
diff --git a/drivers/xen/gntdev.c b/drivers/xen/gntdev.c
index a446a7221e13..0258415ca0b2 100644
--- a/drivers/xen/gntdev.c
+++ b/drivers/xen/gntdev.c
@@ -22,6 +22,7 @@
#define pr_fmt(fmt) "xen:" KBUILD_MODNAME ": " fmt
+#include <linux/dma-mapping.h>
#include <linux/module.h>
#include <linux/kernel.h>
#include <linux/init.h>
@@ -34,9 +35,6 @@
#include <linux/slab.h>
#include <linux/highmem.h>
#include <linux/refcount.h>
-#ifdef CONFIG_XEN_GRANT_DMA_ALLOC
-#include <linux/of_device.h>
-#endif
#include <xen/xen.h>
#include <xen/grant_table.h>
@@ -57,15 +55,12 @@ MODULE_AUTHOR("Derek G. Murray <Derek.Murray@cl.cam.ac.uk>, "
"Gerd Hoffmann <kraxel@redhat.com>");
MODULE_DESCRIPTION("User-space granted page access driver");
-static int limit = 1024*1024;
-module_param(limit, int, 0644);
-MODULE_PARM_DESC(limit, "Maximum number of grants that may be mapped by "
- "the gntdev device");
-
-static atomic_t pages_mapped = ATOMIC_INIT(0);
+static unsigned int limit = 64*1024;
+module_param(limit, uint, 0644);
+MODULE_PARM_DESC(limit,
+ "Maximum number of grants that may be mapped by one mapping request");
static int use_ptemod;
-#define populate_freeable_maps use_ptemod
static int unmap_grant_pages(struct gntdev_grant_map *map,
int offset, int pages);
@@ -74,9 +69,9 @@ static struct miscdevice gntdev_miscdev;
/* ------------------------------------------------------------------ */
-bool gntdev_account_mapped_pages(int count)
+bool gntdev_test_page_count(unsigned int count)
{
- return atomic_add_return(count, &pages_mapped) > limit;
+ return !count || count > limit;
}
static void gntdev_print_maps(struct gntdev_priv *priv,
@@ -117,14 +112,14 @@ static void gntdev_free_map(struct gntdev_grant_map *map)
gnttab_free_pages(map->count, map->pages);
#ifdef CONFIG_XEN_GRANT_DMA_ALLOC
- kfree(map->frames);
+ kvfree(map->frames);
#endif
- kfree(map->pages);
- kfree(map->grants);
- kfree(map->map_ops);
- kfree(map->unmap_ops);
- kfree(map->kmap_ops);
- kfree(map->kunmap_ops);
+ kvfree(map->pages);
+ kvfree(map->grants);
+ kvfree(map->map_ops);
+ kvfree(map->unmap_ops);
+ kvfree(map->kmap_ops);
+ kvfree(map->kunmap_ops);
kfree(map);
}
@@ -138,12 +133,13 @@ struct gntdev_grant_map *gntdev_alloc_map(struct gntdev_priv *priv, int count,
if (NULL == add)
return NULL;
- add->grants = kcalloc(count, sizeof(add->grants[0]), GFP_KERNEL);
- add->map_ops = kcalloc(count, sizeof(add->map_ops[0]), GFP_KERNEL);
- add->unmap_ops = kcalloc(count, sizeof(add->unmap_ops[0]), GFP_KERNEL);
- add->kmap_ops = kcalloc(count, sizeof(add->kmap_ops[0]), GFP_KERNEL);
- add->kunmap_ops = kcalloc(count, sizeof(add->kunmap_ops[0]), GFP_KERNEL);
- add->pages = kcalloc(count, sizeof(add->pages[0]), GFP_KERNEL);
+ add->grants = kvcalloc(count, sizeof(add->grants[0]), GFP_KERNEL);
+ add->map_ops = kvcalloc(count, sizeof(add->map_ops[0]), GFP_KERNEL);
+ add->unmap_ops = kvcalloc(count, sizeof(add->unmap_ops[0]), GFP_KERNEL);
+ add->kmap_ops = kvcalloc(count, sizeof(add->kmap_ops[0]), GFP_KERNEL);
+ add->kunmap_ops = kvcalloc(count,
+ sizeof(add->kunmap_ops[0]), GFP_KERNEL);
+ add->pages = kvcalloc(count, sizeof(add->pages[0]), GFP_KERNEL);
if (NULL == add->grants ||
NULL == add->map_ops ||
NULL == add->unmap_ops ||
@@ -162,8 +158,8 @@ struct gntdev_grant_map *gntdev_alloc_map(struct gntdev_priv *priv, int count,
if (dma_flags & (GNTDEV_DMA_FLAG_WC | GNTDEV_DMA_FLAG_COHERENT)) {
struct gnttab_dma_alloc_args args;
- add->frames = kcalloc(count, sizeof(add->frames[0]),
- GFP_KERNEL);
+ add->frames = kvcalloc(count, sizeof(add->frames[0]),
+ GFP_KERNEL);
if (!add->frames)
goto err;
@@ -244,19 +240,11 @@ void gntdev_put_map(struct gntdev_priv *priv, struct gntdev_grant_map *map)
if (!refcount_dec_and_test(&map->users))
return;
- atomic_sub(map->count, &pages_mapped);
-
if (map->notify.flags & UNMAP_NOTIFY_SEND_EVENT) {
notify_remote_via_evtchn(map->notify.event);
evtchn_put(map->notify.event);
}
- if (populate_freeable_maps && priv) {
- mutex_lock(&priv->lock);
- list_del(&map->next);
- mutex_unlock(&priv->lock);
- }
-
if (map->pages && !use_ptemod)
unmap_grant_pages(map, 0, map->count);
gntdev_free_map(map);
@@ -446,16 +434,9 @@ static void gntdev_vma_close(struct vm_area_struct *vma)
pr_debug("gntdev_vma_close %p\n", vma);
if (use_ptemod) {
- /* It is possible that an mmu notifier could be running
- * concurrently, so take priv->lock to ensure that the vma won't
- * vanishing during the unmap_grant_pages call, since we will
- * spin here until that completes. Such a concurrent call will
- * not do any unmapping, since that has been done prior to
- * closing the vma, but it may still iterate the unmap_ops list.
- */
- mutex_lock(&priv->lock);
+ WARN_ON(map->vma != vma);
+ mmu_interval_notifier_remove(&map->notifier);
map->vma = NULL;
- mutex_unlock(&priv->lock);
}
vma->vm_private_data = NULL;
gntdev_put_map(priv, map);
@@ -477,109 +458,44 @@ static const struct vm_operations_struct gntdev_vmops = {
/* ------------------------------------------------------------------ */
-static bool in_range(struct gntdev_grant_map *map,
- unsigned long start, unsigned long end)
-{
- if (!map->vma)
- return false;
- if (map->vma->vm_start >= end)
- return false;
- if (map->vma->vm_end <= start)
- return false;
-
- return true;
-}
-
-static int unmap_if_in_range(struct gntdev_grant_map *map,
- unsigned long start, unsigned long end,
- bool blockable)
+static bool gntdev_invalidate(struct mmu_interval_notifier *mn,
+ const struct mmu_notifier_range *range,
+ unsigned long cur_seq)
{
+ struct gntdev_grant_map *map =
+ container_of(mn, struct gntdev_grant_map, notifier);
unsigned long mstart, mend;
int err;
- if (!in_range(map, start, end))
- return 0;
+ if (!mmu_notifier_range_blockable(range))
+ return false;
- if (!blockable)
- return -EAGAIN;
+ /*
+ * If the VMA is split or otherwise changed the notifier is not
+ * updated, but we don't want to process VA's outside the modified
+ * VMA. FIXME: It would be much more understandable to just prevent
+ * modifying the VMA in the first place.
+ */
+ if (map->vma->vm_start >= range->end ||
+ map->vma->vm_end <= range->start)
+ return true;
- mstart = max(start, map->vma->vm_start);
- mend = min(end, map->vma->vm_end);
+ mstart = max(range->start, map->vma->vm_start);
+ mend = min(range->end, map->vma->vm_end);
pr_debug("map %d+%d (%lx %lx), range %lx %lx, mrange %lx %lx\n",
map->index, map->count,
map->vma->vm_start, map->vma->vm_end,
- start, end, mstart, mend);
+ range->start, range->end, mstart, mend);
err = unmap_grant_pages(map,
(mstart - map->vma->vm_start) >> PAGE_SHIFT,
(mend - mstart) >> PAGE_SHIFT);
WARN_ON(err);
- return 0;
-}
-
-static int mn_invl_range_start(struct mmu_notifier *mn,
- const struct mmu_notifier_range *range)
-{
- struct gntdev_priv *priv = container_of(mn, struct gntdev_priv, mn);
- struct gntdev_grant_map *map;
- int ret = 0;
-
- if (mmu_notifier_range_blockable(range))
- mutex_lock(&priv->lock);
- else if (!mutex_trylock(&priv->lock))
- return -EAGAIN;
-
- list_for_each_entry(map, &priv->maps, next) {
- ret = unmap_if_in_range(map, range->start, range->end,
- mmu_notifier_range_blockable(range));
- if (ret)
- goto out_unlock;
- }
- list_for_each_entry(map, &priv->freeable_maps, next) {
- ret = unmap_if_in_range(map, range->start, range->end,
- mmu_notifier_range_blockable(range));
- if (ret)
- goto out_unlock;
- }
-
-out_unlock:
- mutex_unlock(&priv->lock);
-
- return ret;
-}
-
-static void mn_release(struct mmu_notifier *mn,
- struct mm_struct *mm)
-{
- struct gntdev_priv *priv = container_of(mn, struct gntdev_priv, mn);
- struct gntdev_grant_map *map;
- int err;
-
- mutex_lock(&priv->lock);
- list_for_each_entry(map, &priv->maps, next) {
- if (!map->vma)
- continue;
- pr_debug("map %d+%d (%lx %lx)\n",
- map->index, map->count,
- map->vma->vm_start, map->vma->vm_end);
- err = unmap_grant_pages(map, /* offset */ 0, map->count);
- WARN_ON(err);
- }
- list_for_each_entry(map, &priv->freeable_maps, next) {
- if (!map->vma)
- continue;
- pr_debug("map %d+%d (%lx %lx)\n",
- map->index, map->count,
- map->vma->vm_start, map->vma->vm_end);
- err = unmap_grant_pages(map, /* offset */ 0, map->count);
- WARN_ON(err);
- }
- mutex_unlock(&priv->lock);
+ return true;
}
-static const struct mmu_notifier_ops gntdev_mmu_ops = {
- .release = mn_release,
- .invalidate_range_start = mn_invl_range_start,
+static const struct mmu_interval_notifier_ops gntdev_mmu_ops = {
+ .invalidate = gntdev_invalidate,
};
/* ------------------------------------------------------------------ */
@@ -587,52 +503,28 @@ static const struct mmu_notifier_ops gntdev_mmu_ops = {
static int gntdev_open(struct inode *inode, struct file *flip)
{
struct gntdev_priv *priv;
- int ret = 0;
priv = kzalloc(sizeof(*priv), GFP_KERNEL);
if (!priv)
return -ENOMEM;
INIT_LIST_HEAD(&priv->maps);
- INIT_LIST_HEAD(&priv->freeable_maps);
mutex_init(&priv->lock);
#ifdef CONFIG_XEN_GNTDEV_DMABUF
priv->dmabuf_priv = gntdev_dmabuf_init(flip);
if (IS_ERR(priv->dmabuf_priv)) {
- ret = PTR_ERR(priv->dmabuf_priv);
- kfree(priv);
- return ret;
- }
-#endif
+ int ret = PTR_ERR(priv->dmabuf_priv);
- if (use_ptemod) {
- priv->mm = get_task_mm(current);
- if (!priv->mm) {
- kfree(priv);
- return -ENOMEM;
- }
- priv->mn.ops = &gntdev_mmu_ops;
- ret = mmu_notifier_register(&priv->mn, priv->mm);
- mmput(priv->mm);
- }
-
- if (ret) {
kfree(priv);
return ret;
}
+#endif
flip->private_data = priv;
#ifdef CONFIG_XEN_GRANT_DMA_ALLOC
priv->dma_dev = gntdev_miscdev.this_device;
-
- /*
- * The device is not spawn from a device tree, so arch_setup_dma_ops
- * is not called, thus leaving the device with dummy DMA ops.
- * Fix this by calling of_dma_configure() with a NULL node to set
- * default DMA ops.
- */
- of_dma_configure(priv->dma_dev, NULL, true);
+ dma_coerce_mask_and_coherent(priv->dma_dev, DMA_BIT_MASK(64));
#endif
pr_debug("priv %p\n", priv);
@@ -653,16 +545,12 @@ static int gntdev_release(struct inode *inode, struct file *flip)
list_del(&map->next);
gntdev_put_map(NULL /* already removed */, map);
}
- WARN_ON(!list_empty(&priv->freeable_maps));
mutex_unlock(&priv->lock);
#ifdef CONFIG_XEN_GNTDEV_DMABUF
gntdev_dmabuf_fini(priv->dmabuf_priv);
#endif
- if (use_ptemod)
- mmu_notifier_unregister(&priv->mn, priv->mm);
-
kfree(priv);
return 0;
}
@@ -677,7 +565,7 @@ static long gntdev_ioctl_map_grant_ref(struct gntdev_priv *priv,
if (copy_from_user(&op, u, sizeof(op)) != 0)
return -EFAULT;
pr_debug("priv %p, add %d\n", priv, op.count);
- if (unlikely(op.count <= 0))
+ if (unlikely(gntdev_test_page_count(op.count)))
return -EINVAL;
err = -ENOMEM;
@@ -685,12 +573,6 @@ static long gntdev_ioctl_map_grant_ref(struct gntdev_priv *priv,
if (!map)
return err;
- if (unlikely(gntdev_account_mapped_pages(op.count))) {
- pr_debug("can't map: over limit\n");
- gntdev_put_map(NULL, map);
- return err;
- }
-
if (copy_from_user(map->grants, &u->refs,
sizeof(map->grants[0]) * op.count) != 0) {
gntdev_put_map(NULL, map);
@@ -723,8 +605,6 @@ static long gntdev_ioctl_unmap_grant_ref(struct gntdev_priv *priv,
map = gntdev_find_map_index(priv, op.index >> PAGE_SHIFT, op.count);
if (map) {
list_del(&map->next);
- if (populate_freeable_maps)
- list_add_tail(&map->next, &priv->freeable_maps);
err = 0;
}
mutex_unlock(&priv->lock);
@@ -1096,11 +976,6 @@ static int gntdev_mmap(struct file *flip, struct vm_area_struct *vma)
goto unlock_out;
if (use_ptemod && map->vma)
goto unlock_out;
- if (use_ptemod && priv->mm != vma->vm_mm) {
- pr_warn("Huh? Other mm?\n");
- goto unlock_out;
- }
-
refcount_inc(&map->users);
vma->vm_ops = &gntdev_vmops;
@@ -1111,10 +986,6 @@ static int gntdev_mmap(struct file *flip, struct vm_area_struct *vma)
vma->vm_flags |= VM_DONTCOPY;
vma->vm_private_data = map;
-
- if (use_ptemod)
- map->vma = vma;
-
if (map->flags) {
if ((vma->vm_flags & VM_WRITE) &&
(map->flags & GNTMAP_readonly))
@@ -1125,9 +996,29 @@ static int gntdev_mmap(struct file *flip, struct vm_area_struct *vma)
map->flags |= GNTMAP_readonly;
}
+ if (use_ptemod) {
+ map->vma = vma;
+ err = mmu_interval_notifier_insert_locked(
+ &map->notifier, vma->vm_mm, vma->vm_start,
+ vma->vm_end - vma->vm_start, &gntdev_mmu_ops);
+ if (err)
+ goto out_unlock_put;
+ }
mutex_unlock(&priv->lock);
if (use_ptemod) {
+ /*
+ * gntdev takes the address of the PTE in find_grant_ptes() and
+ * passes it to the hypervisor in gntdev_map_grant_pages(). The
+ * purpose of the notifier is to prevent the hypervisor pointer
+ * to the PTE from going stale.
+ *
+ * Since this vma's mappings can't be touched without the
+ * mmap_sem, and we are holding it now, there is no need for
+ * the notifier_range locking pattern.
+ */
+ mmu_interval_read_begin(&map->notifier);
+
map->pages_vm_start = vma->vm_start;
err = apply_to_page_range(vma->vm_mm, vma->vm_start,
vma->vm_end - vma->vm_start,
@@ -1175,8 +1066,11 @@ out_unlock_put:
mutex_unlock(&priv->lock);
out_put_map:
if (use_ptemod) {
- map->vma = NULL;
unmap_grant_pages(map, 0, map->count);
+ if (map->vma) {
+ mmu_interval_notifier_remove(&map->notifier);
+ map->vma = NULL;
+ }
}
gntdev_put_map(priv, map);
return err;
diff --git a/drivers/xen/grant-table.c b/drivers/xen/grant-table.c
index 7ea6fb6a2e5d..7b36b51cdb9f 100644
--- a/drivers/xen/grant-table.c
+++ b/drivers/xen/grant-table.c
@@ -664,7 +664,6 @@ static int grow_gnttab_list(unsigned int more_frames)
unsigned int nr_glist_frames, new_nr_glist_frames;
unsigned int grefs_per_frame;
- BUG_ON(gnttab_interface == NULL);
grefs_per_frame = gnttab_interface->grefs_per_grant_frame;
new_nr_grant_frames = nr_grant_frames + more_frames;
@@ -1160,7 +1159,6 @@ EXPORT_SYMBOL_GPL(gnttab_unmap_refs_sync);
static unsigned int nr_status_frames(unsigned int nr_grant_frames)
{
- BUG_ON(gnttab_interface == NULL);
return gnttab_frames(nr_grant_frames, SPP);
}
@@ -1363,8 +1361,7 @@ static int gnttab_setup(void)
if (xen_feature(XENFEAT_auto_translated_physmap) && gnttab_shared.addr == NULL) {
gnttab_shared.addr = xen_auto_xlat_grant_frames.vaddr;
if (gnttab_shared.addr == NULL) {
- pr_warn("gnttab share frames (addr=0x%08lx) is not mapped!\n",
- (unsigned long)xen_auto_xlat_grant_frames.vaddr);
+ pr_warn("gnttab share frames is not mapped!\n");
return -ENOMEM;
}
}
@@ -1389,7 +1386,6 @@ static int gnttab_expand(unsigned int req_entries)
int rc;
unsigned int cur, extra;
- BUG_ON(gnttab_interface == NULL);
cur = nr_grant_frames;
extra = ((req_entries + gnttab_interface->grefs_per_grant_frame - 1) /
gnttab_interface->grefs_per_grant_frame);
@@ -1424,7 +1420,6 @@ int gnttab_init(void)
/* Determine the maximum number of frames required for the
* grant reference free list on the current hypervisor.
*/
- BUG_ON(gnttab_interface == NULL);
max_nr_glist_frames = (max_nr_grant_frames *
gnttab_interface->grefs_per_grant_frame / RPP);
diff --git a/drivers/xen/mcelog.c b/drivers/xen/mcelog.c
index b8bf61abb65b..e9ac3b8c4167 100644
--- a/drivers/xen/mcelog.c
+++ b/drivers/xen/mcelog.c
@@ -222,7 +222,7 @@ static int convert_log(struct mc_info *mi)
struct mcinfo_global *mc_global;
struct mcinfo_bank *mc_bank;
struct xen_mce m;
- uint32_t i;
+ unsigned int i, j;
mic = NULL;
x86_mcinfo_lookup(&mic, mi, MC_TYPE_GLOBAL);
@@ -248,7 +248,17 @@ static int convert_log(struct mc_info *mi)
m.socketid = g_physinfo[i].mc_chipid;
m.cpu = m.extcpu = g_physinfo[i].mc_cpunr;
m.cpuvendor = (__u8)g_physinfo[i].mc_vendor;
- m.mcgcap = g_physinfo[i].mc_msrvalues[__MC_MSR_MCGCAP].value;
+ for (j = 0; j < g_physinfo[i].mc_nmsrvals; ++j)
+ switch (g_physinfo[i].mc_msrvalues[j].reg) {
+ case MSR_IA32_MCG_CAP:
+ m.mcgcap = g_physinfo[i].mc_msrvalues[j].value;
+ break;
+
+ case MSR_PPIN:
+ case MSR_AMD_PPIN:
+ m.ppin = g_physinfo[i].mc_msrvalues[j].value;
+ break;
+ }
mic = NULL;
x86_mcinfo_lookup(&mic, mi, MC_TYPE_BANK);
diff --git a/drivers/xen/pci.c b/drivers/xen/pci.c
index 3eeb9bea7630..224df03ce42e 100644
--- a/drivers/xen/pci.c
+++ b/drivers/xen/pci.c
@@ -17,6 +17,8 @@
#include "../pci/pci.h"
#ifdef CONFIG_PCI_MMCONFIG
#include <asm/pci_x86.h>
+
+static int xen_mcfg_late(void);
#endif
static bool __read_mostly pci_seg_supported = true;
@@ -28,7 +30,18 @@ static int xen_add_device(struct device *dev)
#ifdef CONFIG_PCI_IOV
struct pci_dev *physfn = pci_dev->physfn;
#endif
-
+#ifdef CONFIG_PCI_MMCONFIG
+ static bool pci_mcfg_reserved = false;
+ /*
+ * Reserve MCFG areas in Xen on first invocation due to this being
+ * potentially called from inside of acpi_init immediately after
+ * MCFG table has been finally parsed.
+ */
+ if (!pci_mcfg_reserved) {
+ xen_mcfg_late();
+ pci_mcfg_reserved = true;
+ }
+#endif
if (pci_seg_supported) {
struct {
struct physdev_pci_device_add add;
@@ -201,7 +214,7 @@ static int __init register_xen_pci_notifier(void)
arch_initcall(register_xen_pci_notifier);
#ifdef CONFIG_PCI_MMCONFIG
-static int __init xen_mcfg_late(void)
+static int xen_mcfg_late(void)
{
struct pci_mmcfg_region *cfg;
int rc;
@@ -240,8 +253,4 @@ static int __init xen_mcfg_late(void)
}
return 0;
}
-/*
- * Needs to be done after acpi_init which are subsys_initcall.
- */
-subsys_initcall_sync(xen_mcfg_late);
#endif
diff --git a/drivers/xen/platform-pci.c b/drivers/xen/platform-pci.c
index 5e30602fdbad..59e85e408c23 100644
--- a/drivers/xen/platform-pci.c
+++ b/drivers/xen/platform-pci.c
@@ -74,7 +74,7 @@ static int xen_allocate_irq(struct pci_dev *pdev)
"xen-platform-pci", pdev);
}
-static int platform_pci_resume(struct pci_dev *pdev)
+static int platform_pci_resume(struct device *dev)
{
int err;
@@ -83,7 +83,7 @@ static int platform_pci_resume(struct pci_dev *pdev)
err = xen_set_callback_via(callback_via);
if (err) {
- dev_err(&pdev->dev, "platform_pci_resume failure!\n");
+ dev_err(dev, "platform_pci_resume failure!\n");
return err;
}
return 0;
@@ -168,13 +168,17 @@ static const struct pci_device_id platform_pci_tbl[] = {
{0,}
};
+static struct dev_pm_ops platform_pm_ops = {
+ .resume_noirq = platform_pci_resume,
+};
+
static struct pci_driver platform_driver = {
.name = DRV_NAME,
.probe = platform_pci_probe,
.id_table = platform_pci_tbl,
-#ifdef CONFIG_PM
- .resume_early = platform_pci_resume,
-#endif
+ .driver = {
+ .pm = &platform_pm_ops,
+ },
};
builtin_pci_driver(platform_driver);
diff --git a/drivers/xen/preempt.c b/drivers/xen/preempt.c
index 8b9919c26095..70650b248de5 100644
--- a/drivers/xen/preempt.c
+++ b/drivers/xen/preempt.c
@@ -8,7 +8,7 @@
#include <linux/sched.h>
#include <xen/xen-ops.h>
-#ifndef CONFIG_PREEMPT
+#ifndef CONFIG_PREEMPTION
/*
* Some hypercalls issued by the toolstack can take many 10s of
@@ -37,4 +37,4 @@ asmlinkage __visible void xen_maybe_preempt_hcall(void)
__this_cpu_write(xen_in_preemptible_hcall, true);
}
}
-#endif /* CONFIG_PREEMPT */
+#endif /* CONFIG_PREEMPTION */
diff --git a/drivers/xen/pvcalls-back.c b/drivers/xen/pvcalls-back.c
index 69a626b0e594..c57c71b7d53d 100644
--- a/drivers/xen/pvcalls-back.c
+++ b/drivers/xen/pvcalls-back.c
@@ -775,7 +775,7 @@ static int pvcalls_back_poll(struct xenbus_device *dev,
mappass->reqcopy = *req;
icsk = inet_csk(mappass->sock->sk);
queue = &icsk->icsk_accept_queue;
- data = queue->rskq_accept_head != NULL;
+ data = READ_ONCE(queue->rskq_accept_head) != NULL;
if (data) {
mappass->reqcopy.cmd = 0;
ret = 0;
diff --git a/drivers/xen/swiotlb-xen.c b/drivers/xen/swiotlb-xen.c
index ae1df496bf38..b6d27762c6f8 100644
--- a/drivers/xen/swiotlb-xen.c
+++ b/drivers/xen/swiotlb-xen.c
@@ -28,6 +28,7 @@
#include <linux/memblock.h>
#include <linux/dma-direct.h>
+#include <linux/dma-noncoherent.h>
#include <linux/export.h>
#include <xen/swiotlb-xen.h>
#include <xen/page.h>
@@ -38,6 +39,7 @@
#include <asm/xen/page-coherent.h>
#include <trace/events/swiotlb.h>
+#define MAX_DMA_BITS 32
/*
* Used to do a quick range check in swiotlb_tbl_unmap_single and
* swiotlb_tbl_sync_single_*, to see if the memory was in fact allocated by this
@@ -114,8 +116,6 @@ static int is_xen_swiotlb_buffer(dma_addr_t dma_addr)
return 0;
}
-static int max_dma_bits = 32;
-
static int
xen_swiotlb_fixup(void *buf, size_t size, unsigned long nslabs)
{
@@ -135,7 +135,7 @@ xen_swiotlb_fixup(void *buf, size_t size, unsigned long nslabs)
p + (i << IO_TLB_SHIFT),
get_order(slabs << IO_TLB_SHIFT),
dma_bits, &dma_handle);
- } while (rc && dma_bits++ < max_dma_bits);
+ } while (rc && dma_bits++ < MAX_DMA_BITS);
if (rc)
return rc;
@@ -375,7 +375,7 @@ static dma_addr_t xen_swiotlb_map_page(struct device *dev, struct page *page,
* we can safely return the device addr and not worry about bounce
* buffering it.
*/
- if (dma_capable(dev, dev_addr, size) &&
+ if (dma_capable(dev, dev_addr, size, true) &&
!range_straddles_page_boundary(phys, size) &&
!xen_arch_need_swiotlb(dev, phys, dev_addr) &&
swiotlb_force != SWIOTLB_FORCE)
@@ -386,30 +386,26 @@ static dma_addr_t xen_swiotlb_map_page(struct device *dev, struct page *page,
*/
trace_swiotlb_bounced(dev, dev_addr, size, swiotlb_force);
- map = swiotlb_tbl_map_single(dev, start_dma_addr, phys, size, dir,
- attrs);
+ map = swiotlb_tbl_map_single(dev, start_dma_addr, phys,
+ size, size, dir, attrs);
if (map == (phys_addr_t)DMA_MAPPING_ERROR)
return DMA_MAPPING_ERROR;
+ phys = map;
dev_addr = xen_phys_to_bus(map);
/*
* Ensure that the address returned is DMA'ble
*/
- if (unlikely(!dma_capable(dev, dev_addr, size))) {
- swiotlb_tbl_unmap_single(dev, map, size, dir,
+ if (unlikely(!dma_capable(dev, dev_addr, size, true))) {
+ swiotlb_tbl_unmap_single(dev, map, size, size, dir,
attrs | DMA_ATTR_SKIP_CPU_SYNC);
return DMA_MAPPING_ERROR;
}
- page = pfn_to_page(map >> PAGE_SHIFT);
- offset = map & ~PAGE_MASK;
done:
- /*
- * we are not interested in the dma_addr returned by xen_dma_map_page,
- * only in the potential cache flushes executed by the function.
- */
- xen_dma_map_page(dev, page, dev_addr, offset, size, dir, attrs);
+ if (!dev_is_dma_coherent(dev) && !(attrs & DMA_ATTR_SKIP_CPU_SYNC))
+ xen_dma_sync_for_device(dev_addr, phys, size, dir);
return dev_addr;
}
@@ -421,26 +417,19 @@ done:
* After this call, reads by the cpu to the buffer are guaranteed to see
* whatever the device wrote there.
*/
-static void xen_unmap_single(struct device *hwdev, dma_addr_t dev_addr,
- size_t size, enum dma_data_direction dir,
- unsigned long attrs)
+static void xen_swiotlb_unmap_page(struct device *hwdev, dma_addr_t dev_addr,
+ size_t size, enum dma_data_direction dir, unsigned long attrs)
{
phys_addr_t paddr = xen_bus_to_phys(dev_addr);
BUG_ON(dir == DMA_NONE);
- xen_dma_unmap_page(hwdev, dev_addr, size, dir, attrs);
+ if (!dev_is_dma_coherent(hwdev) && !(attrs & DMA_ATTR_SKIP_CPU_SYNC))
+ xen_dma_sync_for_cpu(dev_addr, paddr, size, dir);
/* NOTE: We use dev_addr here, not paddr! */
if (is_xen_swiotlb_buffer(dev_addr))
- swiotlb_tbl_unmap_single(hwdev, paddr, size, dir, attrs);
-}
-
-static void xen_swiotlb_unmap_page(struct device *hwdev, dma_addr_t dev_addr,
- size_t size, enum dma_data_direction dir,
- unsigned long attrs)
-{
- xen_unmap_single(hwdev, dev_addr, size, dir, attrs);
+ swiotlb_tbl_unmap_single(hwdev, paddr, size, size, dir, attrs);
}
static void
@@ -449,7 +438,8 @@ xen_swiotlb_sync_single_for_cpu(struct device *dev, dma_addr_t dma_addr,
{
phys_addr_t paddr = xen_bus_to_phys(dma_addr);
- xen_dma_sync_single_for_cpu(dev, dma_addr, size, dir);
+ if (!dev_is_dma_coherent(dev))
+ xen_dma_sync_for_cpu(dma_addr, paddr, size, dir);
if (is_xen_swiotlb_buffer(dma_addr))
swiotlb_tbl_sync_single(dev, paddr, size, dir, SYNC_FOR_CPU);
@@ -464,7 +454,8 @@ xen_swiotlb_sync_single_for_device(struct device *dev, dma_addr_t dma_addr,
if (is_xen_swiotlb_buffer(dma_addr))
swiotlb_tbl_sync_single(dev, paddr, size, dir, SYNC_FOR_DEVICE);
- xen_dma_sync_single_for_device(dev, dma_addr, size, dir);
+ if (!dev_is_dma_coherent(dev))
+ xen_dma_sync_for_device(dma_addr, paddr, size, dir);
}
/*
@@ -481,7 +472,8 @@ xen_swiotlb_unmap_sg(struct device *hwdev, struct scatterlist *sgl, int nelems,
BUG_ON(dir == DMA_NONE);
for_each_sg(sgl, sg, nelems, i)
- xen_unmap_single(hwdev, sg->dma_address, sg_dma_len(sg), dir, attrs);
+ xen_swiotlb_unmap_page(hwdev, sg->dma_address, sg_dma_len(sg),
+ dir, attrs);
}
@@ -547,51 +539,6 @@ xen_swiotlb_dma_supported(struct device *hwdev, u64 mask)
return xen_virt_to_bus(xen_io_tlb_end - 1) <= mask;
}
-/*
- * Create userspace mapping for the DMA-coherent memory.
- * This function should be called with the pages from the current domain only,
- * passing pages mapped from other domains would lead to memory corruption.
- */
-static int
-xen_swiotlb_dma_mmap(struct device *dev, struct vm_area_struct *vma,
- void *cpu_addr, dma_addr_t dma_addr, size_t size,
- unsigned long attrs)
-{
-#ifdef CONFIG_ARM
- if (xen_get_dma_ops(dev)->mmap)
- return xen_get_dma_ops(dev)->mmap(dev, vma, cpu_addr,
- dma_addr, size, attrs);
-#endif
- return dma_common_mmap(dev, vma, cpu_addr, dma_addr, size, attrs);
-}
-
-/*
- * This function should be called with the pages from the current domain only,
- * passing pages mapped from other domains would lead to memory corruption.
- */
-static int
-xen_swiotlb_get_sgtable(struct device *dev, struct sg_table *sgt,
- void *cpu_addr, dma_addr_t handle, size_t size,
- unsigned long attrs)
-{
-#ifdef CONFIG_ARM
- if (xen_get_dma_ops(dev)->get_sgtable) {
-#if 0
- /*
- * This check verifies that the page belongs to the current domain and
- * is not one mapped from another domain.
- * This check is for debug only, and should not go to production build
- */
- unsigned long bfn = PHYS_PFN(dma_to_phys(dev, handle));
- BUG_ON (!page_is_ram(bfn));
-#endif
- return xen_get_dma_ops(dev)->get_sgtable(dev, sgt, cpu_addr,
- handle, size, attrs);
- }
-#endif
- return dma_common_get_sgtable(dev, sgt, cpu_addr, handle, size, attrs);
-}
-
const struct dma_map_ops xen_swiotlb_dma_ops = {
.alloc = xen_swiotlb_alloc_coherent,
.free = xen_swiotlb_free_coherent,
@@ -604,6 +551,6 @@ const struct dma_map_ops xen_swiotlb_dma_ops = {
.map_page = xen_swiotlb_map_page,
.unmap_page = xen_swiotlb_unmap_page,
.dma_supported = xen_swiotlb_dma_supported,
- .mmap = xen_swiotlb_dma_mmap,
- .get_sgtable = xen_swiotlb_get_sgtable,
+ .mmap = dma_common_mmap,
+ .get_sgtable = dma_common_get_sgtable,
};
diff --git a/drivers/xen/xen-balloon.c b/drivers/xen/xen-balloon.c
index 6d12fc368210..a8d24433c8e9 100644
--- a/drivers/xen/xen-balloon.c
+++ b/drivers/xen/xen-balloon.c
@@ -94,7 +94,7 @@ static void watch_target(struct xenbus_watch *watch,
"%llu", &static_max) == 1))
static_max >>= PAGE_SHIFT - 10;
else
- static_max = new_target;
+ static_max = balloon_stats.current_pages;
target_diff = (xen_pv_domain() || xen_initial_domain()) ? 0
: static_max - balloon_stats.target_pages;
diff --git a/drivers/xen/xen-pciback/conf_space.c b/drivers/xen/xen-pciback/conf_space.c
index 60111719b01f..b20e43e148ce 100644
--- a/drivers/xen/xen-pciback/conf_space.c
+++ b/drivers/xen/xen-pciback/conf_space.c
@@ -286,6 +286,43 @@ int xen_pcibk_config_write(struct pci_dev *dev, int offset, int size, u32 value)
return xen_pcibios_err_to_errno(err);
}
+int xen_pcibk_get_interrupt_type(struct pci_dev *dev)
+{
+ int err;
+ u16 val;
+ int ret = 0;
+
+ err = pci_read_config_word(dev, PCI_COMMAND, &val);
+ if (err)
+ return err;
+ if (!(val & PCI_COMMAND_INTX_DISABLE))
+ ret |= INTERRUPT_TYPE_INTX;
+
+ /*
+ * Do not trust dev->msi(x)_enabled here, as enabling could be done
+ * bypassing the pci_*msi* functions, by the qemu.
+ */
+ if (dev->msi_cap) {
+ err = pci_read_config_word(dev,
+ dev->msi_cap + PCI_MSI_FLAGS,
+ &val);
+ if (err)
+ return err;
+ if (val & PCI_MSI_FLAGS_ENABLE)
+ ret |= INTERRUPT_TYPE_MSI;
+ }
+ if (dev->msix_cap) {
+ err = pci_read_config_word(dev,
+ dev->msix_cap + PCI_MSIX_FLAGS,
+ &val);
+ if (err)
+ return err;
+ if (val & PCI_MSIX_FLAGS_ENABLE)
+ ret |= INTERRUPT_TYPE_MSIX;
+ }
+ return ret;
+}
+
void xen_pcibk_config_free_dyn_fields(struct pci_dev *dev)
{
struct xen_pcibk_dev_data *dev_data = pci_get_drvdata(dev);
diff --git a/drivers/xen/xen-pciback/conf_space.h b/drivers/xen/xen-pciback/conf_space.h
index 22db630717ea..28c45180a12e 100644
--- a/drivers/xen/xen-pciback/conf_space.h
+++ b/drivers/xen/xen-pciback/conf_space.h
@@ -65,6 +65,11 @@ struct config_field_entry {
void *data;
};
+#define INTERRUPT_TYPE_NONE (1<<0)
+#define INTERRUPT_TYPE_INTX (1<<1)
+#define INTERRUPT_TYPE_MSI (1<<2)
+#define INTERRUPT_TYPE_MSIX (1<<3)
+
extern bool xen_pcibk_permissive;
#define OFFSET(cfg_entry) ((cfg_entry)->base_offset+(cfg_entry)->field->offset)
@@ -126,4 +131,6 @@ int xen_pcibk_config_capability_init(void);
int xen_pcibk_config_header_add_fields(struct pci_dev *dev);
int xen_pcibk_config_capability_add_fields(struct pci_dev *dev);
+int xen_pcibk_get_interrupt_type(struct pci_dev *dev);
+
#endif /* __XEN_PCIBACK_CONF_SPACE_H__ */
diff --git a/drivers/xen/xen-pciback/conf_space_capability.c b/drivers/xen/xen-pciback/conf_space_capability.c
index e5694133ebe5..22f13abbe913 100644
--- a/drivers/xen/xen-pciback/conf_space_capability.c
+++ b/drivers/xen/xen-pciback/conf_space_capability.c
@@ -189,6 +189,85 @@ static const struct config_field caplist_pm[] = {
{}
};
+static struct msi_msix_field_config {
+ u16 enable_bit; /* bit for enabling MSI/MSI-X */
+ unsigned int int_type; /* interrupt type for exclusiveness check */
+} msi_field_config = {
+ .enable_bit = PCI_MSI_FLAGS_ENABLE,
+ .int_type = INTERRUPT_TYPE_MSI,
+}, msix_field_config = {
+ .enable_bit = PCI_MSIX_FLAGS_ENABLE,
+ .int_type = INTERRUPT_TYPE_MSIX,
+};
+
+static void *msi_field_init(struct pci_dev *dev, int offset)
+{
+ return &msi_field_config;
+}
+
+static void *msix_field_init(struct pci_dev *dev, int offset)
+{
+ return &msix_field_config;
+}
+
+static int msi_msix_flags_write(struct pci_dev *dev, int offset, u16 new_value,
+ void *data)
+{
+ int err;
+ u16 old_value;
+ const struct msi_msix_field_config *field_config = data;
+ const struct xen_pcibk_dev_data *dev_data = pci_get_drvdata(dev);
+
+ if (xen_pcibk_permissive || dev_data->permissive)
+ goto write;
+
+ err = pci_read_config_word(dev, offset, &old_value);
+ if (err)
+ return err;
+
+ if (new_value == old_value)
+ return 0;
+
+ if (!dev_data->allow_interrupt_control ||
+ (new_value ^ old_value) & ~field_config->enable_bit)
+ return PCIBIOS_SET_FAILED;
+
+ if (new_value & field_config->enable_bit) {
+ /* don't allow enabling together with other interrupt types */
+ int int_type = xen_pcibk_get_interrupt_type(dev);
+
+ if (int_type == INTERRUPT_TYPE_NONE ||
+ int_type == field_config->int_type)
+ goto write;
+ return PCIBIOS_SET_FAILED;
+ }
+
+write:
+ return pci_write_config_word(dev, offset, new_value);
+}
+
+static const struct config_field caplist_msix[] = {
+ {
+ .offset = PCI_MSIX_FLAGS,
+ .size = 2,
+ .init = msix_field_init,
+ .u.w.read = xen_pcibk_read_config_word,
+ .u.w.write = msi_msix_flags_write,
+ },
+ {}
+};
+
+static const struct config_field caplist_msi[] = {
+ {
+ .offset = PCI_MSI_FLAGS,
+ .size = 2,
+ .init = msi_field_init,
+ .u.w.read = xen_pcibk_read_config_word,
+ .u.w.write = msi_msix_flags_write,
+ },
+ {}
+};
+
static struct xen_pcibk_config_capability xen_pcibk_config_capability_pm = {
.capability = PCI_CAP_ID_PM,
.fields = caplist_pm,
@@ -197,11 +276,21 @@ static struct xen_pcibk_config_capability xen_pcibk_config_capability_vpd = {
.capability = PCI_CAP_ID_VPD,
.fields = caplist_vpd,
};
+static struct xen_pcibk_config_capability xen_pcibk_config_capability_msi = {
+ .capability = PCI_CAP_ID_MSI,
+ .fields = caplist_msi,
+};
+static struct xen_pcibk_config_capability xen_pcibk_config_capability_msix = {
+ .capability = PCI_CAP_ID_MSIX,
+ .fields = caplist_msix,
+};
int xen_pcibk_config_capability_init(void)
{
register_capability(&xen_pcibk_config_capability_vpd);
register_capability(&xen_pcibk_config_capability_pm);
+ register_capability(&xen_pcibk_config_capability_msi);
+ register_capability(&xen_pcibk_config_capability_msix);
return 0;
}
diff --git a/drivers/xen/xen-pciback/conf_space_header.c b/drivers/xen/xen-pciback/conf_space_header.c
index 10ae24b5a76e..fb4fccb4aecc 100644
--- a/drivers/xen/xen-pciback/conf_space_header.c
+++ b/drivers/xen/xen-pciback/conf_space_header.c
@@ -117,6 +117,25 @@ static int command_write(struct pci_dev *dev, int offset, u16 value, void *data)
pci_clear_mwi(dev);
}
+ if (dev_data && dev_data->allow_interrupt_control) {
+ if ((cmd->val ^ value) & PCI_COMMAND_INTX_DISABLE) {
+ if (value & PCI_COMMAND_INTX_DISABLE) {
+ pci_intx(dev, 0);
+ } else {
+ /* Do not allow enabling INTx together with MSI or MSI-X. */
+ switch (xen_pcibk_get_interrupt_type(dev)) {
+ case INTERRUPT_TYPE_NONE:
+ pci_intx(dev, 1);
+ break;
+ case INTERRUPT_TYPE_INTX:
+ break;
+ default:
+ return PCIBIOS_SET_FAILED;
+ }
+ }
+ }
+ }
+
cmd->val = value;
if (!xen_pcibk_permissive && (!dev_data || !dev_data->permissive))
diff --git a/drivers/xen/xen-pciback/pci_stub.c b/drivers/xen/xen-pciback/pci_stub.c
index 097410a7cdb7..7af93d65ed51 100644
--- a/drivers/xen/xen-pciback/pci_stub.c
+++ b/drivers/xen/xen-pciback/pci_stub.c
@@ -304,6 +304,8 @@ void pcistub_put_pci_dev(struct pci_dev *dev)
xen_pcibk_config_reset_dev(dev);
xen_pcibk_config_free_dyn_fields(dev);
+ dev_data->allow_interrupt_control = 0;
+
xen_unregister_device_domain_owner(dev);
spin_lock_irqsave(&found_psdev->lock, flags);
@@ -1431,6 +1433,65 @@ static ssize_t permissive_show(struct device_driver *drv, char *buf)
}
static DRIVER_ATTR_RW(permissive);
+static ssize_t allow_interrupt_control_store(struct device_driver *drv,
+ const char *buf, size_t count)
+{
+ int domain, bus, slot, func;
+ int err;
+ struct pcistub_device *psdev;
+ struct xen_pcibk_dev_data *dev_data;
+
+ err = str_to_slot(buf, &domain, &bus, &slot, &func);
+ if (err)
+ goto out;
+
+ psdev = pcistub_device_find(domain, bus, slot, func);
+ if (!psdev) {
+ err = -ENODEV;
+ goto out;
+ }
+
+ dev_data = pci_get_drvdata(psdev->dev);
+ /* the driver data for a device should never be null at this point */
+ if (!dev_data) {
+ err = -ENXIO;
+ goto release;
+ }
+ dev_data->allow_interrupt_control = 1;
+release:
+ pcistub_device_put(psdev);
+out:
+ if (!err)
+ err = count;
+ return err;
+}
+
+static ssize_t allow_interrupt_control_show(struct device_driver *drv,
+ char *buf)
+{
+ struct pcistub_device *psdev;
+ struct xen_pcibk_dev_data *dev_data;
+ size_t count = 0;
+ unsigned long flags;
+
+ spin_lock_irqsave(&pcistub_devices_lock, flags);
+ list_for_each_entry(psdev, &pcistub_devices, dev_list) {
+ if (count >= PAGE_SIZE)
+ break;
+ if (!psdev->dev)
+ continue;
+ dev_data = pci_get_drvdata(psdev->dev);
+ if (!dev_data || !dev_data->allow_interrupt_control)
+ continue;
+ count +=
+ scnprintf(buf + count, PAGE_SIZE - count, "%s\n",
+ pci_name(psdev->dev));
+ }
+ spin_unlock_irqrestore(&pcistub_devices_lock, flags);
+ return count;
+}
+static DRIVER_ATTR_RW(allow_interrupt_control);
+
static void pcistub_exit(void)
{
driver_remove_file(&xen_pcibk_pci_driver.driver, &driver_attr_new_slot);
@@ -1441,6 +1502,8 @@ static void pcistub_exit(void)
driver_remove_file(&xen_pcibk_pci_driver.driver,
&driver_attr_permissive);
driver_remove_file(&xen_pcibk_pci_driver.driver,
+ &driver_attr_allow_interrupt_control);
+ driver_remove_file(&xen_pcibk_pci_driver.driver,
&driver_attr_irq_handlers);
driver_remove_file(&xen_pcibk_pci_driver.driver,
&driver_attr_irq_handler_state);
@@ -1530,6 +1593,9 @@ static int __init pcistub_init(void)
if (!err)
err = driver_create_file(&xen_pcibk_pci_driver.driver,
&driver_attr_permissive);
+ if (!err)
+ err = driver_create_file(&xen_pcibk_pci_driver.driver,
+ &driver_attr_allow_interrupt_control);
if (!err)
err = driver_create_file(&xen_pcibk_pci_driver.driver,
diff --git a/drivers/xen/xen-pciback/pciback.h b/drivers/xen/xen-pciback/pciback.h
index 263c059bff90..ce1077e32466 100644
--- a/drivers/xen/xen-pciback/pciback.h
+++ b/drivers/xen/xen-pciback/pciback.h
@@ -45,6 +45,7 @@ struct xen_pcibk_dev_data {
struct list_head config_fields;
struct pci_saved_state *pci_saved_state;
unsigned int permissive:1;
+ unsigned int allow_interrupt_control:1;
unsigned int warned_on_write:1;
unsigned int enable_intx:1;
unsigned int isr_on:1; /* Whether the IRQ handler is installed. */
diff --git a/drivers/xen/xenbus/xenbus.h b/drivers/xen/xenbus/xenbus.h
index d75a2385b37c..5f5b8a7d5b80 100644
--- a/drivers/xen/xenbus/xenbus.h
+++ b/drivers/xen/xenbus/xenbus.h
@@ -116,8 +116,6 @@ int xenbus_probe_devices(struct xen_bus_type *bus);
void xenbus_dev_changed(const char *node, struct xen_bus_type *bus);
-void xenbus_dev_shutdown(struct device *_dev);
-
int xenbus_dev_suspend(struct device *dev);
int xenbus_dev_resume(struct device *dev);
int xenbus_dev_cancel(struct device *dev);
diff --git a/drivers/xen/xenbus/xenbus_dev_frontend.c b/drivers/xen/xenbus/xenbus_dev_frontend.c
index 08adc590f631..597af455a522 100644
--- a/drivers/xen/xenbus/xenbus_dev_frontend.c
+++ b/drivers/xen/xenbus/xenbus_dev_frontend.c
@@ -55,6 +55,7 @@
#include <linux/string.h>
#include <linux/slab.h>
#include <linux/miscdevice.h>
+#include <linux/workqueue.h>
#include <xen/xenbus.h>
#include <xen/xen.h>
@@ -116,6 +117,8 @@ struct xenbus_file_priv {
wait_queue_head_t read_waitq;
struct kref kref;
+
+ struct work_struct wq;
};
/* Read out any raw xenbus messages queued up. */
@@ -300,14 +303,14 @@ static void watch_fired(struct xenbus_watch *watch,
mutex_unlock(&adap->dev_data->reply_mutex);
}
-static void xenbus_file_free(struct kref *kref)
+static void xenbus_worker(struct work_struct *wq)
{
struct xenbus_file_priv *u;
struct xenbus_transaction_holder *trans, *tmp;
struct watch_adapter *watch, *tmp_watch;
struct read_buffer *rb, *tmp_rb;
- u = container_of(kref, struct xenbus_file_priv, kref);
+ u = container_of(wq, struct xenbus_file_priv, wq);
/*
* No need for locking here because there are no other users,
@@ -333,6 +336,18 @@ static void xenbus_file_free(struct kref *kref)
kfree(u);
}
+static void xenbus_file_free(struct kref *kref)
+{
+ struct xenbus_file_priv *u;
+
+ /*
+ * We might be called in xenbus_thread().
+ * Use workqueue to avoid deadlock.
+ */
+ u = container_of(kref, struct xenbus_file_priv, kref);
+ schedule_work(&u->wq);
+}
+
static struct xenbus_transaction_holder *xenbus_get_transaction(
struct xenbus_file_priv *u, uint32_t tx_id)
{
@@ -650,6 +665,7 @@ static int xenbus_file_open(struct inode *inode, struct file *filp)
INIT_LIST_HEAD(&u->watches);
INIT_LIST_HEAD(&u->read_buffers);
init_waitqueue_head(&u->read_waitq);
+ INIT_WORK(&u->wq, xenbus_worker);
mutex_init(&u->reply_mutex);
mutex_init(&u->msgbuffer_mutex);
diff --git a/drivers/xen/xenbus/xenbus_probe.c b/drivers/xen/xenbus/xenbus_probe.c
index 5b471889d723..66975da4f3b6 100644
--- a/drivers/xen/xenbus/xenbus_probe.c
+++ b/drivers/xen/xenbus/xenbus_probe.c
@@ -232,9 +232,18 @@ int xenbus_dev_probe(struct device *_dev)
return err;
}
+ if (!try_module_get(drv->driver.owner)) {
+ dev_warn(&dev->dev, "failed to acquire module reference on '%s'\n",
+ drv->driver.name);
+ err = -ESRCH;
+ goto fail;
+ }
+
+ spin_lock(&dev->reclaim_lock);
err = drv->probe(dev, id);
+ spin_unlock(&dev->reclaim_lock);
if (err)
- goto fail;
+ goto fail_put;
err = watch_otherend(dev);
if (err) {
@@ -244,9 +253,10 @@ int xenbus_dev_probe(struct device *_dev)
}
return 0;
+fail_put:
+ module_put(drv->driver.owner);
fail:
xenbus_dev_error(dev, err, "xenbus_dev_probe on %s", dev->nodename);
- xenbus_switch_state(dev, XenbusStateClosed);
return err;
}
EXPORT_SYMBOL_GPL(xenbus_dev_probe);
@@ -260,39 +270,30 @@ int xenbus_dev_remove(struct device *_dev)
free_otherend_watch(dev);
- if (drv->remove)
+ if (drv->remove) {
+ spin_lock(&dev->reclaim_lock);
drv->remove(dev);
+ spin_unlock(&dev->reclaim_lock);
+ }
+
+ module_put(drv->driver.owner);
free_otherend_details(dev);
- xenbus_switch_state(dev, XenbusStateClosed);
+ /*
+ * If the toolstack has forced the device state to closing then set
+ * the state to closed now to allow it to be cleaned up.
+ * Similarly, if the driver does not support re-bind, set the
+ * closed.
+ */
+ if (!drv->allow_rebind ||
+ xenbus_read_driver_state(dev->nodename) == XenbusStateClosing)
+ xenbus_switch_state(dev, XenbusStateClosed);
+
return 0;
}
EXPORT_SYMBOL_GPL(xenbus_dev_remove);
-void xenbus_dev_shutdown(struct device *_dev)
-{
- struct xenbus_device *dev = to_xenbus_device(_dev);
- unsigned long timeout = 5*HZ;
-
- DPRINTK("%s", dev->nodename);
-
- get_device(&dev->dev);
- if (dev->state != XenbusStateConnected) {
- pr_info("%s: %s: %s != Connected, skipping\n",
- __func__, dev->nodename, xenbus_strstate(dev->state));
- goto out;
- }
- xenbus_switch_state(dev, XenbusStateClosing);
- timeout = wait_for_completion_timeout(&dev->down, timeout);
- if (!timeout)
- pr_info("%s: %s timeout closing device\n",
- __func__, dev->nodename);
- out:
- put_device(&dev->dev);
-}
-EXPORT_SYMBOL_GPL(xenbus_dev_shutdown);
-
int xenbus_register_driver_common(struct xenbus_driver *drv,
struct xen_bus_type *bus,
struct module *owner, const char *mod_name)
@@ -472,6 +473,7 @@ int xenbus_probe_node(struct xen_bus_type *bus,
goto fail;
dev_set_name(&xendev->dev, "%s", devname);
+ spin_lock_init(&xendev->reclaim_lock);
/* Register with generic device framework. */
err = device_register(&xendev->dev);
diff --git a/drivers/xen/xenbus/xenbus_probe_backend.c b/drivers/xen/xenbus/xenbus_probe_backend.c
index b0bed4faf44c..791f6fe01e91 100644
--- a/drivers/xen/xenbus/xenbus_probe_backend.c
+++ b/drivers/xen/xenbus/xenbus_probe_backend.c
@@ -198,7 +198,6 @@ static struct xen_bus_type xenbus_backend = {
.uevent = xenbus_uevent_backend,
.probe = xenbus_dev_probe,
.remove = xenbus_dev_remove,
- .shutdown = xenbus_dev_shutdown,
.dev_groups = xenbus_dev_groups,
},
};
@@ -248,6 +247,41 @@ static int backend_probe_and_watch(struct notifier_block *notifier,
return NOTIFY_DONE;
}
+static int backend_reclaim_memory(struct device *dev, void *data)
+{
+ const struct xenbus_driver *drv;
+ struct xenbus_device *xdev;
+
+ if (!dev->driver)
+ return 0;
+ drv = to_xenbus_driver(dev->driver);
+ if (drv && drv->reclaim_memory) {
+ xdev = to_xenbus_device(dev);
+ if (!spin_trylock(&xdev->reclaim_lock))
+ return 0;
+ drv->reclaim_memory(xdev);
+ spin_unlock(&xdev->reclaim_lock);
+ }
+ return 0;
+}
+
+/*
+ * Returns 0 always because we are using shrinker to only detect memory
+ * pressure.
+ */
+static unsigned long backend_shrink_memory_count(struct shrinker *shrinker,
+ struct shrink_control *sc)
+{
+ bus_for_each_dev(&xenbus_backend.bus, NULL, NULL,
+ backend_reclaim_memory);
+ return 0;
+}
+
+static struct shrinker backend_memory_shrinker = {
+ .count_objects = backend_shrink_memory_count,
+ .seeks = DEFAULT_SEEKS,
+};
+
static int __init xenbus_probe_backend_init(void)
{
static struct notifier_block xenstore_notifier = {
@@ -264,6 +298,9 @@ static int __init xenbus_probe_backend_init(void)
register_xenstore_notifier(&xenstore_notifier);
+ if (register_shrinker(&backend_memory_shrinker))
+ pr_warn("shrinker registration failed\n");
+
return 0;
}
subsys_initcall(xenbus_probe_backend_init);
diff --git a/drivers/xen/xenbus/xenbus_probe_frontend.c b/drivers/xen/xenbus/xenbus_probe_frontend.c
index a7d90a719cea..8a1650bbe18f 100644
--- a/drivers/xen/xenbus/xenbus_probe_frontend.c
+++ b/drivers/xen/xenbus/xenbus_probe_frontend.c
@@ -126,6 +126,28 @@ static int xenbus_frontend_dev_probe(struct device *dev)
return xenbus_dev_probe(dev);
}
+static void xenbus_frontend_dev_shutdown(struct device *_dev)
+{
+ struct xenbus_device *dev = to_xenbus_device(_dev);
+ unsigned long timeout = 5*HZ;
+
+ DPRINTK("%s", dev->nodename);
+
+ get_device(&dev->dev);
+ if (dev->state != XenbusStateConnected) {
+ pr_info("%s: %s: %s != Connected, skipping\n",
+ __func__, dev->nodename, xenbus_strstate(dev->state));
+ goto out;
+ }
+ xenbus_switch_state(dev, XenbusStateClosing);
+ timeout = wait_for_completion_timeout(&dev->down, timeout);
+ if (!timeout)
+ pr_info("%s: %s timeout closing device\n",
+ __func__, dev->nodename);
+ out:
+ put_device(&dev->dev);
+}
+
static const struct dev_pm_ops xenbus_pm_ops = {
.suspend = xenbus_dev_suspend,
.resume = xenbus_frontend_dev_resume,
@@ -146,7 +168,7 @@ static struct xen_bus_type xenbus_frontend = {
.uevent = xenbus_uevent_frontend,
.probe = xenbus_frontend_dev_probe,
.remove = xenbus_dev_remove,
- .shutdown = xenbus_dev_shutdown,
+ .shutdown = xenbus_frontend_dev_shutdown,
.dev_groups = xenbus_dev_groups,
.pm = &xenbus_pm_ops,
OpenPOWER on IntegriCloud