diff options
Diffstat (limited to 'drivers/vfio')
-rw-r--r-- | drivers/vfio/mdev/mdev_sysfs.c | 2 | ||||
-rw-r--r-- | drivers/vfio/pci/vfio_pci.c | 28 | ||||
-rw-r--r-- | drivers/vfio/pci/vfio_pci_config.c | 32 | ||||
-rw-r--r-- | drivers/vfio/pci/vfio_pci_intrs.c | 2 | ||||
-rw-r--r-- | drivers/vfio/pci/vfio_pci_nvlink2.c | 8 | ||||
-rw-r--r-- | drivers/vfio/pci/vfio_pci_private.h | 4 | ||||
-rw-r--r-- | drivers/vfio/pci/vfio_pci_rdwr.c | 2 | ||||
-rw-r--r-- | drivers/vfio/platform/reset/vfio_platform_amdxgbe.c | 8 | ||||
-rw-r--r-- | drivers/vfio/platform/reset/vfio_platform_bcmflexrm.c | 2 | ||||
-rw-r--r-- | drivers/vfio/platform/reset/vfio_platform_calxedaxgmac.c | 2 | ||||
-rw-r--r-- | drivers/vfio/platform/vfio_platform_common.c | 4 | ||||
-rw-r--r-- | drivers/vfio/vfio.c | 39 | ||||
-rw-r--r-- | drivers/vfio/vfio_iommu_spapr_tce.c | 37 | ||||
-rw-r--r-- | drivers/vfio/vfio_iommu_type1.c | 608 |
14 files changed, 616 insertions, 162 deletions
diff --git a/drivers/vfio/mdev/mdev_sysfs.c b/drivers/vfio/mdev/mdev_sysfs.c index 7570c7602ab4..8ad14e5c02bf 100644 --- a/drivers/vfio/mdev/mdev_sysfs.c +++ b/drivers/vfio/mdev/mdev_sysfs.c @@ -74,7 +74,7 @@ static ssize_t create_store(struct kobject *kobj, struct device *dev, return count; } -MDEV_TYPE_ATTR_WO(create); +static MDEV_TYPE_ATTR_WO(create); static void mdev_type_release(struct kobject *kobj) { diff --git a/drivers/vfio/pci/vfio_pci.c b/drivers/vfio/pci/vfio_pci.c index 703948c9fbe1..379a02c36e37 100644 --- a/drivers/vfio/pci/vfio_pci.c +++ b/drivers/vfio/pci/vfio_pci.c @@ -110,13 +110,15 @@ static inline bool vfio_pci_is_vga(struct pci_dev *pdev) static void vfio_pci_probe_mmaps(struct vfio_pci_device *vdev) { struct resource *res; - int bar; + int i; struct vfio_pci_dummy_resource *dummy_res; INIT_LIST_HEAD(&vdev->dummy_resources_list); - for (bar = PCI_STD_RESOURCES; bar <= PCI_STD_RESOURCE_END; bar++) { - res = vdev->pdev->resource + bar; + for (i = 0; i < PCI_STD_NUM_BARS; i++) { + int bar = i + PCI_STD_RESOURCES; + + res = &vdev->pdev->resource[bar]; if (!IS_ENABLED(CONFIG_VFIO_PCI_MMAP)) goto no_mmap; @@ -399,7 +401,8 @@ static void vfio_pci_disable(struct vfio_pci_device *vdev) vfio_config_free(vdev); - for (bar = PCI_STD_RESOURCES; bar <= PCI_STD_RESOURCE_END; bar++) { + for (i = 0; i < PCI_STD_NUM_BARS; i++) { + bar = i + PCI_STD_RESOURCES; if (!vdev->barmap[bar]) continue; pci_iounmap(pdev, vdev->barmap[bar]); @@ -438,11 +441,20 @@ static void vfio_pci_disable(struct vfio_pci_device *vdev) pci_write_config_word(pdev, PCI_COMMAND, PCI_COMMAND_INTX_DISABLE); /* - * Try to reset the device. The success of this is dependent on - * being able to lock the device, which is not always possible. + * Try to get the locks ourselves to prevent a deadlock. The + * success of this is dependent on being able to lock the device, + * which is not always possible. 
+ * We can not use the "try" reset interface here, which will + * overwrite the previously restored configuration information. */ - if (vdev->reset_works && !pci_try_reset_function(pdev)) - vdev->needs_reset = false; + if (vdev->reset_works && pci_cfg_access_trylock(pdev)) { + if (device_trylock(&pdev->dev)) { + if (!__pci_reset_function_locked(pdev)) + vdev->needs_reset = false; + device_unlock(&pdev->dev); + } + pci_cfg_access_unlock(pdev); + } pci_restore_state(pdev); out: diff --git a/drivers/vfio/pci/vfio_pci_config.c b/drivers/vfio/pci/vfio_pci_config.c index f0891bd8444c..90c0b80f8acf 100644 --- a/drivers/vfio/pci/vfio_pci_config.c +++ b/drivers/vfio/pci/vfio_pci_config.c @@ -450,30 +450,32 @@ static void vfio_bar_fixup(struct vfio_pci_device *vdev) { struct pci_dev *pdev = vdev->pdev; int i; - __le32 *bar; + __le32 *vbar; u64 mask; - bar = (__le32 *)&vdev->vconfig[PCI_BASE_ADDRESS_0]; + vbar = (__le32 *)&vdev->vconfig[PCI_BASE_ADDRESS_0]; - for (i = PCI_STD_RESOURCES; i <= PCI_STD_RESOURCE_END; i++, bar++) { - if (!pci_resource_start(pdev, i)) { - *bar = 0; /* Unmapped by host = unimplemented to user */ + for (i = 0; i < PCI_STD_NUM_BARS; i++, vbar++) { + int bar = i + PCI_STD_RESOURCES; + + if (!pci_resource_start(pdev, bar)) { + *vbar = 0; /* Unmapped by host = unimplemented to user */ continue; } - mask = ~(pci_resource_len(pdev, i) - 1); + mask = ~(pci_resource_len(pdev, bar) - 1); - *bar &= cpu_to_le32((u32)mask); - *bar |= vfio_generate_bar_flags(pdev, i); + *vbar &= cpu_to_le32((u32)mask); + *vbar |= vfio_generate_bar_flags(pdev, bar); - if (*bar & cpu_to_le32(PCI_BASE_ADDRESS_MEM_TYPE_64)) { - bar++; - *bar &= cpu_to_le32((u32)(mask >> 32)); + if (*vbar & cpu_to_le32(PCI_BASE_ADDRESS_MEM_TYPE_64)) { + vbar++; + *vbar &= cpu_to_le32((u32)(mask >> 32)); i++; } } - bar = (__le32 *)&vdev->vconfig[PCI_ROM_ADDRESS]; + vbar = (__le32 *)&vdev->vconfig[PCI_ROM_ADDRESS]; /* * NB. 
REGION_INFO will have reported zero size if we weren't able @@ -483,14 +485,14 @@ static void vfio_bar_fixup(struct vfio_pci_device *vdev) if (pci_resource_start(pdev, PCI_ROM_RESOURCE)) { mask = ~(pci_resource_len(pdev, PCI_ROM_RESOURCE) - 1); mask |= PCI_ROM_ADDRESS_ENABLE; - *bar &= cpu_to_le32((u32)mask); + *vbar &= cpu_to_le32((u32)mask); } else if (pdev->resource[PCI_ROM_RESOURCE].flags & IORESOURCE_ROM_SHADOW) { mask = ~(0x20000 - 1); mask |= PCI_ROM_ADDRESS_ENABLE; - *bar &= cpu_to_le32((u32)mask); + *vbar &= cpu_to_le32((u32)mask); } else - *bar = 0; + *vbar = 0; vdev->bardirty = false; } diff --git a/drivers/vfio/pci/vfio_pci_intrs.c b/drivers/vfio/pci/vfio_pci_intrs.c index 3fa3f728fb39..2056f3f85f59 100644 --- a/drivers/vfio/pci/vfio_pci_intrs.c +++ b/drivers/vfio/pci/vfio_pci_intrs.c @@ -294,8 +294,8 @@ static int vfio_msi_set_vector_signal(struct vfio_pci_device *vdev, irq = pci_irq_vector(pdev, vector); if (vdev->ctx[vector].trigger) { - free_irq(irq, vdev->ctx[vector].trigger); irq_bypass_unregister_producer(&vdev->ctx[vector].producer); + free_irq(irq, vdev->ctx[vector].trigger); kfree(vdev->ctx[vector].name); eventfd_ctx_put(vdev->ctx[vector].trigger); vdev->ctx[vector].trigger = NULL; diff --git a/drivers/vfio/pci/vfio_pci_nvlink2.c b/drivers/vfio/pci/vfio_pci_nvlink2.c index f2983f0f84be..df4d96038cd4 100644 --- a/drivers/vfio/pci/vfio_pci_nvlink2.c +++ b/drivers/vfio/pci/vfio_pci_nvlink2.c @@ -97,8 +97,10 @@ static void vfio_pci_nvgpu_release(struct vfio_pci_device *vdev, /* If there were any mappings at all... 
*/ if (data->mm) { - ret = mm_iommu_put(data->mm, data->mem); - WARN_ON(ret); + if (data->mem) { + ret = mm_iommu_put(data->mm, data->mem); + WARN_ON(ret); + } mmdrop(data->mm); } @@ -159,7 +161,7 @@ static int vfio_pci_nvgpu_mmap(struct vfio_pci_device *vdev, data->useraddr = vma->vm_start; data->mm = current->mm; - atomic_inc(&data->mm->mm_count); + mmgrab(data->mm); ret = (int) mm_iommu_newdev(data->mm, data->useraddr, vma_pages(vma), data->gpu_hpa, &data->mem); diff --git a/drivers/vfio/pci/vfio_pci_private.h b/drivers/vfio/pci/vfio_pci_private.h index ee6ee91718a4..8a2c7607d513 100644 --- a/drivers/vfio/pci/vfio_pci_private.h +++ b/drivers/vfio/pci/vfio_pci_private.h @@ -86,8 +86,8 @@ struct vfio_pci_reflck { struct vfio_pci_device { struct pci_dev *pdev; - void __iomem *barmap[PCI_STD_RESOURCE_END + 1]; - bool bar_mmap_supported[PCI_STD_RESOURCE_END + 1]; + void __iomem *barmap[PCI_STD_NUM_BARS]; + bool bar_mmap_supported[PCI_STD_NUM_BARS]; u8 *pci_config_map; u8 *vconfig; struct perm_bits *msi_perm; diff --git a/drivers/vfio/pci/vfio_pci_rdwr.c b/drivers/vfio/pci/vfio_pci_rdwr.c index 0120d8324a40..a87992892a9f 100644 --- a/drivers/vfio/pci/vfio_pci_rdwr.c +++ b/drivers/vfio/pci/vfio_pci_rdwr.c @@ -230,7 +230,7 @@ ssize_t vfio_pci_vga_rw(struct vfio_pci_device *vdev, char __user *buf, switch ((u32)pos) { case 0xa0000 ... 
0xbffff: count = min(count, (size_t)(0xc0000 - pos)); - iomem = ioremap_nocache(0xa0000, 0xbffff - 0xa0000 + 1); + iomem = ioremap(0xa0000, 0xbffff - 0xa0000 + 1); off = pos - 0xa0000; rsrc = VGA_RSRC_LEGACY_MEM; is_ioport = false; diff --git a/drivers/vfio/platform/reset/vfio_platform_amdxgbe.c b/drivers/vfio/platform/reset/vfio_platform_amdxgbe.c index 2d2babe21b2f..abdca900802d 100644 --- a/drivers/vfio/platform/reset/vfio_platform_amdxgbe.c +++ b/drivers/vfio/platform/reset/vfio_platform_amdxgbe.c @@ -24,7 +24,7 @@ #define MDIO_AN_INT 0x8002 #define MDIO_AN_INTMASK 0x8001 -static unsigned int xmdio_read(void *ioaddr, unsigned int mmd, +static unsigned int xmdio_read(void __iomem *ioaddr, unsigned int mmd, unsigned int reg) { unsigned int mmd_address, value; @@ -35,7 +35,7 @@ static unsigned int xmdio_read(void *ioaddr, unsigned int mmd, return value; } -static void xmdio_write(void *ioaddr, unsigned int mmd, +static void xmdio_write(void __iomem *ioaddr, unsigned int mmd, unsigned int reg, unsigned int value) { unsigned int mmd_address; @@ -54,13 +54,13 @@ static int vfio_platform_amdxgbe_reset(struct vfio_platform_device *vdev) if (!xgmac_regs->ioaddr) { xgmac_regs->ioaddr = - ioremap_nocache(xgmac_regs->addr, xgmac_regs->size); + ioremap(xgmac_regs->addr, xgmac_regs->size); if (!xgmac_regs->ioaddr) return -ENOMEM; } if (!xpcs_regs->ioaddr) { xpcs_regs->ioaddr = - ioremap_nocache(xpcs_regs->addr, xpcs_regs->size); + ioremap(xpcs_regs->addr, xpcs_regs->size); if (!xpcs_regs->ioaddr) return -ENOMEM; } diff --git a/drivers/vfio/platform/reset/vfio_platform_bcmflexrm.c b/drivers/vfio/platform/reset/vfio_platform_bcmflexrm.c index 16165a62b86d..96064ef8f629 100644 --- a/drivers/vfio/platform/reset/vfio_platform_bcmflexrm.c +++ b/drivers/vfio/platform/reset/vfio_platform_bcmflexrm.c @@ -82,7 +82,7 @@ static int vfio_platform_bcmflexrm_reset(struct vfio_platform_device *vdev) /* Map FlexRM ring registers if not mapped */ if (!reg->ioaddr) { - reg->ioaddr = 
ioremap_nocache(reg->addr, reg->size); + reg->ioaddr = ioremap(reg->addr, reg->size); if (!reg->ioaddr) return -ENOMEM; } diff --git a/drivers/vfio/platform/reset/vfio_platform_calxedaxgmac.c b/drivers/vfio/platform/reset/vfio_platform_calxedaxgmac.c index f67bab547501..09a9453b75c5 100644 --- a/drivers/vfio/platform/reset/vfio_platform_calxedaxgmac.c +++ b/drivers/vfio/platform/reset/vfio_platform_calxedaxgmac.c @@ -52,7 +52,7 @@ static int vfio_platform_calxedaxgmac_reset(struct vfio_platform_device *vdev) if (!reg->ioaddr) { reg->ioaddr = - ioremap_nocache(reg->addr, reg->size); + ioremap(reg->addr, reg->size); if (!reg->ioaddr) return -ENOMEM; } diff --git a/drivers/vfio/platform/vfio_platform_common.c b/drivers/vfio/platform/vfio_platform_common.c index e8f2bdbe0542..c0771a9567fb 100644 --- a/drivers/vfio/platform/vfio_platform_common.c +++ b/drivers/vfio/platform/vfio_platform_common.c @@ -409,7 +409,7 @@ static ssize_t vfio_platform_read_mmio(struct vfio_platform_region *reg, if (!reg->ioaddr) { reg->ioaddr = - ioremap_nocache(reg->addr, reg->size); + ioremap(reg->addr, reg->size); if (!reg->ioaddr) return -ENOMEM; @@ -486,7 +486,7 @@ static ssize_t vfio_platform_write_mmio(struct vfio_platform_region *reg, if (!reg->ioaddr) { reg->ioaddr = - ioremap_nocache(reg->addr, reg->size); + ioremap(reg->addr, reg->size); if (!reg->ioaddr) return -ENOMEM; diff --git a/drivers/vfio/vfio.c b/drivers/vfio/vfio.c index 388597930b64..c8482624ca34 100644 --- a/drivers/vfio/vfio.c +++ b/drivers/vfio/vfio.c @@ -1184,15 +1184,6 @@ static long vfio_fops_unl_ioctl(struct file *filep, return ret; } -#ifdef CONFIG_COMPAT -static long vfio_fops_compat_ioctl(struct file *filep, - unsigned int cmd, unsigned long arg) -{ - arg = (unsigned long)compat_ptr(arg); - return vfio_fops_unl_ioctl(filep, cmd, arg); -} -#endif /* CONFIG_COMPAT */ - static int vfio_fops_open(struct inode *inode, struct file *filep) { struct vfio_container *container; @@ -1275,9 +1266,7 @@ static const struct 
file_operations vfio_fops = { .read = vfio_fops_read, .write = vfio_fops_write, .unlocked_ioctl = vfio_fops_unl_ioctl, -#ifdef CONFIG_COMPAT - .compat_ioctl = vfio_fops_compat_ioctl, -#endif + .compat_ioctl = compat_ptr_ioctl, .mmap = vfio_fops_mmap, }; @@ -1556,15 +1545,6 @@ static long vfio_group_fops_unl_ioctl(struct file *filep, return ret; } -#ifdef CONFIG_COMPAT -static long vfio_group_fops_compat_ioctl(struct file *filep, - unsigned int cmd, unsigned long arg) -{ - arg = (unsigned long)compat_ptr(arg); - return vfio_group_fops_unl_ioctl(filep, cmd, arg); -} -#endif /* CONFIG_COMPAT */ - static int vfio_group_fops_open(struct inode *inode, struct file *filep) { struct vfio_group *group; @@ -1620,9 +1600,7 @@ static int vfio_group_fops_release(struct inode *inode, struct file *filep) static const struct file_operations vfio_group_fops = { .owner = THIS_MODULE, .unlocked_ioctl = vfio_group_fops_unl_ioctl, -#ifdef CONFIG_COMPAT - .compat_ioctl = vfio_group_fops_compat_ioctl, -#endif + .compat_ioctl = compat_ptr_ioctl, .open = vfio_group_fops_open, .release = vfio_group_fops_release, }; @@ -1687,24 +1665,13 @@ static int vfio_device_fops_mmap(struct file *filep, struct vm_area_struct *vma) return device->ops->mmap(device->device_data, vma); } -#ifdef CONFIG_COMPAT -static long vfio_device_fops_compat_ioctl(struct file *filep, - unsigned int cmd, unsigned long arg) -{ - arg = (unsigned long)compat_ptr(arg); - return vfio_device_fops_unl_ioctl(filep, cmd, arg); -} -#endif /* CONFIG_COMPAT */ - static const struct file_operations vfio_device_fops = { .owner = THIS_MODULE, .release = vfio_device_fops_release, .read = vfio_device_fops_read, .write = vfio_device_fops_write, .unlocked_ioctl = vfio_device_fops_unl_ioctl, -#ifdef CONFIG_COMPAT - .compat_ioctl = vfio_device_fops_compat_ioctl, -#endif + .compat_ioctl = compat_ptr_ioctl, .mmap = vfio_device_fops_mmap, }; diff --git a/drivers/vfio/vfio_iommu_spapr_tce.c b/drivers/vfio/vfio_iommu_spapr_tce.c index 
8ce9ad21129f..16b3adc508db 100644 --- a/drivers/vfio/vfio_iommu_spapr_tce.c +++ b/drivers/vfio/vfio_iommu_spapr_tce.c @@ -79,7 +79,7 @@ static long tce_iommu_mm_set(struct tce_container *container) } BUG_ON(!current->mm); container->mm = current->mm; - atomic_inc(&container->mm->mm_count); + mmgrab(container->mm); return 0; } @@ -176,13 +176,13 @@ put_exit: } static bool tce_page_is_contained(struct mm_struct *mm, unsigned long hpa, - unsigned int page_shift) + unsigned int it_page_shift) { struct page *page; unsigned long size = 0; - if (mm_iommu_is_devmem(mm, hpa, page_shift, &size)) - return size == (1UL << page_shift); + if (mm_iommu_is_devmem(mm, hpa, it_page_shift, &size)) + return size == (1UL << it_page_shift); page = pfn_to_page(hpa >> PAGE_SHIFT); /* @@ -190,7 +190,7 @@ static bool tce_page_is_contained(struct mm_struct *mm, unsigned long hpa, * a page we just found. Otherwise the hardware can get access to * a bigger memory chunk that it should. */ - return (PAGE_SHIFT + compound_order(compound_head(page))) >= page_shift; + return page_shift(compound_head(page)) >= it_page_shift; } static inline bool tce_groups_attached(struct tce_container *container) @@ -435,7 +435,7 @@ static int tce_iommu_clear(struct tce_container *container, unsigned long oldhpa; long ret; enum dma_data_direction direction; - unsigned long lastentry = entry + pages; + unsigned long lastentry = entry + pages, firstentry = entry; for ( ; entry < lastentry; ++entry) { if (tbl->it_indirect_levels && tbl->it_userspace) { @@ -460,7 +460,7 @@ static int tce_iommu_clear(struct tce_container *container, direction = DMA_NONE; oldhpa = 0; - ret = iommu_tce_xchg(container->mm, tbl, entry, &oldhpa, + ret = iommu_tce_xchg_no_kill(container->mm, tbl, entry, &oldhpa, &direction); if (ret) continue; @@ -476,6 +476,8 @@ static int tce_iommu_clear(struct tce_container *container, tce_iommu_unuse_page(container, oldhpa); } + iommu_tce_kill(tbl, firstentry, pages); + return 0; } @@ -518,8 +520,8 @@ 
static long tce_iommu_build(struct tce_container *container, hpa |= offset; dirtmp = direction; - ret = iommu_tce_xchg(container->mm, tbl, entry + i, &hpa, - &dirtmp); + ret = iommu_tce_xchg_no_kill(container->mm, tbl, entry + i, + &hpa, &dirtmp); if (ret) { tce_iommu_unuse_page(container, hpa); pr_err("iommu_tce: %s failed ioba=%lx, tce=%lx, ret=%ld\n", @@ -536,6 +538,8 @@ static long tce_iommu_build(struct tce_container *container, if (ret) tce_iommu_clear(container, tbl, entry, i); + else + iommu_tce_kill(tbl, entry, pages); return ret; } @@ -572,8 +576,8 @@ static long tce_iommu_build_v2(struct tce_container *container, if (mm_iommu_mapped_inc(mem)) break; - ret = iommu_tce_xchg(container->mm, tbl, entry + i, &hpa, - &dirtmp); + ret = iommu_tce_xchg_no_kill(container->mm, tbl, entry + i, + &hpa, &dirtmp); if (ret) { /* dirtmp cannot be DMA_NONE here */ tce_iommu_unuse_page_v2(container, tbl, entry + i); @@ -593,6 +597,8 @@ static long tce_iommu_build_v2(struct tce_container *container, if (ret) tce_iommu_clear(container, tbl, entry, i); + else + iommu_tce_kill(tbl, entry, pages); return ret; } @@ -1234,7 +1240,7 @@ release_exit: static int tce_iommu_attach_group(void *iommu_data, struct iommu_group *iommu_group) { - int ret; + int ret = 0; struct tce_container *container = iommu_data; struct iommu_table_group *table_group; struct tce_iommu_group *tcegrp = NULL; @@ -1287,13 +1293,13 @@ static int tce_iommu_attach_group(void *iommu_data, !table_group->ops->release_ownership) { if (container->v2) { ret = -EPERM; - goto unlock_exit; + goto free_exit; } ret = tce_iommu_take_ownership(container, table_group); } else { if (!container->v2) { ret = -EPERM; - goto unlock_exit; + goto free_exit; } ret = tce_iommu_take_ownership_ddw(container, table_group); if (!tce_groups_attached(container) && !container->tables[0]) @@ -1305,10 +1311,11 @@ static int tce_iommu_attach_group(void *iommu_data, list_add(&tcegrp->next, &container->group_list); } -unlock_exit: +free_exit: if 
(ret && tcegrp) kfree(tcegrp); +unlock_exit: mutex_unlock(&container->lock); return ret; diff --git a/drivers/vfio/vfio_iommu_type1.c b/drivers/vfio/vfio_iommu_type1.c index 054391f30fa8..a177bf2c6683 100644 --- a/drivers/vfio/vfio_iommu_type1.c +++ b/drivers/vfio/vfio_iommu_type1.c @@ -62,6 +62,7 @@ MODULE_PARM_DESC(dma_entry_limit, struct vfio_iommu { struct list_head domain_list; + struct list_head iova_list; struct vfio_domain *external_domain; /* domain for external user */ struct mutex lock; struct rb_root dma_list; @@ -97,6 +98,12 @@ struct vfio_group { bool mdev_group; /* An mdev group */ }; +struct vfio_iova { + struct list_head list; + dma_addr_t start; + dma_addr_t end; +}; + /* * Guest RAM pinning working set or DMA target */ @@ -287,31 +294,13 @@ static int vfio_lock_acct(struct vfio_dma *dma, long npage, bool async) * Some mappings aren't backed by a struct page, for example an mmap'd * MMIO range for our own or another device. These use a different * pfn conversion and shouldn't be tracked as locked pages. + * For compound pages, any driver that sets the reserved bit in head + * page needs to set the reserved bit in all subpages to be safe. */ static bool is_invalid_reserved_pfn(unsigned long pfn) { - if (pfn_valid(pfn)) { - bool reserved; - struct page *tail = pfn_to_page(pfn); - struct page *head = compound_head(tail); - reserved = !!(PageReserved(head)); - if (head != tail) { - /* - * "head" is not a dangling pointer - * (compound_head takes care of that) - * but the hugepage may have been split - * from under us (and we may not hold a - * reference count on the head page so it can - * be reused before we run PageReferenced), so - * we've to check PageTail before returning - * what we just read. 
- */ - smp_rmb(); - if (PageTail(tail)) - return reserved; - } - return PageReserved(tail); - } + if (pfn_valid(pfn)) + return PageReserved(pfn_to_page(pfn)); return true; } @@ -320,9 +309,8 @@ static int put_pfn(unsigned long pfn, int prot) { if (!is_invalid_reserved_pfn(pfn)) { struct page *page = pfn_to_page(pfn); - if (prot & IOMMU_WRITE) - SetPageDirty(page); - put_page(page); + + unpin_user_pages_dirty_lock(&page, 1, prot & IOMMU_WRITE); return 1; } return 0; @@ -333,7 +321,6 @@ static int vaddr_get_pfn(struct mm_struct *mm, unsigned long vaddr, { struct page *page[1]; struct vm_area_struct *vma; - struct vm_area_struct *vmas[1]; unsigned int flags = 0; int ret; @@ -341,32 +328,15 @@ static int vaddr_get_pfn(struct mm_struct *mm, unsigned long vaddr, flags |= FOLL_WRITE; down_read(&mm->mmap_sem); - if (mm == current->mm) { - ret = get_user_pages(vaddr, 1, flags | FOLL_LONGTERM, page, - vmas); - } else { - ret = get_user_pages_remote(NULL, mm, vaddr, 1, flags, page, - vmas, NULL); - /* - * The lifetime of a vaddr_get_pfn() page pin is - * userspace-controlled. In the fs-dax case this could - * lead to indefinite stalls in filesystem operations. - * Disallow attempts to pin fs-dax pages via this - * interface. 
- */ - if (ret > 0 && vma_is_fsdax(vmas[0])) { - ret = -EOPNOTSUPP; - put_page(page[0]); - } - } - up_read(&mm->mmap_sem); - + ret = pin_user_pages_remote(NULL, mm, vaddr, 1, flags | FOLL_LONGTERM, + page, NULL, NULL); if (ret == 1) { *pfn = page_to_pfn(page[0]); - return 0; + ret = 0; + goto done; } - down_read(&mm->mmap_sem); + vaddr = untagged_addr(vaddr); vma = find_vma_intersection(mm, vaddr, vaddr + 1); @@ -375,7 +345,7 @@ static int vaddr_get_pfn(struct mm_struct *mm, unsigned long vaddr, if (is_invalid_reserved_pfn(*pfn)) ret = 0; } - +done: up_read(&mm->mmap_sem); return ret; } @@ -650,12 +620,13 @@ unpin_exit: } static long vfio_sync_unpin(struct vfio_dma *dma, struct vfio_domain *domain, - struct list_head *regions) + struct list_head *regions, + struct iommu_iotlb_gather *iotlb_gather) { long unlocked = 0; struct vfio_regions *entry, *next; - iommu_tlb_sync(domain->domain); + iommu_tlb_sync(domain->domain, iotlb_gather); list_for_each_entry_safe(entry, next, regions, list) { unlocked += vfio_unpin_pages_remote(dma, @@ -685,18 +656,19 @@ static size_t unmap_unpin_fast(struct vfio_domain *domain, struct vfio_dma *dma, dma_addr_t *iova, size_t len, phys_addr_t phys, long *unlocked, struct list_head *unmapped_list, - int *unmapped_cnt) + int *unmapped_cnt, + struct iommu_iotlb_gather *iotlb_gather) { size_t unmapped = 0; struct vfio_regions *entry = kzalloc(sizeof(*entry), GFP_KERNEL); if (entry) { - unmapped = iommu_unmap_fast(domain->domain, *iova, len); + unmapped = iommu_unmap_fast(domain->domain, *iova, len, + iotlb_gather); if (!unmapped) { kfree(entry); } else { - iommu_tlb_range_add(domain->domain, *iova, unmapped); entry->iova = *iova; entry->phys = phys; entry->len = unmapped; @@ -712,8 +684,8 @@ static size_t unmap_unpin_fast(struct vfio_domain *domain, * or in case of errors. 
*/ if (*unmapped_cnt >= VFIO_IOMMU_TLB_SYNC_MAX || !unmapped) { - *unlocked += vfio_sync_unpin(dma, domain, - unmapped_list); + *unlocked += vfio_sync_unpin(dma, domain, unmapped_list, + iotlb_gather); *unmapped_cnt = 0; } @@ -744,6 +716,7 @@ static long vfio_unmap_unpin(struct vfio_iommu *iommu, struct vfio_dma *dma, dma_addr_t iova = dma->iova, end = dma->iova + dma->size; struct vfio_domain *domain, *d; LIST_HEAD(unmapped_region_list); + struct iommu_iotlb_gather iotlb_gather; int unmapped_region_cnt = 0; long unlocked = 0; @@ -768,6 +741,7 @@ static long vfio_unmap_unpin(struct vfio_iommu *iommu, struct vfio_dma *dma, cond_resched(); } + iommu_iotlb_gather_init(&iotlb_gather); while (iova < end) { size_t unmapped, len; phys_addr_t phys, next; @@ -796,7 +770,8 @@ static long vfio_unmap_unpin(struct vfio_iommu *iommu, struct vfio_dma *dma, */ unmapped = unmap_unpin_fast(domain, dma, &iova, len, phys, &unlocked, &unmapped_region_list, - &unmapped_region_cnt); + &unmapped_region_cnt, + &iotlb_gather); if (!unmapped) { unmapped = unmap_unpin_slow(domain, dma, &iova, len, phys, &unlocked); @@ -807,8 +782,10 @@ static long vfio_unmap_unpin(struct vfio_iommu *iommu, struct vfio_dma *dma, dma->iommu_mapped = false; - if (unmapped_region_cnt) - unlocked += vfio_sync_unpin(dma, domain, &unmapped_region_list); + if (unmapped_region_cnt) { + unlocked += vfio_sync_unpin(dma, domain, &unmapped_region_list, + &iotlb_gather); + } if (do_accounting) { vfio_lock_acct(dma, -unlocked, true); @@ -1031,6 +1008,27 @@ static int vfio_pin_map_dma(struct vfio_iommu *iommu, struct vfio_dma *dma, return ret; } +/* + * Check dma map request is within a valid iova range + */ +static bool vfio_iommu_iova_dma_valid(struct vfio_iommu *iommu, + dma_addr_t start, dma_addr_t end) +{ + struct list_head *iova = &iommu->iova_list; + struct vfio_iova *node; + + list_for_each_entry(node, iova, list) { + if (start >= node->start && end <= node->end) + return true; + } + + /* + * Check for list_empty() 
as well since a container with + * a single mdev device will have an empty list. + */ + return list_empty(iova); +} + static int vfio_dma_do_map(struct vfio_iommu *iommu, struct vfio_iommu_type1_dma_map *map) { @@ -1074,6 +1072,11 @@ static int vfio_dma_do_map(struct vfio_iommu *iommu, goto out_unlock; } + if (!vfio_iommu_iova_dma_valid(iommu, iova, iova + size - 1)) { + ret = -EINVAL; + goto out_unlock; + } + dma = kzalloc(sizeof(*dma), GFP_KERNEL); if (!dma) { ret = -ENOMEM; @@ -1263,15 +1266,13 @@ static struct vfio_group *find_iommu_group(struct vfio_domain *domain, return NULL; } -static bool vfio_iommu_has_sw_msi(struct iommu_group *group, phys_addr_t *base) +static bool vfio_iommu_has_sw_msi(struct list_head *group_resv_regions, + phys_addr_t *base) { - struct list_head group_resv_regions; - struct iommu_resv_region *region, *next; + struct iommu_resv_region *region; bool ret = false; - INIT_LIST_HEAD(&group_resv_regions); - iommu_get_group_resv_regions(group, &group_resv_regions); - list_for_each_entry(region, &group_resv_regions, list) { + list_for_each_entry(region, group_resv_regions, list) { /* * The presence of any 'real' MSI regions should take * precedence over the software-managed one if the @@ -1287,8 +1288,7 @@ static bool vfio_iommu_has_sw_msi(struct iommu_group *group, phys_addr_t *base) ret = true; } } - list_for_each_entry_safe(region, next, &group_resv_regions, list) - kfree(region); + return ret; } @@ -1388,6 +1388,228 @@ static int vfio_mdev_iommu_device(struct device *dev, void *data) return 0; } +/* + * This is a helper function to insert an address range to iova list. + * The list is initially created with a single entry corresponding to + * the IOMMU domain geometry to which the device group is attached. + * The list aperture gets modified when a new domain is added to the + * container if the new aperture doesn't conflict with the current one + * or with any existing dma mappings. 
The list is also modified to + * exclude any reserved regions associated with the device group. + */ +static int vfio_iommu_iova_insert(struct list_head *head, + dma_addr_t start, dma_addr_t end) +{ + struct vfio_iova *region; + + region = kmalloc(sizeof(*region), GFP_KERNEL); + if (!region) + return -ENOMEM; + + INIT_LIST_HEAD(&region->list); + region->start = start; + region->end = end; + + list_add_tail(&region->list, head); + return 0; +} + +/* + * Check the new iommu aperture conflicts with existing aper or with any + * existing dma mappings. + */ +static bool vfio_iommu_aper_conflict(struct vfio_iommu *iommu, + dma_addr_t start, dma_addr_t end) +{ + struct vfio_iova *first, *last; + struct list_head *iova = &iommu->iova_list; + + if (list_empty(iova)) + return false; + + /* Disjoint sets, return conflict */ + first = list_first_entry(iova, struct vfio_iova, list); + last = list_last_entry(iova, struct vfio_iova, list); + if (start > last->end || end < first->start) + return true; + + /* Check for any existing dma mappings below the new start */ + if (start > first->start) { + if (vfio_find_dma(iommu, first->start, start - first->start)) + return true; + } + + /* Check for any existing dma mappings beyond the new end */ + if (end < last->end) { + if (vfio_find_dma(iommu, end + 1, last->end - end)) + return true; + } + + return false; +} + +/* + * Resize iommu iova aperture window. This is called only if the new + * aperture has no conflict with existing aperture and dma mappings.
+ */ +static int vfio_iommu_aper_resize(struct list_head *iova, + dma_addr_t start, dma_addr_t end) +{ + struct vfio_iova *node, *next; + + if (list_empty(iova)) + return vfio_iommu_iova_insert(iova, start, end); + + /* Adjust iova list start */ + list_for_each_entry_safe(node, next, iova, list) { + if (start < node->start) + break; + if (start >= node->start && start < node->end) { + node->start = start; + break; + } + /* Delete nodes before new start */ + list_del(&node->list); + kfree(node); + } + + /* Adjust iova list end */ + list_for_each_entry_safe(node, next, iova, list) { + if (end > node->end) + continue; + if (end > node->start && end <= node->end) { + node->end = end; + continue; + } + /* Delete nodes after new end */ + list_del(&node->list); + kfree(node); + } + + return 0; +} + +/* + * Check reserved region conflicts with existing dma mappings + */ +static bool vfio_iommu_resv_conflict(struct vfio_iommu *iommu, + struct list_head *resv_regions) +{ + struct iommu_resv_region *region; + + /* Check for conflict with existing dma mappings */ + list_for_each_entry(region, resv_regions, list) { + if (region->type == IOMMU_RESV_DIRECT_RELAXABLE) + continue; + + if (vfio_find_dma(iommu, region->start, region->length)) + return true; + } + + return false; +} + +/* + * Check iova region overlap with reserved regions and + * exclude them from the iommu iova range + */ +static int vfio_iommu_resv_exclude(struct list_head *iova, + struct list_head *resv_regions) +{ + struct iommu_resv_region *resv; + struct vfio_iova *n, *next; + + list_for_each_entry(resv, resv_regions, list) { + phys_addr_t start, end; + + if (resv->type == IOMMU_RESV_DIRECT_RELAXABLE) + continue; + + start = resv->start; + end = resv->start + resv->length - 1; + + list_for_each_entry_safe(n, next, iova, list) { + int ret = 0; + + /* No overlap */ + if (start > n->end || end < n->start) + continue; + /* + * Insert a new node if current node overlaps with the + * reserve region to exlude that 
from valid iova range. + * Note that, new node is inserted before the current + * node and finally the current node is deleted keeping + * the list updated and sorted. + */ + if (start > n->start) + ret = vfio_iommu_iova_insert(&n->list, n->start, + start - 1); + if (!ret && end < n->end) + ret = vfio_iommu_iova_insert(&n->list, end + 1, + n->end); + if (ret) + return ret; + + list_del(&n->list); + kfree(n); + } + } + + if (list_empty(iova)) + return -EINVAL; + + return 0; +} + +static void vfio_iommu_resv_free(struct list_head *resv_regions) +{ + struct iommu_resv_region *n, *next; + + list_for_each_entry_safe(n, next, resv_regions, list) { + list_del(&n->list); + kfree(n); + } +} + +static void vfio_iommu_iova_free(struct list_head *iova) +{ + struct vfio_iova *n, *next; + + list_for_each_entry_safe(n, next, iova, list) { + list_del(&n->list); + kfree(n); + } +} + +static int vfio_iommu_iova_get_copy(struct vfio_iommu *iommu, + struct list_head *iova_copy) +{ + struct list_head *iova = &iommu->iova_list; + struct vfio_iova *n; + int ret; + + list_for_each_entry(n, iova, list) { + ret = vfio_iommu_iova_insert(iova_copy, n->start, n->end); + if (ret) + goto out_free; + } + + return 0; + +out_free: + vfio_iommu_iova_free(iova_copy); + return ret; +} + +static void vfio_iommu_iova_insert_copy(struct vfio_iommu *iommu, + struct list_head *iova_copy) +{ + struct list_head *iova = &iommu->iova_list; + + vfio_iommu_iova_free(iova); + + list_splice_tail(iova_copy, iova); +} static int vfio_iommu_type1_attach_group(void *iommu_data, struct iommu_group *iommu_group) { @@ -1397,7 +1619,10 @@ static int vfio_iommu_type1_attach_group(void *iommu_data, struct bus_type *bus = NULL; int ret; bool resv_msi, msi_remap; - phys_addr_t resv_msi_base; + phys_addr_t resv_msi_base = 0; + struct iommu_domain_geometry geo; + LIST_HEAD(iova_copy); + LIST_HEAD(group_resv_regions); mutex_lock(&iommu->lock); @@ -1474,7 +1699,43 @@ static int vfio_iommu_type1_attach_group(void *iommu_data, if 
(ret) goto out_domain; - resv_msi = vfio_iommu_has_sw_msi(iommu_group, &resv_msi_base); + /* Get aperture info */ + iommu_domain_get_attr(domain->domain, DOMAIN_ATTR_GEOMETRY, &geo); + + if (vfio_iommu_aper_conflict(iommu, geo.aperture_start, + geo.aperture_end)) { + ret = -EINVAL; + goto out_detach; + } + + ret = iommu_get_group_resv_regions(iommu_group, &group_resv_regions); + if (ret) + goto out_detach; + + if (vfio_iommu_resv_conflict(iommu, &group_resv_regions)) { + ret = -EINVAL; + goto out_detach; + } + + /* + * We don't want to work on the original iova list as the list + * gets modified and in case of failure we have to retain the + * original list. Get a copy here. + */ + ret = vfio_iommu_iova_get_copy(iommu, &iova_copy); + if (ret) + goto out_detach; + + ret = vfio_iommu_aper_resize(&iova_copy, geo.aperture_start, + geo.aperture_end); + if (ret) + goto out_detach; + + ret = vfio_iommu_resv_exclude(&iova_copy, &group_resv_regions); + if (ret) + goto out_detach; + + resv_msi = vfio_iommu_has_sw_msi(&group_resv_regions, &resv_msi_base); INIT_LIST_HEAD(&domain->group_list); list_add(&group->next, &domain->group_list); @@ -1507,8 +1768,7 @@ static int vfio_iommu_type1_attach_group(void *iommu_data, list_add(&group->next, &d->group_list); iommu_domain_free(domain->domain); kfree(domain); - mutex_unlock(&iommu->lock); - return 0; + goto done; } ret = vfio_iommu_attach_group(domain, group); @@ -1531,8 +1791,11 @@ static int vfio_iommu_type1_attach_group(void *iommu_data, } list_add(&domain->next, &iommu->domain_list); - +done: + /* Delete the old one and insert new iova list */ + vfio_iommu_iova_insert_copy(iommu, &iova_copy); mutex_unlock(&iommu->lock); + vfio_iommu_resv_free(&group_resv_regions); return 0; @@ -1540,6 +1803,8 @@ out_detach: vfio_iommu_detach_group(domain, group); out_domain: iommu_domain_free(domain->domain); + vfio_iommu_iova_free(&iova_copy); + vfio_iommu_resv_free(&group_resv_regions); out_free: kfree(domain); kfree(group); @@ -1595,12 
+1860,93 @@ static void vfio_sanity_check_pfn_list(struct vfio_iommu *iommu) WARN_ON(iommu->notifier.head); } +/* + * Called when a domain is removed in detach. It is possible that + * the removed domain decided the iova aperture window. Modify the + * iova aperture with the smallest window among existing domains. + */ +static void vfio_iommu_aper_expand(struct vfio_iommu *iommu, + struct list_head *iova_copy) +{ + struct vfio_domain *domain; + struct iommu_domain_geometry geo; + struct vfio_iova *node; + dma_addr_t start = 0; + dma_addr_t end = (dma_addr_t)~0; + + if (list_empty(iova_copy)) + return; + + list_for_each_entry(domain, &iommu->domain_list, next) { + iommu_domain_get_attr(domain->domain, DOMAIN_ATTR_GEOMETRY, + &geo); + if (geo.aperture_start > start) + start = geo.aperture_start; + if (geo.aperture_end < end) + end = geo.aperture_end; + } + + /* Modify aperture limits. The new aper is either same or bigger */ + node = list_first_entry(iova_copy, struct vfio_iova, list); + node->start = start; + node = list_last_entry(iova_copy, struct vfio_iova, list); + node->end = end; +} + +/* + * Called when a group is detached. The reserved regions for that + * group can be part of valid iova now. But since reserved regions + * may be duplicated among groups, populate the iova valid regions + * list again. 
+ */ +static int vfio_iommu_resv_refresh(struct vfio_iommu *iommu, + struct list_head *iova_copy) +{ + struct vfio_domain *d; + struct vfio_group *g; + struct vfio_iova *node; + dma_addr_t start, end; + LIST_HEAD(resv_regions); + int ret; + + if (list_empty(iova_copy)) + return -EINVAL; + + list_for_each_entry(d, &iommu->domain_list, next) { + list_for_each_entry(g, &d->group_list, next) { + ret = iommu_get_group_resv_regions(g->iommu_group, + &resv_regions); + if (ret) + goto done; + } + } + + node = list_first_entry(iova_copy, struct vfio_iova, list); + start = node->start; + node = list_last_entry(iova_copy, struct vfio_iova, list); + end = node->end; + + /* purge the iova list and create new one */ + vfio_iommu_iova_free(iova_copy); + + ret = vfio_iommu_aper_resize(iova_copy, start, end); + if (ret) + goto done; + + /* Exclude current reserved regions from iova ranges */ + ret = vfio_iommu_resv_exclude(iova_copy, &resv_regions); +done: + vfio_iommu_resv_free(&resv_regions); + return ret; +} + static void vfio_iommu_type1_detach_group(void *iommu_data, struct iommu_group *iommu_group) { struct vfio_iommu *iommu = iommu_data; struct vfio_domain *domain; struct vfio_group *group; + LIST_HEAD(iova_copy); mutex_lock(&iommu->lock); @@ -1623,6 +1969,13 @@ static void vfio_iommu_type1_detach_group(void *iommu_data, } } + /* + * Get a copy of iova list. This will be used to update + * and to replace the current one later. Please note that + * we will leave the original list as it is if update fails. 
+ */ + vfio_iommu_iova_get_copy(iommu, &iova_copy); + list_for_each_entry(domain, &iommu->domain_list, next) { group = find_iommu_group(domain, iommu_group); if (!group) @@ -1648,10 +2001,16 @@ static void vfio_iommu_type1_detach_group(void *iommu_data, iommu_domain_free(domain->domain); list_del(&domain->next); kfree(domain); + vfio_iommu_aper_expand(iommu, &iova_copy); } break; } + if (!vfio_iommu_resv_refresh(iommu, &iova_copy)) + vfio_iommu_iova_insert_copy(iommu, &iova_copy); + else + vfio_iommu_iova_free(&iova_copy); + detach_group_done: mutex_unlock(&iommu->lock); } @@ -1679,6 +2038,7 @@ static void *vfio_iommu_type1_open(unsigned long arg) } INIT_LIST_HEAD(&iommu->domain_list); + INIT_LIST_HEAD(&iommu->iova_list); iommu->dma_list = RB_ROOT; iommu->dma_avail = dma_entry_limit; mutex_init(&iommu->lock); @@ -1722,6 +2082,9 @@ static void vfio_iommu_type1_release(void *iommu_data) list_del(&domain->next); kfree(domain); } + + vfio_iommu_iova_free(&iommu->iova_list); + kfree(iommu); } @@ -1742,6 +2105,73 @@ static int vfio_domains_have_iommu_cache(struct vfio_iommu *iommu) return ret; } +static int vfio_iommu_iova_add_cap(struct vfio_info_cap *caps, + struct vfio_iommu_type1_info_cap_iova_range *cap_iovas, + size_t size) +{ + struct vfio_info_cap_header *header; + struct vfio_iommu_type1_info_cap_iova_range *iova_cap; + + header = vfio_info_cap_add(caps, size, + VFIO_IOMMU_TYPE1_INFO_CAP_IOVA_RANGE, 1); + if (IS_ERR(header)) + return PTR_ERR(header); + + iova_cap = container_of(header, + struct vfio_iommu_type1_info_cap_iova_range, + header); + iova_cap->nr_iovas = cap_iovas->nr_iovas; + memcpy(iova_cap->iova_ranges, cap_iovas->iova_ranges, + cap_iovas->nr_iovas * sizeof(*cap_iovas->iova_ranges)); + return 0; +} + +static int vfio_iommu_iova_build_caps(struct vfio_iommu *iommu, + struct vfio_info_cap *caps) +{ + struct vfio_iommu_type1_info_cap_iova_range *cap_iovas; + struct vfio_iova *iova; + size_t size; + int iovas = 0, i = 0, ret; + + 
mutex_lock(&iommu->lock); + + list_for_each_entry(iova, &iommu->iova_list, list) + iovas++; + + if (!iovas) { + /* + * Return 0 as a container with a single mdev device + * will have an empty list + */ + ret = 0; + goto out_unlock; + } + + size = sizeof(*cap_iovas) + (iovas * sizeof(*cap_iovas->iova_ranges)); + + cap_iovas = kzalloc(size, GFP_KERNEL); + if (!cap_iovas) { + ret = -ENOMEM; + goto out_unlock; + } + + cap_iovas->nr_iovas = iovas; + + list_for_each_entry(iova, &iommu->iova_list, list) { + cap_iovas->iova_ranges[i].start = iova->start; + cap_iovas->iova_ranges[i].end = iova->end; + i++; + } + + ret = vfio_iommu_iova_add_cap(caps, cap_iovas, size); + + kfree(cap_iovas); +out_unlock: + mutex_unlock(&iommu->lock); + return ret; +} + static long vfio_iommu_type1_ioctl(void *iommu_data, unsigned int cmd, unsigned long arg) { @@ -1763,19 +2193,53 @@ static long vfio_iommu_type1_ioctl(void *iommu_data, } } else if (cmd == VFIO_IOMMU_GET_INFO) { struct vfio_iommu_type1_info info; + struct vfio_info_cap caps = { .buf = NULL, .size = 0 }; + unsigned long capsz; + int ret; minsz = offsetofend(struct vfio_iommu_type1_info, iova_pgsizes); + /* For backward compatibility, cannot require this */ + capsz = offsetofend(struct vfio_iommu_type1_info, cap_offset); + if (copy_from_user(&info, (void __user *)arg, minsz)) return -EFAULT; if (info.argsz < minsz) return -EINVAL; + if (info.argsz >= capsz) { + minsz = capsz; + info.cap_offset = 0; /* output, no-recopy necessary */ + } + info.flags = VFIO_IOMMU_INFO_PGSIZES; info.iova_pgsizes = vfio_pgsize_bitmap(iommu); + ret = vfio_iommu_iova_build_caps(iommu, &caps); + if (ret) + return ret; + + if (caps.size) { + info.flags |= VFIO_IOMMU_INFO_CAPS; + + if (info.argsz < sizeof(info) + caps.size) { + info.argsz = sizeof(info) + caps.size; + } else { + vfio_info_cap_shift(&caps, sizeof(info)); + if (copy_to_user((void __user *)arg + + sizeof(info), caps.buf, + caps.size)) { + kfree(caps.buf); + return -EFAULT; + } + 
info.cap_offset = sizeof(info); + } + + kfree(caps.buf); + } + return copy_to_user((void __user *)arg, &info, minsz) ? -EFAULT : 0; |