diff options
author | Linus Torvalds <torvalds@linux-foundation.org> | 2015-11-13 17:05:32 -0800 |
---|---|---|
committer | Linus Torvalds <torvalds@linux-foundation.org> | 2015-11-13 17:05:32 -0800 |
commit | 934f98d7e8123892bd9ca8ea08728ee0784e6597 (patch) | |
tree | 1a73064970020d945ca5f2c31ce420021b99e94c /drivers/vfio/vfio.c | |
parent | f3996e6ac6e2bd739d8a82cc9acae0653c2d5dca (diff) | |
parent | 222e684ca762e9288108fcf852eb5d08cbe10ae3 (diff) | |
download | talos-obmc-linux-934f98d7e8123892bd9ca8ea08728ee0784e6597.tar.gz talos-obmc-linux-934f98d7e8123892bd9ca8ea08728ee0784e6597.zip |
Merge tag 'vfio-v4.4-rc1' of git://github.com/awilliam/linux-vfio
Pull VFIO updates from Alex Williamson:
- Use kernel interfaces for VPD emulation (Alex Williamson)
- Platform fix for releasing IRQs (Eric Auger)
- Type1 IOMMU always advertises PAGE_SIZE support when smaller mapping
sizes are available (Eric Auger)
- Platform fixes for incorrectly using copies of structures rather than
pointers to structures (James Morse)
- Rework platform reset modules, fix leak, and add AMD xgbe reset
module (Eric Auger)
- Fix vfio_device_get_from_name() return value (Joerg Roedel)
- No-IOMMU interface (Alex Williamson)
- Fix potential out of bounds array access in PCI config handling (Dan
Carpenter)
* tag 'vfio-v4.4-rc1' of git://github.com/awilliam/linux-vfio:
vfio/pci: make an array larger
vfio: Include No-IOMMU mode
vfio: Fix bug in vfio_device_get_from_name()
VFIO: platform: reset: AMD xgbe reset module
vfio: platform: reset: calxedaxgmac: fix ioaddr leak
vfio: platform: add dev_info on device reset
vfio: platform: use list of registered reset function
vfio: platform: add compat in vfio_platform_device
vfio: platform: reset: calxedaxgmac: add reset function registration
vfio: platform: introduce module_vfio_reset_handler macro
vfio: platform: add capability to register a reset function
vfio: platform: introduce vfio-platform-base module
vfio/platform: store mapped memory in region, instead of an on-stack copy
vfio/type1: handle case where IOMMU does not support PAGE_SIZE size
VFIO: platform: clear IRQ_NOAUTOEN when de-assigning the IRQ
vfio/pci: Use kernel VPD access functions
vfio: Whitelist PCI bridges
Diffstat (limited to 'drivers/vfio/vfio.c')
-rw-r--r-- | drivers/vfio/vfio.c | 224 |
1 files changed, 209 insertions, 15 deletions
diff --git a/drivers/vfio/vfio.c b/drivers/vfio/vfio.c index 563c510f285c..de632da2e22f 100644 --- a/drivers/vfio/vfio.c +++ b/drivers/vfio/vfio.c @@ -25,6 +25,7 @@ #include <linux/miscdevice.h> #include <linux/module.h> #include <linux/mutex.h> +#include <linux/pci.h> #include <linux/rwsem.h> #include <linux/sched.h> #include <linux/slab.h> @@ -61,6 +62,7 @@ struct vfio_container { struct rw_semaphore group_lock; struct vfio_iommu_driver *iommu_driver; void *iommu_data; + bool noiommu; }; struct vfio_unbound_dev { @@ -83,6 +85,7 @@ struct vfio_group { struct list_head unbound_list; struct mutex unbound_lock; atomic_t opened; + bool noiommu; }; struct vfio_device { @@ -94,6 +97,147 @@ struct vfio_device { void *device_data; }; +#ifdef CONFIG_VFIO_NOIOMMU +static bool noiommu __read_mostly; +module_param_named(enable_unsafe_noiommu_support, + noiommu, bool, S_IRUGO | S_IWUSR); +MODULE_PARM_DESC(enable_unsafe_noiommu_mode, "Enable UNSAFE, no-IOMMU mode. This mode provides no device isolation, no DMA translation, no host kernel protection, cannot be used for device assignment to virtual machines, requires RAWIO permissions, and will taint the kernel. If you do not know what this is for, step away. (default: false)"); +#endif + +/* + * vfio_iommu_group_{get,put} are only intended for VFIO bus driver probe + * and remove functions, any use cases other than acquiring the first + * reference for the purpose of calling vfio_add_group_dev() or removing + * that symmetric reference after vfio_del_group_dev() should use the raw + * iommu_group_{get,put} functions. In particular, vfio_iommu_group_put() + * removes the device from the dummy group and cannot be nested. + */ +struct iommu_group *vfio_iommu_group_get(struct device *dev) +{ + struct iommu_group *group; + int __maybe_unused ret; + + group = iommu_group_get(dev); + +#ifdef CONFIG_VFIO_NOIOMMU + /* + * With noiommu enabled, an IOMMU group will be created for a device + * that doesn't already have one and doesn't have an iommu_ops on their + * bus. We use iommu_present() again in the main code to detect these + * fake groups. + */ + if (group || !noiommu || iommu_present(dev->bus)) + return group; + + group = iommu_group_alloc(); + if (IS_ERR(group)) + return NULL; + + iommu_group_set_name(group, "vfio-noiommu"); + ret = iommu_group_add_device(group, dev); + iommu_group_put(group); + if (ret) + return NULL; + + /* + * Where to taint? At this point we've added an IOMMU group for a + * device that is not backed by iommu_ops, therefore any iommu_ + * callback using iommu_ops can legitimately Oops. So, while we may + * be about to give a DMA capable device to a user without IOMMU + * protection, which is clearly taint-worthy, let's go ahead and do + * it here. + */ + add_taint(TAINT_USER, LOCKDEP_STILL_OK); + dev_warn(dev, "Adding kernel taint for vfio-noiommu group on device\n"); +#endif + + return group; +} +EXPORT_SYMBOL_GPL(vfio_iommu_group_get); + +void vfio_iommu_group_put(struct iommu_group *group, struct device *dev) +{ +#ifdef CONFIG_VFIO_NOIOMMU + if (!iommu_present(dev->bus)) + iommu_group_remove_device(dev); +#endif + + iommu_group_put(group); +} +EXPORT_SYMBOL_GPL(vfio_iommu_group_put); + +#ifdef CONFIG_VFIO_NOIOMMU +static void *vfio_noiommu_open(unsigned long arg) +{ + if (arg != VFIO_NOIOMMU_IOMMU) + return ERR_PTR(-EINVAL); + if (!capable(CAP_SYS_RAWIO)) + return ERR_PTR(-EPERM); + + return NULL; +} + +static void vfio_noiommu_release(void *iommu_data) +{ +} + +static long vfio_noiommu_ioctl(void *iommu_data, + unsigned int cmd, unsigned long arg) +{ + if (cmd == VFIO_CHECK_EXTENSION) + return arg == VFIO_NOIOMMU_IOMMU ? 1 : 0; + + return -ENOTTY; +} + +static int vfio_iommu_present(struct device *dev, void *unused) +{ + return iommu_present(dev->bus) ? 1 : 0; +} + +static int vfio_noiommu_attach_group(void *iommu_data, + struct iommu_group *iommu_group) +{ + return iommu_group_for_each_dev(iommu_group, NULL, + vfio_iommu_present) ? -EINVAL : 0; +} + +static void vfio_noiommu_detach_group(void *iommu_data, + struct iommu_group *iommu_group) +{ +} + +static struct vfio_iommu_driver_ops vfio_noiommu_ops = { + .name = "vfio-noiommu", + .owner = THIS_MODULE, + .open = vfio_noiommu_open, + .release = vfio_noiommu_release, + .ioctl = vfio_noiommu_ioctl, + .attach_group = vfio_noiommu_attach_group, + .detach_group = vfio_noiommu_detach_group, +}; + +static struct vfio_iommu_driver vfio_noiommu_driver = { + .ops = &vfio_noiommu_ops, +}; + +/* + * Wrap IOMMU drivers, the noiommu driver is the one and only driver for + * noiommu groups (and thus containers) and not available for normal groups. + */ +#define vfio_for_each_iommu_driver(con, pos) \ + for (pos = con->noiommu ? &vfio_noiommu_driver : \ + list_first_entry(&vfio.iommu_drivers_list, \ + struct vfio_iommu_driver, vfio_next); \ + (con->noiommu ? pos != NULL : \ + &pos->vfio_next != &vfio.iommu_drivers_list); \ + pos = con->noiommu ? NULL : list_next_entry(pos, vfio_next)) +#else +#define vfio_for_each_iommu_driver(con, pos) \ + list_for_each_entry(pos, &vfio.iommu_drivers_list, vfio_next) +#endif + + /** * IOMMU driver registration */ @@ -198,7 +342,8 @@ static void vfio_group_unlock_and_free(struct vfio_group *group) /** * Group objects - create, release, get, put, search */ -static struct vfio_group *vfio_create_group(struct iommu_group *iommu_group) +static struct vfio_group *vfio_create_group(struct iommu_group *iommu_group, + bool noiommu) { struct vfio_group *group, *tmp; struct device *dev; @@ -216,6 +361,7 @@ static struct vfio_group *vfio_create_group(struct iommu_group *iommu_group) atomic_set(&group->container_users, 0); atomic_set(&group->opened, 0); group->iommu_group = iommu_group; + group->noiommu = noiommu; group->nb.notifier_call = vfio_iommu_group_notifier; @@ -251,7 +397,8 @@ static struct vfio_group *vfio_create_group(struct iommu_group *iommu_group) dev = device_create(vfio.class, NULL, MKDEV(MAJOR(vfio.group_devt), minor), - group, "%d", iommu_group_id(iommu_group)); + group, "%s%d", noiommu ? "noiommu-" : "", + iommu_group_id(iommu_group)); if (IS_ERR(dev)) { vfio_free_group_minor(minor); vfio_group_unlock_and_free(group); @@ -438,16 +585,33 @@ static struct vfio_device *vfio_group_get_device(struct vfio_group *group, } /* - * Whitelist some drivers that we know are safe (no dma) or just sit on - * a device. It's not always practical to leave a device within a group - * driverless as it could get re-bound to something unsafe. + * Some drivers, like pci-stub, are only used to prevent other drivers from + * claiming a device and are therefore perfectly legitimate for a user owned + * group. The pci-stub driver has no dependencies on DMA or the IOVA mapping + * of the device, but it does prevent the user from having direct access to + * the device, which is useful in some circumstances. + * + * We also assume that we can include PCI interconnect devices, ie. bridges. + * IOMMU grouping on PCI necessitates that if we lack isolation on a bridge + * then all of the downstream devices will be part of the same IOMMU group as + * the bridge. Thus, if placing the bridge into the user owned IOVA space + * breaks anything, it only does so for user owned devices downstream. Note + * that error notification via MSI can be affected for platforms that handle + * MSI within the same IOVA space as DMA. */ -static const char * const vfio_driver_whitelist[] = { "pci-stub", "pcieport" }; +static const char * const vfio_driver_whitelist[] = { "pci-stub" }; -static bool vfio_whitelisted_driver(struct device_driver *drv) +static bool vfio_dev_whitelisted(struct device *dev, struct device_driver *drv) { int i; + if (dev_is_pci(dev)) { + struct pci_dev *pdev = to_pci_dev(dev); + + if (pdev->hdr_type != PCI_HEADER_TYPE_NORMAL) + return true; + } + for (i = 0; i < ARRAY_SIZE(vfio_driver_whitelist); i++) { if (!strcmp(drv->name, vfio_driver_whitelist[i])) return true; @@ -462,6 +626,7 @@ static bool vfio_whitelisted_driver(struct device_driver *drv) * - driver-less * - bound to a vfio driver * - bound to a whitelisted driver + * - a PCI interconnect device * * We use two methods to determine whether a device is bound to a vfio * driver. The first is to test whether the device exists in the vfio @@ -486,7 +651,7 @@ static int vfio_dev_viable(struct device *dev, void *data) } mutex_unlock(&group->unbound_lock); - if (!ret || !drv || vfio_whitelisted_driver(drv)) + if (!ret || !drv || vfio_dev_whitelisted(dev, drv)) return 0; device = vfio_group_get_device(group, dev); @@ -621,7 +786,8 @@ int vfio_add_group_dev(struct device *dev, group = vfio_group_get_from_iommu(iommu_group); if (!group) { - group = vfio_create_group(iommu_group); + group = vfio_create_group(iommu_group, + !iommu_present(dev->bus)); if (IS_ERR(group)) { iommu_group_put(iommu_group); return PTR_ERR(group); @@ -692,11 +858,12 @@ EXPORT_SYMBOL_GPL(vfio_device_get_from_dev); static struct vfio_device *vfio_device_get_from_name(struct vfio_group *group, char *buf) { - struct vfio_device *device; + struct vfio_device *it, *device = NULL; mutex_lock(&group->device_lock); - list_for_each_entry(device, &group->device_list, group_next) { - if (!strcmp(dev_name(device->dev), buf)) { + list_for_each_entry(it, &group->device_list, group_next) { + if (!strcmp(dev_name(it->dev), buf)) { + device = it; vfio_device_get(device); break; } @@ -832,8 +999,7 @@ static long vfio_ioctl_check_extension(struct vfio_container *container, */ if (!driver) { mutex_lock(&vfio.iommu_drivers_lock); - list_for_each_entry(driver, &vfio.iommu_drivers_list, - vfio_next) { + vfio_for_each_iommu_driver(container, driver) { if (!try_module_get(driver->ops->owner)) continue; @@ -902,7 +1068,7 @@ static long vfio_ioctl_set_iommu(struct vfio_container *container, } mutex_lock(&vfio.iommu_drivers_lock); - list_for_each_entry(driver, &vfio.iommu_drivers_list, vfio_next) { + vfio_for_each_iommu_driver(container, driver) { void *data; if (!try_module_get(driver->ops->owner)) @@ -1167,6 +1333,9 @@ static int vfio_group_set_container(struct vfio_group *group, int container_fd) if (atomic_read(&group->container_users)) return -EINVAL; + if (group->noiommu && !capable(CAP_SYS_RAWIO)) + return -EPERM; + f = fdget(container_fd); if (!f.file) return -EBADF; @@ -1182,6 +1351,13 @@ static int vfio_group_set_container(struct vfio_group *group, int container_fd) down_write(&container->group_lock); + /* Real groups and fake groups cannot mix */ + if (!list_empty(&container->group_list) && + container->noiommu != group->noiommu) { + ret = -EPERM; + goto unlock_out; + } + driver = container->iommu_driver; if (driver) { ret = driver->ops->attach_group(container->iommu_data, @@ -1191,6 +1367,7 @@ static int vfio_group_set_container(struct vfio_group *group, int container_fd) } group->container = container; + container->noiommu = group->noiommu; list_add(&group->container_next, &container->group_list); /* Get a reference on the container and mark a user within the group */ @@ -1221,6 +1398,9 @@ static int vfio_group_get_device_fd(struct vfio_group *group, char *buf) !group->container->iommu_driver || !vfio_group_viable(group)) return -EINVAL; + if (group->noiommu && !capable(CAP_SYS_RAWIO)) + return -EPERM; + device = vfio_device_get_from_name(group, buf); if (!device) return -ENODEV; @@ -1263,6 +1443,10 @@ static int vfio_group_get_device_fd(struct vfio_group *group, char *buf) fd_install(ret, filep); + if (group->noiommu) + dev_warn(device->dev, "vfio-noiommu device opened by user " + "(%s:%d)\n", current->comm, task_pid_nr(current)); + return ret; } @@ -1351,6 +1535,11 @@ static int vfio_group_fops_open(struct inode *inode, struct file *filep) if (!group) return -ENODEV; + if (group->noiommu && !capable(CAP_SYS_RAWIO)) { + vfio_group_put(group); + return -EPERM; + } + /* Do we need multiple instances of the group open? Seems not. */ opened = atomic_cmpxchg(&group->opened, 0, 1); if (opened) { @@ -1513,6 +1702,11 @@ struct vfio_group *vfio_group_get_external_user(struct file *filep) if (!atomic_inc_not_zero(&group->container_users)) return ERR_PTR(-EINVAL); + if (group->noiommu) { + atomic_dec(&group->container_users); + return ERR_PTR(-EPERM); + } + if (!group->container->iommu_driver || !vfio_group_viable(group)) { atomic_dec(&group->container_users); |