diff options
Diffstat (limited to 'drivers/vfio')
-rw-r--r-- | drivers/vfio/Kconfig | 2 | ||||
-rw-r--r-- | drivers/vfio/pci/vfio_pci.c | 8 | ||||
-rw-r--r-- | drivers/vfio/pci/vfio_pci_config.c | 31 | ||||
-rw-r--r-- | drivers/vfio/vfio_iommu_spapr_tce.c | 23 |
4 files changed, 58 insertions, 6 deletions
diff --git a/drivers/vfio/Kconfig b/drivers/vfio/Kconfig index c84333eb5eb5..9de5ed38da83 100644 --- a/drivers/vfio/Kconfig +++ b/drivers/vfio/Kconfig @@ -21,7 +21,7 @@ config VFIO_VIRQFD menuconfig VFIO tristate "VFIO Non-Privileged userspace driver framework" depends on IOMMU_API - select VFIO_IOMMU_TYPE1 if (X86 || S390 || ARM_SMMU || ARM_SMMU_V3) + select VFIO_IOMMU_TYPE1 if (X86 || S390 || ARM || ARM64) select ANON_INODES help VFIO provides a framework for secure userspace device drivers. diff --git a/drivers/vfio/pci/vfio_pci.c b/drivers/vfio/pci/vfio_pci.c index cddb453a1ba5..50cdedfca9fe 100644 --- a/drivers/vfio/pci/vfio_pci.c +++ b/drivers/vfio/pci/vfio_pci.c @@ -434,10 +434,14 @@ static int vfio_pci_get_irq_count(struct vfio_pci_device *vdev, int irq_type) { if (irq_type == VFIO_PCI_INTX_IRQ_INDEX) { u8 pin; + + if (!IS_ENABLED(CONFIG_VFIO_PCI_INTX) || + vdev->nointx || vdev->pdev->is_virtfn) + return 0; + pci_read_config_byte(vdev->pdev, PCI_INTERRUPT_PIN, &pin); - if (IS_ENABLED(CONFIG_VFIO_PCI_INTX) && !vdev->nointx && pin) - return 1; + return pin ? 1 : 0; } else if (irq_type == VFIO_PCI_MSI_IRQ_INDEX) { u8 pos; u16 flags; diff --git a/drivers/vfio/pci/vfio_pci_config.c b/drivers/vfio/pci/vfio_pci_config.c index 115a36f6f403..423ea1f98441 100644 --- a/drivers/vfio/pci/vfio_pci_config.c +++ b/drivers/vfio/pci/vfio_pci_config.c @@ -1180,8 +1180,10 @@ static int vfio_msi_cap_len(struct vfio_pci_device *vdev, u8 pos) return -ENOMEM; ret = init_pci_cap_msi_perm(vdev->msi_perm, len, flags); - if (ret) + if (ret) { + kfree(vdev->msi_perm); return ret; + } return len; } @@ -1610,6 +1612,15 @@ static int vfio_ecap_init(struct vfio_pci_device *vdev) } /* + * Nag about hardware bugs, hopefully to have vendors fix them, but at least + * to collect a list of dependencies for the VF INTx pin quirk below. + */ +static const struct pci_device_id known_bogus_vf_intx_pin[] = { + { PCI_DEVICE(PCI_VENDOR_ID_INTEL, 0x270c) }, + {} +}; + +/* * For each device we allocate a pci_config_map that indicates the * capability occupying each dword and thus the struct perm_bits we * use for read and write. We also allocate a virtualized config @@ -1674,6 +1685,24 @@ int vfio_config_init(struct vfio_pci_device *vdev) if (pdev->is_virtfn) { *(__le16 *)&vconfig[PCI_VENDOR_ID] = cpu_to_le16(pdev->vendor); *(__le16 *)&vconfig[PCI_DEVICE_ID] = cpu_to_le16(pdev->device); + + /* + * Per SR-IOV spec rev 1.1, 3.4.1.18 the interrupt pin register + * does not apply to VFs and VFs must implement this register + * as read-only with value zero. Userspace is not readily able + * to identify whether a device is a VF and thus that the pin + * definition on the device is bogus should it violate this + * requirement. We already virtualize the pin register for + * other purposes, so we simply need to replace the bogus value + * and consider VFs when we determine INTx IRQ count. + */ + if (vconfig[PCI_INTERRUPT_PIN] && + !pci_match_id(known_bogus_vf_intx_pin, pdev)) + pci_warn(pdev, + "Hardware bug: VF reports bogus INTx pin %d\n", + vconfig[PCI_INTERRUPT_PIN]); + + vconfig[PCI_INTERRUPT_PIN] = 0; /* Gratuitous for good VFs */ } if (!IS_ENABLED(CONFIG_VFIO_PCI_INTX) || vdev->nointx) diff --git a/drivers/vfio/vfio_iommu_spapr_tce.c b/drivers/vfio/vfio_iommu_spapr_tce.c index 96721b154454..b30926e11d87 100644 --- a/drivers/vfio/vfio_iommu_spapr_tce.c +++ b/drivers/vfio/vfio_iommu_spapr_tce.c @@ -444,7 +444,7 @@ static void tce_iommu_unuse_page_v2(struct tce_container *container, struct mm_iommu_table_group_mem_t *mem = NULL; int ret; unsigned long hpa = 0; - __be64 *pua = IOMMU_TABLE_USERSPACE_ENTRY(tbl, entry); + __be64 *pua = IOMMU_TABLE_USERSPACE_ENTRY_RO(tbl, entry); if (!pua) return; @@ -467,8 +467,27 @@ static int tce_iommu_clear(struct tce_container *container, unsigned long oldhpa; long ret; enum dma_data_direction direction; + unsigned long lastentry = entry + pages; + + for ( ; entry < lastentry; ++entry) { + if (tbl->it_indirect_levels && tbl->it_userspace) { + /* + * For multilevel tables, we can take a shortcut here + * and skip some TCEs as we know that the userspace + * addresses cache is a mirror of the real TCE table + * and if it is missing some indirect levels, then + * the hardware table does not have them allocated + * either and therefore does not require updating. + */ + __be64 *pua = IOMMU_TABLE_USERSPACE_ENTRY_RO(tbl, + entry); + if (!pua) { + /* align to level_size which is power of two */ + entry |= tbl->it_level_size - 1; + continue; + } + } - for ( ; pages; --pages, ++entry) { cond_resched(); direction = DMA_NONE; |