diff options
Diffstat (limited to 'drivers/iommu/amd_iommu.c')
-rw-r--r-- | drivers/iommu/amd_iommu.c | 1289 |
1 files changed, 410 insertions, 879 deletions
diff --git a/drivers/iommu/amd_iommu.c b/drivers/iommu/amd_iommu.c index b607a92791d3..aac132bd1ef0 100644 --- a/drivers/iommu/amd_iommu.c +++ b/drivers/iommu/amd_iommu.c @@ -20,6 +20,7 @@ #include <linux/scatterlist.h> #include <linux/dma-mapping.h> #include <linux/dma-direct.h> +#include <linux/dma-iommu.h> #include <linux/iommu-helper.h> #include <linux/iommu.h> #include <linux/delay.h> @@ -70,7 +71,6 @@ */ #define AMD_IOMMU_PGSIZES ((~0xFFFUL) & ~(2ULL << 38)) -static DEFINE_SPINLOCK(amd_iommu_devtable_lock); static DEFINE_SPINLOCK(pd_bitmap_lock); /* List of all available dev_data structures */ @@ -89,8 +89,6 @@ const struct iommu_ops amd_iommu_ops; static ATOMIC_NOTIFIER_HEAD(ppr_notifier); int amd_iommu_max_glx_val = -1; -static const struct dma_map_ops amd_iommu_dma_ops; - /* * general struct to manage commands send to an IOMMU */ @@ -103,21 +101,6 @@ struct kmem_cache *amd_iommu_irq_cache; static void update_domain(struct protection_domain *domain); static int protection_domain_init(struct protection_domain *domain); static void detach_device(struct device *dev); -static void iova_domain_flush_tlb(struct iova_domain *iovad); - -/* - * Data container for a dma_ops specific protection domain - */ -struct dma_ops_domain { - /* generic protection domain information */ - struct protection_domain domain; - - /* IOVA RB-Tree */ - struct iova_domain iovad; -}; - -static struct iova_domain reserved_iova_ranges; -static struct lock_class_key reserved_rbtree_key; /**************************************************************************** * @@ -125,30 +108,6 @@ static struct lock_class_key reserved_rbtree_key; * ****************************************************************************/ -static inline int match_hid_uid(struct device *dev, - struct acpihid_map_entry *entry) -{ - struct acpi_device *adev = ACPI_COMPANION(dev); - const char *hid, *uid; - - if (!adev) - return -ENODEV; - - hid = acpi_device_hid(adev); - uid = acpi_device_uid(adev); - - if (!hid || !(*hid)) - return -ENODEV; - - if (!uid || !(*uid)) - return strcmp(hid, entry->hid); - - if (!(*entry->uid)) - return strcmp(hid, entry->hid); - - return (strcmp(hid, entry->hid) || strcmp(uid, entry->uid)); -} - static inline u16 get_pci_device_id(struct device *dev) { struct pci_dev *pdev = to_pci_dev(dev); @@ -159,10 +118,14 @@ static inline u16 get_pci_device_id(struct device *dev) static inline int get_acpihid_device_id(struct device *dev, struct acpihid_map_entry **entry) { + struct acpi_device *adev = ACPI_COMPANION(dev); struct acpihid_map_entry *p; + if (!adev) + return -ENODEV; + list_for_each_entry(p, &acpihid_map, list) { - if (!match_hid_uid(dev, p)) { + if (acpi_dev_hid_uid_match(adev, p->hid, p->uid)) { if (entry) *entry = p; return p->devid; @@ -188,12 +151,6 @@ static struct protection_domain *to_pdomain(struct iommu_domain *dom) return container_of(dom, struct protection_domain, domain); } -static struct dma_ops_domain* to_dma_ops_domain(struct protection_domain *domain) -{ - BUG_ON(domain->flags != PD_DMA_OPS_MASK); - return container_of(domain, struct dma_ops_domain, domain); -} - static struct iommu_dev_data *alloc_dev_data(u16 devid) { struct iommu_dev_data *dev_data; @@ -202,6 +159,7 @@ static struct iommu_dev_data *alloc_dev_data(u16 devid) if (!dev_data) return NULL; + spin_lock_init(&dev_data->lock); dev_data->devid = devid; ratelimit_default_init(&dev_data->rs); @@ -226,71 +184,58 @@ static struct iommu_dev_data *search_dev_data(u16 devid) return NULL; } -static int __last_alias(struct pci_dev *pdev, u16 alias, void *data) -{ - *(u16 *)data = alias; - return 0; -} - -static u16 get_alias(struct device *dev) +static int clone_alias(struct pci_dev *pdev, u16 alias, void *data) { - struct pci_dev *pdev = to_pci_dev(dev); - u16 devid, ivrs_alias, pci_alias; + u16 devid = pci_dev_id(pdev); - /* The callers make sure that get_device_id() does not fail here */ - devid = get_device_id(dev); - - /* For ACPI HID devices, we simply return the devid as such */ - if (!dev_is_pci(dev)) - return devid; + if (devid == alias) + return 0; - ivrs_alias = amd_iommu_alias_table[devid]; + amd_iommu_rlookup_table[alias] = + amd_iommu_rlookup_table[devid]; + memcpy(amd_iommu_dev_table[alias].data, + amd_iommu_dev_table[devid].data, + sizeof(amd_iommu_dev_table[alias].data)); - pci_for_each_dma_alias(pdev, __last_alias, &pci_alias); + return 0; +} - if (ivrs_alias == pci_alias) - return ivrs_alias; +static void clone_aliases(struct pci_dev *pdev) +{ + if (!pdev) + return; /* - * DMA alias showdown - * - * The IVRS is fairly reliable in telling us about aliases, but it - * can't know about every screwy device. If we don't have an IVRS - * reported alias, use the PCI reported alias. In that case we may - * still need to initialize the rlookup and dev_table entries if the - * alias is to a non-existent device. + * The IVRS alias stored in the alias table may not be + * part of the PCI DMA aliases if it's bus differs + * from the original device. */ - if (ivrs_alias == devid) { - if (!amd_iommu_rlookup_table[pci_alias]) { - amd_iommu_rlookup_table[pci_alias] = - amd_iommu_rlookup_table[devid]; - memcpy(amd_iommu_dev_table[pci_alias].data, - amd_iommu_dev_table[devid].data, - sizeof(amd_iommu_dev_table[pci_alias].data)); - } + clone_alias(pdev, amd_iommu_alias_table[pci_dev_id(pdev)], NULL); - return pci_alias; - } + pci_for_each_dma_alias(pdev, clone_alias, NULL); +} + +static struct pci_dev *setup_aliases(struct device *dev) +{ + struct pci_dev *pdev = to_pci_dev(dev); + u16 ivrs_alias; - pci_info(pdev, "Using IVRS reported alias %02x:%02x.%d " - "for device [%04x:%04x], kernel reported alias " - "%02x:%02x.%d\n", PCI_BUS_NUM(ivrs_alias), PCI_SLOT(ivrs_alias), - PCI_FUNC(ivrs_alias), pdev->vendor, pdev->device, - PCI_BUS_NUM(pci_alias), PCI_SLOT(pci_alias), - PCI_FUNC(pci_alias)); + /* For ACPI HID devices, there are no aliases */ + if (!dev_is_pci(dev)) + return NULL; /* - * If we don't have a PCI DMA alias and the IVRS alias is on the same - * bus, then the IVRS table may know about a quirk that we don't. + * Add the IVRS alias to the pci aliases if it is on the same + * bus. The IVRS table may know about a quirk that we don't. */ - if (pci_alias == devid && - PCI_BUS_NUM(ivrs_alias) == pdev->bus->number) { - pci_add_dma_alias(pdev, ivrs_alias & 0xff); - pci_info(pdev, "Added PCI DMA alias %02x.%d\n", - PCI_SLOT(ivrs_alias), PCI_FUNC(ivrs_alias)); - } + ivrs_alias = amd_iommu_alias_table[pci_dev_id(pdev)]; + if (ivrs_alias != pci_dev_id(pdev) && + PCI_BUS_NUM(ivrs_alias) == pdev->bus->number) + pci_add_dma_alias(pdev, ivrs_alias & 0xff, 1); + + clone_aliases(pdev); - return ivrs_alias; + return pdev; } static struct iommu_dev_data *find_dev_data(u16 devid) @@ -428,7 +373,7 @@ static int iommu_init_device(struct device *dev) if (!dev_data) return -ENOMEM; - dev_data->alias = get_alias(dev); + dev_data->pdev = setup_aliases(dev); /* * By default we use passthrough mode for IOMMUv2 capable device. @@ -436,7 +381,7 @@ static int iommu_init_device(struct device *dev) * invalid address), we ignore the capability for the device so * it'll be forced to go into translation mode. */ - if ((iommu_pass_through || !amd_iommu_force_isolation) && + if ((iommu_default_passthrough() || !amd_iommu_force_isolation) && dev_is_pci(dev) && pci_iommuv2_capable(to_pci_dev(dev))) { struct amd_iommu *iommu; @@ -453,20 +398,16 @@ static int iommu_init_device(struct device *dev) static void iommu_ignore_device(struct device *dev) { - u16 alias; int devid; devid = get_device_id(dev); if (devid < 0) return; - alias = get_alias(dev); - + amd_iommu_rlookup_table[devid] = NULL; memset(&amd_iommu_dev_table[devid], 0, sizeof(struct dev_table_entry)); - memset(&amd_iommu_dev_table[alias], 0, sizeof(struct dev_table_entry)); - amd_iommu_rlookup_table[devid] = NULL; - amd_iommu_rlookup_table[alias] = NULL; + setup_aliases(dev); } static void iommu_uninit_device(struct device *dev) @@ -501,6 +442,29 @@ static void iommu_uninit_device(struct device *dev) */ } +/* + * Helper function to get the first pte of a large mapping + */ +static u64 *first_pte_l7(u64 *pte, unsigned long *page_size, + unsigned long *count) +{ + unsigned long pte_mask, pg_size, cnt; + u64 *fpte; + + pg_size = PTE_PAGE_SIZE(*pte); + cnt = PAGE_SIZE_PTE_COUNT(pg_size); + pte_mask = ~((cnt << 3) - 1); + fpte = (u64 *)(((unsigned long)pte) & pte_mask); + + if (page_size) + *page_size = pg_size; + + if (count) + *count = cnt; + + return fpte; +} + /**************************************************************************** * * Interrupt handling functions @@ -560,7 +524,8 @@ static void iommu_print_event(struct amd_iommu *iommu, void *__evt) retry: type = (event[1] >> EVENT_TYPE_SHIFT) & EVENT_TYPE_MASK; devid = (event[0] >> EVENT_DEVID_SHIFT) & EVENT_DEVID_MASK; - pasid = PPR_PASID(*(u64 *)&event[0]); + pasid = (event[0] & EVENT_DOMID_MASK_HI) | + (event[1] & EVENT_DOMID_MASK_LO); flags = (event[1] >> EVENT_FLAGS_SHIFT) & EVENT_FLAGS_MASK; address = (u64)(((u64)event[3]) << 32) | event[2]; @@ -593,7 +558,7 @@ retry: address, flags); break; case EVENT_TYPE_PAGE_TAB_ERR: - dev_err(dev, "Event logged [PAGE_TAB_HARDWARE_ERROR device=%02x:%02x.%x domain=0x%04x address=0x%llx flags=0x%04x]\n", + dev_err(dev, "Event logged [PAGE_TAB_HARDWARE_ERROR device=%02x:%02x.%x pasid=0x%04x address=0x%llx flags=0x%04x]\n", PCI_BUS_NUM(devid), PCI_SLOT(devid), PCI_FUNC(devid), pasid, address, flags); break; @@ -616,8 +581,7 @@ retry: pasid, address, flags); break; case EVENT_TYPE_INV_PPR_REQ: - pasid = ((event[0] >> 16) & 0xFFFF) - | ((event[1] << 6) & 0xF0000); + pasid = PPR_PASID(*((u64 *)__evt)); tag = event[1] & 0x03FF; dev_err(dev, "Event logged [INVALID_PPR_REQUEST device=%02x:%02x.%x pasid=0x%05x address=0x%llx flags=0x%04x tag=0x%03x]\n", PCI_BUS_NUM(devid), PCI_SLOT(devid), PCI_FUNC(devid), @@ -852,17 +816,18 @@ static void copy_cmd_to_buffer(struct amd_iommu *iommu, struct iommu_cmd *cmd) { u8 *target; - - target = iommu->cmd_buf + iommu->cmd_buf_tail; - - iommu->cmd_buf_tail += sizeof(*cmd); - iommu->cmd_buf_tail %= CMD_BUFFER_SIZE; + u32 tail; /* Copy command to buffer */ + tail = iommu->cmd_buf_tail; + target = iommu->cmd_buf + tail; memcpy(target, cmd, sizeof(*cmd)); + tail = (tail + sizeof(*cmd)) % CMD_BUFFER_SIZE; + iommu->cmd_buf_tail = tail; + /* Tell the IOMMU about it */ - writel(iommu->cmd_buf_tail, iommu->mmio_base + MMIO_CMD_TAIL_OFFSET); + writel(tail, iommu->mmio_base + MMIO_CMD_TAIL_OFFSET); } static void build_completion_wait(struct iommu_cmd *cmd, u64 address) @@ -1143,6 +1108,17 @@ static void amd_iommu_flush_tlb_all(struct amd_iommu *iommu) iommu_completion_wait(iommu); } +static void amd_iommu_flush_tlb_domid(struct amd_iommu *iommu, u32 dom_id) +{ + struct iommu_cmd cmd; + + build_inv_iommu_pages(&cmd, 0, CMD_INV_IOMMU_ALL_PAGES_ADDRESS, + dom_id, 1); + iommu_queue_command(iommu, &cmd); + + iommu_completion_wait(iommu); +} + static void amd_iommu_flush_all(struct amd_iommu *iommu) { struct iommu_cmd cmd; @@ -1201,6 +1177,13 @@ static int device_flush_iotlb(struct iommu_dev_data *dev_data, return iommu_queue_command(iommu, &cmd); } +static int device_flush_dte_alias(struct pci_dev *pdev, u16 alias, void *data) +{ + struct amd_iommu *iommu = data; + + return iommu_flush_dte(iommu, alias); +} + /* * Command send function for invalidating a device table entry */ @@ -1211,14 +1194,22 @@ static int device_flush_dte(struct iommu_dev_data *dev_data) int ret; iommu = amd_iommu_rlookup_table[dev_data->devid]; - alias = dev_data->alias; - ret = iommu_flush_dte(iommu, dev_data->devid); - if (!ret && alias != dev_data->devid) - ret = iommu_flush_dte(iommu, alias); + if (dev_data->pdev) + ret = pci_for_each_dma_alias(dev_data->pdev, + device_flush_dte_alias, iommu); + else + ret = iommu_flush_dte(iommu, dev_data->devid); if (ret) return ret; + alias = amd_iommu_alias_table[dev_data->devid]; + if (alias != dev_data->devid) { + ret = iommu_flush_dte(iommu, alias); + if (ret) + return ret; + } + if (dev_data->ats.enabled) ret = device_flush_iotlb(dev_data, 0, ~0UL); @@ -1267,12 +1258,6 @@ static void domain_flush_pages(struct protection_domain *domain, __domain_flush_pages(domain, address, size, 0); } -/* Flush the whole IO/TLB for a given protection domain */ -static void domain_flush_tlb(struct protection_domain *domain) -{ - __domain_flush_pages(domain, 0, CMD_INV_IOMMU_ALL_PAGES_ADDRESS, 0); -} - /* Flush the whole IO/TLB for a given protection domain - including PDE */ static void domain_flush_tlb_pde(struct protection_domain *domain) { @@ -1300,8 +1285,12 @@ static void domain_flush_np_cache(struct protection_domain *domain, dma_addr_t iova, size_t size) { if (unlikely(amd_iommu_np_cache)) { + unsigned long flags; + + spin_lock_irqsave(&domain->lock, flags); domain_flush_pages(domain, iova, size); domain_flush_complete(domain); + spin_unlock_irqrestore(&domain->lock, flags); } } @@ -1414,7 +1403,7 @@ static void free_pagetable(struct protection_domain *domain) BUG_ON(domain->mode < PAGE_MODE_NONE || domain->mode > PAGE_MODE_6_LEVEL); - free_sub_pt(root, domain->mode, freelist); + freelist = free_sub_pt(root, domain->mode, freelist); free_page_list(freelist); } @@ -1425,32 +1414,42 @@ static void free_pagetable(struct protection_domain *domain) * to 64 bits. */ static bool increase_address_space(struct protection_domain *domain, + unsigned long address, gfp_t gfp) { + unsigned long flags; + bool ret = false; u64 *pte; - if (domain->mode == PAGE_MODE_6_LEVEL) - /* address space already 64 bit large */ - return false; + spin_lock_irqsave(&domain->lock, flags); + + if (address <= PM_LEVEL_SIZE(domain->mode) || + WARN_ON_ONCE(domain->mode == PAGE_MODE_6_LEVEL)) + goto out; pte = (void *)get_zeroed_page(gfp); if (!pte) - return false; + goto out; *pte = PM_LEVEL_PDE(domain->mode, iommu_virt_to_phys(domain->pt_root)); domain->pt_root = pte; domain->mode += 1; - domain->updated = true; - return true; + ret = true; + +out: + spin_unlock_irqrestore(&domain->lock, flags); + + return ret; } static u64 *alloc_pte(struct protection_domain *domain, unsigned long address, unsigned long page_size, u64 **pte_page, - gfp_t gfp) + gfp_t gfp, + bool *updated) { int level, end_lvl; u64 *pte, *page; @@ -1458,7 +1457,7 @@ static u64 *alloc_pte(struct protection_domain *domain, BUG_ON(!is_power_of_2(page_size)); while (address > PM_LEVEL_SIZE(domain->mode)) - increase_address_space(domain, gfp); + *updated = increase_address_space(domain, address, gfp) || *updated; level = domain->mode - 1; pte = &domain->pt_root[PM_LEVEL_INDEX(level, address)]; @@ -1472,9 +1471,32 @@ static u64 *alloc_pte(struct protection_domain *domain, __pte = *pte; pte_level = PM_PTE_LEVEL(__pte); - if (!IOMMU_PTE_PRESENT(__pte) || + /* + * If we replace a series of large PTEs, we need + * to tear down all of them. + */ + if (IOMMU_PTE_PRESENT(__pte) && pte_level == PAGE_MODE_7_LEVEL) { + unsigned long count, i; + u64 *lpte; + + lpte = first_pte_l7(pte, NULL, &count); + + /* + * Unmap the replicated PTEs that still match the + * original large mapping + */ + for (i = 0; i < count; ++i) + cmpxchg64(&lpte[i], __pte, 0ULL); + + *updated = true; + continue; + } + + if (!IOMMU_PTE_PRESENT(__pte) || + pte_level == PAGE_MODE_NONE) { page = (u64 *)get_zeroed_page(gfp); + if (!page) return NULL; @@ -1483,8 +1505,8 @@ static u64 *alloc_pte(struct protection_domain *domain, /* pte could have been changed somewhere. */ if (cmpxchg64(pte, __pte, __npte) != __pte) free_page((unsigned long)page); - else if (pte_level == PAGE_MODE_7_LEVEL) - domain->updated = true; + else if (IOMMU_PTE_PRESENT(__pte)) + *updated = true; continue; } @@ -1549,17 +1571,12 @@ static u64 *fetch_pte(struct protection_domain *domain, *page_size = PTE_LEVEL_PAGE_SIZE(level); } - if (PM_PTE_LEVEL(*pte) == 0x07) { - unsigned long pte_mask; - - /* - * If we have a series of large PTEs, make - * sure to return a pointer to the first one. - */ - *page_size = pte_mask = PTE_PAGE_SIZE(*pte); - pte_mask = ~((PAGE_SIZE_PTE_COUNT(pte_mask) << 3) - 1); - pte = (u64 *)(((unsigned long)pte) & pte_mask); - } + /* + * If we have a series of large PTEs, make + * sure to return a pointer to the first one. + */ + if (PM_PTE_LEVEL(*pte) == PAGE_MODE_7_LEVEL) + pte = first_pte_l7(pte, page_size, NULL); return pte; } @@ -1598,26 +1615,29 @@ static int iommu_map_page(struct protection_domain *dom, gfp_t gfp) { struct page *freelist = NULL; + bool updated = false; u64 __pte, *pte; - int i, count; + int ret, i, count; BUG_ON(!IS_ALIGNED(bus_addr, page_size)); BUG_ON(!IS_ALIGNED(phys_addr, page_size)); + ret = -EINVAL; if (!(prot & IOMMU_PROT_MASK)) - return -EINVAL; + goto out; count = PAGE_SIZE_PTE_COUNT(page_size); - pte = alloc_pte(dom, bus_addr, page_size, NULL, gfp); + pte = alloc_pte(dom, bus_addr, page_size, NULL, gfp, &updated); + ret = -ENOMEM; if (!pte) - return -ENOMEM; + goto out; for (i = 0; i < count; ++i) freelist = free_clear_pte(&pte[i], pte[i], freelist); if (freelist != NULL) - dom->updated = true; + updated = true; if (count > 1) { __pte = PAGE_SIZE_PTE(__sme_set(phys_addr), page_size); @@ -1633,12 +1653,21 @@ static int iommu_map_page(struct protection_domain *dom, for (i = 0; i < count; ++i) pte[i] = __pte; - update_domain(dom); + ret = 0; + +out: + if (updated) { + unsigned long flags; + + spin_lock_irqsave(&dom->lock, flags); + update_domain(dom); + spin_unlock_irqrestore(&dom->lock, flags); + } /* Everything flushed out, free pages now */ free_page_list(freelist); - return 0; + return ret; } static unsigned long iommu_unmap_page(struct protection_domain *dom, @@ -1676,43 +1705,6 @@ static unsigned long iommu_unmap_page(struct protection_domain *dom, /**************************************************************************** * - * The next functions belong to the address allocator for the dma_ops - * interface functions. - * - ****************************************************************************/ - - -static unsigned long dma_ops_alloc_iova(struct device *dev, - struct dma_ops_domain *dma_dom, - unsigned int pages, u64 dma_mask) -{ - unsigned long pfn = 0; - - pages = __roundup_pow_of_two(pages); - - if (dma_mask > DMA_BIT_MASK(32)) - pfn = alloc_iova_fast(&dma_dom->iovad, pages, - IOVA_PFN(DMA_BIT_MASK(32)), false); - - if (!pfn) - pfn = alloc_iova_fast(&dma_dom->iovad, pages, - IOVA_PFN(dma_mask), true); - - return (pfn << PAGE_SHIFT); -} - -static void dma_ops_free_iova(struct dma_ops_domain *dma_dom, - unsigned long address, - unsigned int pages) -{ - pages = __roundup_pow_of_two(pages); - address >>= PAGE_SHIFT; - - free_iova_fast(&dma_dom->iovad, address, pages); -} - -/**************************************************************************** - * * The next functions belong to the domain allocation. A domain is * allocated for every IOMMU as the default domain. If device isolation * is enabled, every device get its own domain. The most important thing @@ -1787,38 +1779,23 @@ static void free_gcr3_table(struct protection_domain *domain) free_page((unsigned long)domain->gcr3_tbl); } -static void dma_ops_domain_flush_tlb(struct dma_ops_domain *dom) -{ - domain_flush_tlb(&dom->domain); - domain_flush_complete(&dom->domain); -} - -static void iova_domain_flush_tlb(struct iova_domain *iovad) -{ - struct dma_ops_domain *dom; - - dom = container_of(iovad, struct dma_ops_domain, iovad); - - dma_ops_domain_flush_tlb(dom); -} - /* * Free a domain, only used if something went wrong in the * allocation path and we need to free an already allocated page table */ -static void dma_ops_domain_free(struct dma_ops_domain *dom) +static void dma_ops_domain_free(struct protection_domain *domain) { - if (!dom) + if (!domain) return; - put_iova_domain(&dom->iovad); + iommu_put_dma_cookie(&domain->domain); - free_pagetable(&dom->domain); + free_pagetable(domain); - if (dom->domain.id) - domain_id_free(dom->domain.id); + if (domain->id) + domain_id_free(domain->id); - kfree(dom); + kfree(domain); } /* @@ -1826,35 +1803,30 @@ static void dma_ops_domain_free(struct dma_ops_domain *dom) * It also initializes the page table and the address allocator data * structures required for the dma_ops interface */ -static struct dma_ops_domain *dma_ops_domain_alloc(void) +static struct protection_domain *dma_ops_domain_alloc(void) { - struct dma_ops_domain *dma_dom; + struct protection_domain *domain; - dma_dom = kzalloc(sizeof(struct dma_ops_domain), GFP_KERNEL); - if (!dma_dom) + domain = kzalloc(sizeof(struct protection_domain), GFP_KERNEL); + if (!domain) return NULL; - if (protection_domain_init(&dma_dom->domain)) - goto free_dma_dom; - - dma_dom->domain.mode = PAGE_MODE_3_LEVEL; - dma_dom->domain.pt_root = (void *)get_zeroed_page(GFP_KERNEL); - dma_dom->domain.flags = PD_DMA_OPS_MASK; - if (!dma_dom->domain.pt_root) - goto free_dma_dom; - - init_iova_domain(&dma_dom->iovad, PAGE_SIZE, IOVA_START_PFN); + if (protection_domain_init(domain)) + goto free_domain; - if (init_iova_flush_queue(&dma_dom->iovad, iova_domain_flush_tlb, NULL)) - goto free_dma_dom; + domain->mode = PAGE_MODE_3_LEVEL; + domain->pt_root = (void *)get_zeroed_page(GFP_KERNEL); + domain->flags = PD_DMA_OPS_MASK; + if (!domain->pt_root) + goto free_domain; - /* Initialize reserved ranges */ - copy_reserved_iova(&reserved_iova_ranges, &dma_dom->iovad); + if (iommu_get_dma_cookie(&domain->domain) == -ENOMEM) + goto free_domain; - return dma_dom; + return domain; -free_dma_dom: - dma_ops_domain_free(dma_dom); +free_domain: + dma_ops_domain_free(domain); return NULL; } @@ -1873,6 +1845,7 @@ static void set_dte_entry(u16 devid, struct protection_domain *domain, { u64 pte_root = 0; u64 flags = 0; + u32 old_domid; if (domain->mode != PAGE_MODE_NONE) pte_root = iommu_virt_to_phys(domain->pt_root); @@ -1922,8 +1895,20 @@ static void set_dte_entry(u16 devid, struct protection_domain *domain, flags &= ~DEV_DOMID_MASK; flags |= domain->id; + old_domid = amd_iommu_dev_table[devid].data[1] & DEV_DOMID_MASK; amd_iommu_dev_table[devid].data[1] = flags; amd_iommu_dev_table[devid].data[0] = pte_root; + + /* + * A kdump kernel might be replacing a domain ID that was copied from + * the previous kernel--if so, it needs to flush the translation cache + * entries for the old domain ID that is being overwritten + */ + if (old_domid) { + struct amd_iommu *iommu = amd_iommu_rlookup_table[devid]; + + amd_iommu_flush_tlb_domid(iommu, old_domid); + } } static void clear_dte_entry(u16 devid) @@ -1939,11 +1924,9 @@ static void do_attach(struct iommu_dev_data *dev_data, struct protection_domain *domain) { struct amd_iommu *iommu; - u16 alias; bool ats; iommu = amd_iommu_rlookup_table[dev_data->devid]; - alias = dev_data->alias; ats = dev_data->ats.enabled; /* Update data structures */ @@ -1956,8 +1939,7 @@ static void do_attach(struct iommu_dev_data *dev_data, /* Update device table */ set_dte_entry(dev_data->devid, domain, ats, dev_data->iommu_v2); - if (alias != dev_data->devid) - set_dte_entry(alias, domain, ats, dev_data->iommu_v2); + clone_aliases(dev_data->pdev); device_flush_dte(dev_data); } @@ -1966,17 +1948,14 @@ static void do_detach(struct iommu_dev_data *dev_data) { struct protection_domain *domain = dev_data->domain; struct amd_iommu *iommu; - u16 alias; iommu = amd_iommu_rlookup_table[dev_data->devid]; - alias = dev_data->alias; /* Update data structures */ dev_data->domain = NULL; list_del(&dev_data->list); clear_dte_entry(dev_data->devid); - if (alias != dev_data->devid) - clear_dte_entry(alias); + clone_aliases(dev_data->pdev); /* Flush the DTE entry */ device_flush_dte(dev_data); @@ -1992,36 +1971,6 @@ static void do_detach(struct iommu_dev_data *dev_data) domain->dev_cnt -= 1; } -/* - * If a device is not yet associated with a domain, this function makes the - * device visible in the domain - */ -static int __attach_device(struct iommu_dev_data *dev_data, - struct protection_domain *domain) -{ - int ret; - - /* lock domain */ - spin_lock(&domain->lock); - - ret = -EBUSY; - if (dev_data->domain != NULL) - goto out_unlock; - - /* Attach alias group root */ - do_attach(dev_data, domain); - - ret = 0; - -out_unlock: - - /* ready */ - spin_unlock(&domain->lock); - - return ret; -} - - static void pdev_iommuv2_disable(struct pci_dev *pdev) { pci_disable_ats(pdev); @@ -2103,19 +2052,28 @@ static int attach_device(struct device *dev, unsigned long flags; int ret; + spin_lock_irqsave(&domain->lock, flags); + dev_data = get_dev_data(dev); + spin_lock(&dev_data->lock); + + ret = -EBUSY; + if (dev_data->domain != NULL) + goto out; + if (!dev_is_pci(dev)) goto skip_ats_check; pdev = to_pci_dev(dev); if (domain->flags & PD_IOMMUV2_MASK) { + ret = -EINVAL; if (!dev_data->passthrough) - return -EINVAL; + goto out; if (dev_data->iommu_v2) { if (pdev_iommuv2_enable(pdev) != 0) - return -EINVAL; + goto out; dev_data->ats.enabled = true; dev_data->ats.qdep = pci_ats_queue_depth(pdev); @@ -2128,9 +2086,9 @@ static int attach_device(struct device *dev, } skip_ats_check: - spin_lock_irqsave(&amd_iommu_devtable_lock, flags); - ret = __attach_device(dev_data, domain); - spin_unlock_irqrestore(&amd_iommu_devtable_lock, flags); + ret = 0; + + do_attach(dev_data, domain); /* * We might boot into a crash-kernel here. The crashed kernel @@ -2139,23 +2097,14 @@ skip_ats_check: */ domain_flush_tlb_pde(domain); - return ret; -} - -/* - * Removes a device from a protection domain (unlocked) - */ -static void __detach_device(struct iommu_dev_data *dev_data) -{ - struct protection_domain *domain; - - domain = dev_data->domain; + domain_flush_complete(domain); - spin_lock(&domain->lock); +out: + spin_unlock(&dev_data->lock); - do_detach(dev_data); + spin_unlock_irqrestore(&domain->lock, flags); - spin_unlock(&domain->lock); + return ret; } /* @@ -2170,6 +2119,10 @@ static void detach_device(struct device *dev) dev_data = get_dev_data(dev); domain = dev_data->domain; + spin_lock_irqsave(&domain->lock, flags); + + spin_lock(&dev_data->lock); + /* * First check if the device is still attached. It might already * be detached from its domain because the generic @@ -2177,15 +2130,12 @@ static void detach_device(struct device *dev) * our alias handling. */ if (WARN_ON(!dev_data->domain)) - return; + goto out; - /* lock device table */ - spin_lock_irqsave(&amd_iommu_devtable_lock, flags); - __detach_device(dev_data); - spin_unlock_irqrestore(&amd_iommu_devtable_lock, flags); + do_detach(dev_data); if (!dev_is_pci(dev)) - return; + goto out; if (domain->flags & PD_IOMMUV2_MASK && dev_data->iommu_v2) pdev_iommuv2_disable(to_pci_dev(dev)); @@ -2193,6 +2143,11 @@ static void detach_device(struct device *dev) pci_disable_ats(to_pci_dev(dev)); dev_data->ats.enabled = false; + +out: + spin_unlock(&dev_data->lock); + + spin_unlock_irqrestore(&domain->lock, flags); } static int amd_iommu_add_device(struct device *dev) @@ -2226,15 +2181,15 @@ static int amd_iommu_add_device(struct device *dev) BUG_ON(!dev_data); - if (iommu_pass_through || dev_data->iommu_v2) + if (dev_data->iommu_v2) iommu_request_dm_for_dev(dev); /* Domains are initialized for this device - have a look what we ended up with */ domain = iommu_get_domain_for_dev(dev); if (domain->type == IOMMU_DOMAIN_IDENTITY) dev_data->passthrough = true; - else - dev->dma_ops = &amd_iommu_dma_ops; + else if (domain->type == IOMMU_DOMAIN_DMA) + iommu_setup_dma_ops(dev, IOVA_START_PFN << PAGE_SHIFT, 0); out: iommu_completion_wait(iommu); @@ -2268,43 +2223,32 @@ static struct iommu_group *amd_iommu_device_group(struct device *dev) return acpihid_device_group(dev); } +static int amd_iommu_domain_get_attr(struct iommu_domain *domain, + enum iommu_attr attr, void *data) +{ + switch (domain->type) { + case IOMMU_DOMAIN_UNMANAGED: + return -ENODEV; + case IOMMU_DOMAIN_DMA: + switch (attr) { + case DOMAIN_ATTR_DMA_USE_FLUSH_QUEUE: + *(int *)data = !amd_iommu_unmap_flush; + return 0; + default: + return -ENODEV; + } + break; + default: + return -EINVAL; + } +} + /***************************************************************************** * * The next functions belong to the dma_ops mapping/unmapping code. * *****************************************************************************/ -/* - * In the dma_ops path we only have the struct device. This function - * finds the corresponding IOMMU, the protection domain and the - * requestor id for a given device. - * If the device is not yet associated with a domain this is also done - * in this function. - */ -static struct protection_domain *get_domain(struct device *dev) -{ - struct protection_domain *domain; - struct iommu_domain *io_domain; - - if (!check_device(dev)) - return ERR_PTR(-EINVAL); - - domain = get_dev_data(dev)->domain; - if (domain == NULL && get_dev_data(dev)->defer_attach) { - get_dev_data(dev)->defer_attach = false; - io_domain = iommu_get_domain_for_dev(dev); - domain = to_pdomain(io_domain); - attach_device(dev, domain); - } - if (domain == NULL) - return ERR_PTR(-EBUSY); - - if (!dma_ops_domain(domain)) - return ERR_PTR(-EBUSY); - - return domain; -} - static void update_device_table(struct protection_domain *domain) { struct iommu_dev_data *dev_data; @@ -2312,468 +2256,16 @@ static void update_device_table(struct protection_domain *domain) list_for_each_entry(dev_data, &domain->dev_list, list) { set_dte_entry(dev_data->devid, domain, dev_data->ats.enabled, dev_data->iommu_v2); - - if (dev_data->devid == dev_data->alias) - continue; - - /* There is an alias, update device table entry for it */ - set_dte_entry(dev_data->alias, domain, dev_data->ats.enabled, - dev_data->iommu_v2); + clone_aliases(dev_data->pdev); } } static void update_domain(struct protection_domain *domain) { - if (!domain->updated) - return; - update_device_table(domain); domain_flush_devices(domain); domain_flush_tlb_pde(domain); - - domain->updated = false; -} - -static int dir2prot(enum dma_data_direction direction) -{ - if (direction == DMA_TO_DEVICE) - return IOMMU_PROT_IR; - else if (direction == DMA_FROM_DEVICE) - return IOMMU_PROT_IW; - else if (direction == DMA_BIDIRECTIONAL) - return IOMMU_PROT_IW | IOMMU_PROT_IR; - else - return 0; -} - -/* - * This function contains common code for mapping of a physically - * contiguous memory region into DMA address space. It is used by all - * mapping functions provided with this IOMMU driver. - * Must be called with the domain lock held. - */ -static dma_addr_t __map_single(struct device *dev, - struct dma_ops_domain *dma_dom, - phys_addr_t paddr, - size_t size, - enum dma_data_direction direction, - u64 dma_mask) -{ - dma_addr_t offset = paddr & ~PAGE_MASK; - dma_addr_t address, start, ret; - unsigned int pages; - int prot = 0; - int i; - - pages = iommu_num_pages(paddr, size, PAGE_SIZE); - paddr &= PAGE_MASK; - - address = dma_ops_alloc_iova(dev, dma_dom, pages, dma_mask); - if (!address) - goto out; - - prot = dir2prot(direction); - - start = address; - for (i = 0; i < pages; ++i) { - ret = iommu_map_page(&dma_dom->domain, start, paddr, - PAGE_SIZE, prot, GFP_ATOMIC); - if (ret) - goto out_unmap; - - paddr += PAGE_SIZE; - start += PAGE_SIZE; - } - address += offset; - - domain_flush_np_cache(&dma_dom->domain, address, size); - -out: - return address; - -out_unmap: - - for (--i; i >= 0; --i) { - start -= PAGE_SIZE; - iommu_unmap_page(&dma_dom->domain, start, PAGE_SIZE); - } - - domain_flush_tlb(&dma_dom->domain); - domain_flush_complete(&dma_dom->domain); - - dma_ops_free_iova(dma_dom, address, pages); - - return DMA_MAPPING_ERROR; -} - -/* - * Does the reverse of the __map_single function. Must be called with - * the domain lock held too - */ -static void __unmap_single(struct dma_ops_domain *dma_dom, - dma_addr_t dma_addr, - size_t size, - int dir) -{ - dma_addr_t i, start; - unsigned int pages; - - pages = iommu_num_pages(dma_addr, size, PAGE_SIZE); - dma_addr &= PAGE_MASK; - start = dma_addr; - - for (i = 0; i < pages; ++i) { - iommu_unmap_page(&dma_dom->domain, start, PAGE_SIZE); - start += PAGE_SIZE; - } - - if (amd_iommu_unmap_flush) { - domain_flush_tlb(&dma_dom->domain); - domain_flush_complete(&dma_dom->domain); - dma_ops_free_iova(dma_dom, dma_addr, pages); - } else { - pages = __roundup_pow_of_two(pages); - queue_iova(&dma_dom->iovad, dma_addr >> PAGE_SHIFT, pages, 0); - } -} - -/* - * The exported map_single function for dma_ops. - */ -static dma_addr_t map_page(struct device *dev, struct page *page, - unsigned long offset, size_t size, - enum dma_data_direction dir, - unsigned long attrs) -{ - phys_addr_t paddr = page_to_phys(page) + offset; - struct protection_domain *domain; - struct dma_ops_domain *dma_dom; - u64 dma_mask; - - domain = get_domain(dev); - if (PTR_ERR(domain) == -EINVAL) - return (dma_addr_t)paddr; - else if (IS_ERR(domain)) - return DMA_MAPPING_ERROR; - - dma_mask = *dev->dma_mask; - dma_dom = to_dma_ops_domain(domain); - - return __map_single(dev, dma_dom, paddr, size, dir, dma_mask); -} - -/* - * The exported unmap_single function for dma_ops. - */ -static void unmap_page(struct device *dev, dma_addr_t dma_addr, size_t size, - enum dma_data_direction dir, unsigned long attrs) -{ - struct protection_domain *domain; - struct dma_ops_domain *dma_dom; - - domain = get_domain(dev); - if (IS_ERR(domain)) - return; - - dma_dom = to_dma_ops_domain(domain); - - __unmap_single(dma_dom, dma_addr, size, dir); -} - -static int sg_num_pages(struct device *dev, - struct scatterlist *sglist, - int nelems) -{ - unsigned long mask, boundary_size; - struct scatterlist *s; - int i, npages = 0; - - mask = dma_get_seg_boundary(dev); - boundary_size = mask + 1 ? ALIGN(mask + 1, PAGE_SIZE) >> PAGE_SHIFT : - 1UL << (BITS_PER_LONG - PAGE_SHIFT); - - for_each_sg(sglist, s, nelems, i) { - int p, n; - - s->dma_address = npages << PAGE_SHIFT; - p = npages % boundary_size; - n = iommu_num_pages(sg_phys(s), s->length, PAGE_SIZE); - if (p + n > boundary_size) - npages += boundary_size - p; - npages += n; - } - - return npages; -} - -/* - * The exported map_sg function for dma_ops (handles scatter-gather - * lists). - */ -static int map_sg(struct device *dev, struct scatterlist *sglist, - int nelems, enum dma_data_direction direction, - unsigned long attrs) -{ - int mapped_pages = 0, npages = 0, prot = 0, i; - struct protection_domain *domain; - struct dma_ops_domain *dma_dom; - struct scatterlist *s; - unsigned long address; - u64 dma_mask; - int ret; - - domain = get_domain(dev); - if (IS_ERR(domain)) - return 0; - - dma_dom = to_dma_ops_domain(domain); - dma_mask = *dev->dma_mask; - - npages = sg_num_pages(dev, sglist, nelems); - - address = dma_ops_alloc_iova(dev, dma_dom, npages, dma_mask); - if (!address) - goto out_err; - - prot = dir2prot(direction); - - /* Map all sg entries */ - for_each_sg(sglist, s, nelems, i) { - int j, pages = iommu_num_pages(sg_phys(s), s->length, PAGE_SIZE); - - for (j = 0; j < pages; ++j) { - unsigned long bus_addr, phys_addr; - - bus_addr = address + s->dma_address + (j << PAGE_SHIFT); - phys_addr = (sg_phys(s) & PAGE_MASK) + (j << PAGE_SHIFT); - ret = iommu_map_page(domain, bus_addr, phys_addr, PAGE_SIZE, prot, GFP_ATOMIC); - if (ret) - goto out_unmap; - - mapped_pages += 1; - } - } - - /* Everything is mapped - write the right values into s->dma_address */ - for_each_sg(sglist, s, nelems, i) { - /* - * Add in the remaining piece of the scatter-gather offset that - * was masked out when we were determining the physical address - * via (sg_phys(s) & PAGE_MASK) earlier. - */ - s->dma_address += address + (s->offset & ~PAGE_MASK); - s->dma_length = s->length; - } - - if (s) - domain_flush_np_cache(domain, s->dma_address, s->dma_length); - - return nelems; - -out_unmap: - dev_err(dev, "IOMMU mapping error in map_sg (io-pages: %d reason: %d)\n", - npages, ret); - - for_each_sg(sglist, s, nelems, i) { - int j, pages = iommu_num_pages(sg_phys(s), s->length, PAGE_SIZE); - - for (j = 0; j < pages; ++j) { - unsigned long bus_addr; - - bus_addr = address + s->dma_address + (j << PAGE_SHIFT); - iommu_unmap_page(domain, bus_addr, PAGE_SIZE); - - if (--mapped_pages == 0) - goto out_free_iova; - } - } - -out_free_iova: - free_iova_fast(&dma_dom->iovad, address >> PAGE_SHIFT, npages); - -out_err: - return 0; -} - -/* - * The exported map_sg function for dma_ops (handles scatter-gather - * lists). - */ -static void unmap_sg(struct device *dev, struct scatterlist *sglist, - int nelems, enum dma_data_direction dir, - unsigned long attrs) -{ - struct protection_domain *domain; - struct dma_ops_domain *dma_dom; - unsigned long startaddr; - int npages; - - domain = get_domain(dev); - if (IS_ERR(domain)) - return; - - startaddr = sg_dma_address(sglist) & PAGE_MASK; - dma_dom = to_dma_ops_domain(domain); - npages = sg_num_pages(dev, sglist, nelems); - - __unmap_single(dma_dom, startaddr, npages << PAGE_SHIFT, dir); -} - -/* - * The exported alloc_coherent function for dma_ops. - */ -static void *alloc_coherent(struct device *dev, size_t size, - dma_addr_t *dma_addr, gfp_t flag, - unsigned long attrs) -{ - u64 dma_mask = dev->coherent_dma_mask; - struct protection_domain *domain; - struct dma_ops_domain *dma_dom; - struct page *page; - - domain = get_domain(dev); - if (PTR_ERR(domain) == -EINVAL) { - page = alloc_pages(flag, get_order(size)); - *dma_addr = page_to_phys(page); - return page_address(page); - } else if (IS_ERR(domain)) - return NULL; - - dma_dom = to_dma_ops_domain(domain); - size = PAGE_ALIGN(size); - dma_mask = dev->coherent_dma_mask; - flag &= ~(__GFP_DMA | __GFP_HIGHMEM | __GFP_DMA32); - flag |= __GFP_ZERO; - - page = alloc_pages(flag | __GFP_NOWARN, get_order(size)); - if (!page) { - if (!gfpflags_allow_blocking(flag)) - return NULL; - - page = dma_alloc_from_contiguous(dev, size >> PAGE_SHIFT, - get_order(size), flag & __GFP_NOWARN); - if (!page) - return NULL; - } - - if (!dma_mask) - dma_mask = *dev->dma_mask; - - *dma_addr = __map_single(dev, dma_dom, page_to_phys(page), - size, DMA_BIDIRECTIONAL, dma_mask); - - if (*dma_addr == DMA_MAPPING_ERROR) - goto out_free; - - return page_address(page); - -out_free: - - if (!dma_release_from_contiguous(dev, page, size >> PAGE_SHIFT)) - __free_pages(page, get_order(size)); - - return NULL; -} - -/* - * The exported free_coherent function for dma_ops. - */ -static void free_coherent(struct device *dev, size_t size, - void *virt_addr, dma_addr_t dma_addr, - unsigned long attrs) -{ - struct protection_domain *domain; - struct dma_ops_domain *dma_dom; - struct page *page; - - page = virt_to_page(virt_addr); - size = PAGE_ALIGN(size); - - domain = get_domain(dev); - if (IS_ERR(domain)) - goto free_mem; - - dma_dom = to_dma_ops_domain(domain); - - __unmap_single(dma_dom, dma_addr, size, DMA_BIDIRECTIONAL); - -free_mem: - if (!dma_release_from_contiguous(dev, page, size >> PAGE_SHIFT)) - __free_pages(page, get_order(size)); -} - -/* - * This function is called by the DMA layer to find out if we can handle a - * particular device. It is part of the dma_ops. - */ -static int amd_iommu_dma_supported(struct device *dev, u64 mask) -{ - if (!dma_direct_supported(dev, mask)) - return 0; - return check_device(dev); -} - -static const struct dma_map_ops amd_iommu_dma_ops = { - .alloc = alloc_coherent, - .free = free_coherent, - .map_page = map_page, - .unmap_page = unmap_page, - .map_sg = map_sg, - .unmap_sg = unmap_sg, - .dma_supported = amd_iommu_dma_supported, -}; - -static int init_reserved_iova_ranges(void) -{ - struct pci_dev *pdev = NULL; - struct iova *val; - - init_iova_domain(&reserved_iova_ranges, PAGE_SIZE, IOVA_START_PFN); - - lockdep_set_class(&reserved_iova_ranges.iova_rbtree_lock, - &reserved_rbtree_key); - - /* MSI memory range */ - val = reserve_iova(&reserved_iova_ranges, - IOVA_PFN(MSI_RANGE_START), IOVA_PFN(MSI_RANGE_END)); - if (!val) { - pr_err("Reserving MSI range failed\n"); - return -ENOMEM; - } - - /* HT memory range */ - val = reserve_iova(&reserved_iova_ranges, - IOVA_PFN(HT_RANGE_START), IOVA_PFN(HT_RANGE_END)); - if (!val) { - pr_err("Reserving HT range failed\n"); - return -ENOMEM; - } - - /* - * Memory used for PCI resources - * FIXME: Check whether we can reserve the PCI-hole completly - */ - for_each_pci_dev(pdev) { - int i; - - for (i = 0; i < PCI_NUM_RESOURCES; ++i) { - struct resource *r = &pdev->resource[i]; - - if (!(r->flags & IORESOURCE_MEM)) - continue; - - val = reserve_iova(&reserved_iova_ranges, - IOVA_PFN(r->start), - IOVA_PFN(r->end)); - if (!val) { - pci_err(pdev, "Reserve pci-resource range %pR failed\n", r); - return -ENOMEM; - } - } - } - - return 0; } int __init amd_iommu_init_api(void) @@ -2784,10 +2276,6 @@ int __init amd_iommu_init_api(void) if (ret) return ret; - ret = init_reserved_iova_ranges(); - if (ret) - return ret; - err = bus_set_iommu(&pci_bus_type, &amd_iommu_ops); if (err) return err; @@ -2805,8 +2293,7 @@ int __init amd_iommu_init_api(void) int __init amd_iommu_init_dma_ops(void) { - swiotlb = (iommu_pass_through || sme_me_mask) ? 1 : 0; - iommu_detected = 1; + swiotlb = (iommu_default_passthrough() || sme_me_mask) ? 1 : 0; if (amd_iommu_unmap_flush) pr_info("IO/TLB flush on unmap enabled\n"); @@ -2832,16 +2319,16 @@ static void cleanup_domain(struct protection_domain *domain) struct iommu_dev_data *entry; unsigned long flags; - spin_lock_irqsave(&amd_iommu_devtable_lock, flags); + spin_lock_irqsave(&domain->lock, flags); while (!list_empty(&domain->dev_list)) { entry = list_first_entry(&domain->dev_list, struct iommu_dev_data, list); BUG_ON(!entry->domain); - __detach_device(entry); + do_detach(entry); } - spin_unlock_irqrestore(&amd_iommu_devtable_lock, flags); + spin_unlock_irqrestore(&domain->lock, flags); } static void protection_domain_free(struct protection_domain *domain) @@ -2858,7 +2345,6 @@ static void protection_domain_free(struct protection_domain *domain) static int protection_domain_init(struct protection_domain *domain) { spin_lock_init(&domain->lock); - mutex_init(&domain->api_lock); domain->id = domain_id_alloc(); if (!domain->id) return -ENOMEM; @@ -2889,7 +2375,6 @@ out_err: static struct iommu_domain *amd_iommu_domain_alloc(unsigned type) { struct protection_domain *pdomain; - struct dma_ops_domain *dma_domain; switch (type) { case IOMMU_DOMAIN_UNMANAGED: @@ -2910,12 +2395,11 @@ static struct iommu_domain *amd_iommu_domain_alloc(unsigned type) break; case IOMMU_DOMAIN_DMA: - dma_domain = dma_ops_domain_alloc(); - if (!dma_domain) { + pdomain = dma_ops_domain_alloc(); + if (!pdomain) { pr_err("Failed to allocate\n"); return NULL; } - pdomain = &dma_domain->domain; break; case IOMMU_DOMAIN_IDENTITY: pdomain = protection_domain_alloc(); @@ -2934,7 +2418,6 @@ static struct iommu_domain *amd_iommu_domain_alloc(unsigned type) static void amd_iommu_domain_free(struct iommu_domain *dom) { struct protection_domain *domain; - struct dma_ops_domain *dma_dom; domain = to_pdomain(dom); @@ -2949,8 +2432,7 @@ static void amd_iommu_domain_free(struct iommu_domain *dom) switch (dom->type) { case IOMMU_DOMAIN_DMA: /* Now release the domain */ - dma_dom = to_dma_ops_domain(domain); - dma_ops_domain_free(dma_dom); + dma_ops_domain_free(domain); break; default: if (domain->mode != PAGE_MODE_NONE) @@ -3006,6 +2488,7 @@ static int amd_iommu_attach_device(struct iommu_domain *dom, return -EINVAL; dev_data = dev->archdata.iommu; + dev_data->defer_attach = false; iommu = amd_iommu_rlookup_table[dev_data->devid]; if (!iommu) @@ -3031,7 +2514,8 @@ static int amd_iommu_attach_device(struct iommu_domain *dom, } static int amd_iommu_map(struct iommu_domain *dom, unsigned long iova, - phys_addr_t paddr, size_t page_size, int iommu_prot) + phys_addr_t paddr, size_t page_size, int iommu_prot, + gfp_t gfp) { struct protection_domain *domain = to_pdomain(dom); int prot = 0; @@ -3045,9 +2529,7 @@ static int amd_iommu_map(struct iommu_domain *dom, unsigned long iova, if (iommu_prot & IOMMU_WRITE) prot |= IOMMU_PROT_IW; - mutex_lock(&domain->api_lock); - ret = iommu_map_page(domain, iova, paddr, page_size, prot, GFP_KERNEL); - mutex_unlock(&domain->api_lock); + ret = iommu_map_page(domain, iova, paddr, page_size, prot, gfp); domain_flush_np_cache(domain, iova, page_size); @@ -3055,19 +2537,15 @@ static int amd_iommu_map(struct iommu_domain *dom, unsigned long iova, } static size_t amd_iommu_unmap(struct iommu_domain *dom, unsigned long iova, - size_t page_size) + size_t page_size, + struct iommu_iotlb_gather *gather) { struct protection_domain *domain = to_pdomain(dom); - size_t unmap_size; if (domain->mode == PAGE_MODE_NONE) return 0; - mutex_lock(&domain->api_lock); - unmap_size = iommu_unmap_page(domain, iova, page_size); - mutex_unlock(&domain->api_lock); - - return unmap_size; + return iommu_unmap_page(domain, iova, page_size); } static phys_addr_t amd_iommu_iova_to_phys(struct iommu_domain *dom, @@ -3159,28 +2637,6 @@ static void amd_iommu_get_resv_regions(struct device *dev, list_add_tail(®ion->list, head); } -static void amd_iommu_put_resv_regions(struct device *dev, - struct list_head *head) -{ - struct iommu_resv_region *entry, *next; - - list_for_each_entry_safe(entry, next, head, list) - kfree(entry); -} - -static void amd_iommu_apply_resv_region(struct device *dev, - struct iommu_domain *domain, - struct iommu_resv_region *region) -{ - struct dma_ops_domain *dma_dom = to_dma_ops_domain(to_pdomain(domain)); - unsigned long start, end; - - start = IOVA_PFN(region->start); - end = IOVA_PFN(region->start + region->length - 1); - - WARN_ON_ONCE(reserve_iova(&dma_dom->iovad, start, end) == NULL); -} - static bool amd_iommu_is_attach_deferred(struct iommu_domain *domain, struct device *dev) { @@ -3191,14 +2647,18 @@ static bool amd_iommu_is_attach_deferred(struct iommu_domain *domain, static void amd_iommu_flush_iotlb_all(struct iommu_domain *domain) { struct protection_domain *dom = to_pdomain(domain); + unsigned long flags; + spin_lock_irqsave(&dom->lock, flags); domain_flush_tlb_pde(dom); domain_flush_complete(dom); + spin_unlock_irqrestore(&dom->lock, flags); } -static void amd_iommu_iotlb_range_add(struct iommu_domain *domain, - unsigned long iova, size_t size) +static void amd_iommu_iotlb_sync(struct iommu_domain *domain, + struct iommu_iotlb_gather *gather) { + amd_iommu_flush_iotlb_all(domain); } const struct iommu_ops amd_iommu_ops = { @@ -3213,14 +2673,13 @@ const struct iommu_ops amd_iommu_ops = { .add_device = amd_iommu_add_device, .remove_device = amd_iommu_remove_device, .device_group = amd_iommu_device_group, + .domain_get_attr = amd_iommu_domain_get_attr, .get_resv_regions = amd_iommu_get_resv_regions, - .put_resv_regions = amd_iommu_put_resv_regions, - .apply_resv_region = amd_iommu_apply_resv_region, + .put_resv_regions = generic_iommu_put_resv_regions, .is_attach_deferred = amd_iommu_is_attach_deferred, .pgsize_bitmap = AMD_IOMMU_PGSIZES, .flush_iotlb_all = amd_iommu_flush_iotlb_all, - .iotlb_range_add = amd_iommu_iotlb_range_add, - .iotlb_sync = amd_iommu_flush_iotlb_all, + .iotlb_sync = amd_iommu_iotlb_sync, }; /***************************************************************************** @@ -3255,7 +2714,6 @@ void amd_iommu_domain_direct_map(struct iommu_domain *dom) /* Update data structure */ domain->mode = PAGE_MODE_NONE; - domain->updated = true; /* Make changes visible to IOMMUs */ update_domain(domain); @@ -3301,7 +2759,6 @@ int amd_iommu_domain_enable_v2(struct iommu_domain *dom, int pasids) domain->glx = levels; domain->flags |= PD_IOMMUV2_MASK; - domain->updated = true; update_domain(domain); @@ -3530,9 +2987,23 @@ EXPORT_SYMBOL(amd_iommu_complete_ppr); struct iommu_domain *amd_iommu_get_v2_domain(struct pci_dev *pdev) { struct protection_domain *pdomain; + struct iommu_domain *io_domain; + struct device *dev = &pdev->dev; + + if (!check_device(dev)) + return NULL; + + pdomain = get_dev_data(dev)->domain; + if (pdomain == NULL && get_dev_data(dev)->defer_attach) { + get_dev_data(dev)->defer_attach = false; + io_domain = iommu_get_domain_for_dev(dev); + pdomain = to_pdomain(io_domain); + attach_device(dev, pdomain); + } + if (pdomain == NULL) + return NULL; - pdomain = get_domain(&pdev->dev); - if (IS_ERR(pdomain)) + if (!dma_ops_domain(pdomain)) return NULL; /* Only return IOMMUv2 domains */ @@ -3672,7 +3143,20 @@ static void set_remap_table_entry(struct amd_iommu *iommu, u16 devid, iommu_flush_dte(iommu, devid); } -static struct irq_remap_table *alloc_irq_table(u16 devid) +static int set_remap_table_entry_alias(struct pci_dev *pdev, u16 alias, + void *data) +{ + struct irq_remap_table *table = data; + + irq_lookup_table[alias] = table; + set_dte_irq_entry(alias, table); + + iommu_flush_dte(amd_iommu_rlookup_table[alias], alias); + + return 0; +} + +static struct irq_remap_table *alloc_irq_table(u16 devid, struct pci_dev *pdev) { struct irq_remap_table *table = NULL; struct irq_remap_table *new_table = NULL; @@ -3718,7 +3202,12 @@ static struct irq_remap_table *alloc_irq_table(u16 devid) table = new_table; new_table = NULL; - set_remap_table_entry(iommu, devid, table); + if (pdev) + pci_for_each_dma_alias(pdev, set_remap_table_entry_alias, + table); + else + set_remap_table_entry(iommu, devid, table); + if (devid != alias) set_remap_table_entry(iommu, alias, table); @@ -3735,7 +3224,8 @@ out_unlock: return table; } -static int alloc_irq_index(u16 devid, int count, bool align) +static int alloc_irq_index(u16 devid, int count, bool align, + struct pci_dev *pdev) { struct irq_remap_table *table; int index, c, alignment = 1; @@ -3745,7 +3235,7 @@ static int alloc_irq_index(u16 devid, int count, bool align) if (!iommu) return -ENODEV; - table = alloc_irq_table(devid); + table = alloc_irq_table(devid, pdev); if (!table) return -ENODEV; @@ -4178,7 +3668,7 @@ static int irq_remapping_alloc(struct irq_domain *domain, unsigned int virq, struct irq_remap_table *table; struct amd_iommu *iommu; - table = alloc_irq_table(devid); + table = alloc_irq_table(devid, NULL); if (table) { if (!table->min_index) { /* @@ -4195,11 +3685,15 @@ static int irq_remapping_alloc(struct irq_domain *domain, unsigned int virq, } else { index = -ENOMEM; } - } else { + } else if (info->type == X86_IRQ_ALLOC_TYPE_MSI || + info->type == X86_IRQ_ALLOC_TYPE_MSIX) { bool align = (info->type == X86_IRQ_ALLOC_TYPE_MSI); - index = alloc_irq_index(devid, nr_irqs, align); + index = alloc_irq_index(devid, nr_irqs, align, info->msi_dev); + } else { + index = alloc_irq_index(devid, nr_irqs, false, NULL); } + if (index < 0) { pr_warn("Failed to allocate IRTE\n"); ret = index; @@ -4313,13 +3807,62 @@ static const struct irq_domain_ops amd_ir_domain_ops = { .deactivate = irq_remapping_deactivate, }; +int amd_iommu_activate_guest_mode(void *data) +{ + struct amd_ir_data *ir_data = (struct amd_ir_data *)data; + struct irte_ga *entry = (struct irte_ga *) ir_data->entry; + + if (!AMD_IOMMU_GUEST_IR_VAPIC(amd_iommu_guest_ir) || + !entry || entry->lo.fields_vapic.guest_mode) + return 0; + + entry->lo.val = 0; + entry->hi.val = 0; + + entry->lo.fields_vapic.guest_mode = 1; + entry->lo.fields_vapic.ga_log_intr = 1; + entry->hi.fields.ga_root_ptr = ir_data->ga_root_ptr; + entry->hi.fields.vector = ir_data->ga_vector; + entry->lo.fields_vapic.ga_tag = ir_data->ga_tag; + + return modify_irte_ga(ir_data->irq_2_irte.devid, + ir_data->irq_2_irte.index, entry, NULL); +} +EXPORT_SYMBOL(amd_iommu_activate_guest_mode); + +int amd_iommu_deactivate_guest_mode(void *data) +{ + struct amd_ir_data *ir_data = (struct amd_ir_data *)data; + struct irte_ga *entry = (struct irte_ga *) ir_data->entry; + struct irq_cfg *cfg = ir_data->cfg; + + if (!AMD_IOMMU_GUEST_IR_VAPIC(amd_iommu_guest_ir) || + !entry || !entry->lo.fields_vapic.guest_mode) + return 0; + + entry->lo.val = 0; + entry->hi.val = 0; + + entry->lo.fields_remap.dm = apic->irq_dest_mode; + entry->lo.fields_remap.int_type = apic->irq_delivery_mode; + entry->hi.fields.vector = cfg->vector; + entry->lo.fields_remap.destination = + APICID_TO_IRTE_DEST_LO(cfg->dest_apicid); + entry->hi.fields.destination = + APICID_TO_IRTE_DEST_HI(cfg->dest_apicid); + + return modify_irte_ga(ir_data->irq_2_irte.devid, + ir_data->irq_2_irte.index, entry, NULL); +} +EXPORT_SYMBOL(amd_iommu_deactivate_guest_mode); + static int amd_ir_set_vcpu_affinity(struct irq_data *data, void *vcpu_info) { + int ret; struct amd_iommu *iommu; struct amd_iommu_pi_data *pi_data = vcpu_info; struct vcpu_data *vcpu_pi_info = pi_data->vcpu_data; struct amd_ir_data *ir_data = data->chip_data; - struct irte_ga *irte = (struct irte_ga *) ir_data->entry; struct irq_2_irte *irte_info = &ir_data->irq_2_irte; struct iommu_dev_data *dev_data = search_dev_data(irte_info->devid); @@ -4330,6 +3873,7 @@ static int amd_ir_set_vcpu_affinity(struct irq_data *data, void *vcpu_info) if (!dev_data || !dev_data->use_vapic) return 0; + ir_data->cfg = irqd_cfg(data); pi_data->ir_data = ir_data; /* Note: @@ -4348,37 +3892,24 @@ static int amd_ir_set_vcpu_affinity(struct irq_data *data, void *vcpu_info) pi_data->prev_ga_tag = ir_data->cached_ga_tag; if (pi_data->is_guest_mode) { - /* Setting */ - irte->hi.fields.ga_root_ptr = (pi_data->base >> 12); - irte->hi.fields.vector = vcpu_pi_info->vector; - irte->lo.fields_vapic.ga_log_intr = 1; - irte->lo.fields_vapic.guest_mode = 1; - irte->lo.fields_vapic.ga_tag = pi_data->ga_tag; - - ir_data->cached_ga_tag = pi_data->ga_tag; + ir_data->ga_root_ptr = (pi_data->base >> 12); + ir_data->ga_vector = vcpu_pi_info->vector; + ir_data->ga_tag = pi_data->ga_tag; + ret = amd_iommu_activate_guest_mode(ir_data); + if (!ret) + ir_data->cached_ga_tag = pi_data->ga_tag; } else { - /* Un-Setting */ - struct irq_cfg *cfg = irqd_cfg(data); - - irte->hi.val = 0; - irte->lo.val = 0; - irte->hi.fields.vector = cfg->vector; - irte->lo.fields_remap.guest_mode = 0; - irte->lo.fields_remap.destination = - APICID_TO_IRTE_DEST_LO(cfg->dest_apicid); - irte->hi.fields.destination = - APICID_TO_IRTE_DEST_HI(cfg->dest_apicid); - irte->lo.fields_remap.int_type = apic->irq_delivery_mode; - irte->lo.fields_remap.dm = apic->irq_dest_mode; + ret = amd_iommu_deactivate_guest_mode(ir_data); /* * This communicates the ga_tag back to the caller * so that it can do all the necessary clean up. */ - ir_data->cached_ga_tag = 0; + if (!ret) + ir_data->cached_ga_tag = 0; } - return modify_irte_ga(irte_info->devid, irte_info->index, irte, ir_data); + return ret; } |