summaryrefslogtreecommitdiffstats
path: root/drivers/iommu/amd_iommu.c
diff options
context:
space:
mode:
Diffstat (limited to 'drivers/iommu/amd_iommu.c')
-rw-r--r--drivers/iommu/amd_iommu.c1289
1 files changed, 410 insertions, 879 deletions
diff --git a/drivers/iommu/amd_iommu.c b/drivers/iommu/amd_iommu.c
index b607a92791d3..aac132bd1ef0 100644
--- a/drivers/iommu/amd_iommu.c
+++ b/drivers/iommu/amd_iommu.c
@@ -20,6 +20,7 @@
#include <linux/scatterlist.h>
#include <linux/dma-mapping.h>
#include <linux/dma-direct.h>
+#include <linux/dma-iommu.h>
#include <linux/iommu-helper.h>
#include <linux/iommu.h>
#include <linux/delay.h>
@@ -70,7 +71,6 @@
*/
#define AMD_IOMMU_PGSIZES ((~0xFFFUL) & ~(2ULL << 38))
-static DEFINE_SPINLOCK(amd_iommu_devtable_lock);
static DEFINE_SPINLOCK(pd_bitmap_lock);
/* List of all available dev_data structures */
@@ -89,8 +89,6 @@ const struct iommu_ops amd_iommu_ops;
static ATOMIC_NOTIFIER_HEAD(ppr_notifier);
int amd_iommu_max_glx_val = -1;
-static const struct dma_map_ops amd_iommu_dma_ops;
-
/*
* general struct to manage commands send to an IOMMU
*/
@@ -103,21 +101,6 @@ struct kmem_cache *amd_iommu_irq_cache;
static void update_domain(struct protection_domain *domain);
static int protection_domain_init(struct protection_domain *domain);
static void detach_device(struct device *dev);
-static void iova_domain_flush_tlb(struct iova_domain *iovad);
-
-/*
- * Data container for a dma_ops specific protection domain
- */
-struct dma_ops_domain {
- /* generic protection domain information */
- struct protection_domain domain;
-
- /* IOVA RB-Tree */
- struct iova_domain iovad;
-};
-
-static struct iova_domain reserved_iova_ranges;
-static struct lock_class_key reserved_rbtree_key;
/****************************************************************************
*
@@ -125,30 +108,6 @@ static struct lock_class_key reserved_rbtree_key;
*
****************************************************************************/
-static inline int match_hid_uid(struct device *dev,
- struct acpihid_map_entry *entry)
-{
- struct acpi_device *adev = ACPI_COMPANION(dev);
- const char *hid, *uid;
-
- if (!adev)
- return -ENODEV;
-
- hid = acpi_device_hid(adev);
- uid = acpi_device_uid(adev);
-
- if (!hid || !(*hid))
- return -ENODEV;
-
- if (!uid || !(*uid))
- return strcmp(hid, entry->hid);
-
- if (!(*entry->uid))
- return strcmp(hid, entry->hid);
-
- return (strcmp(hid, entry->hid) || strcmp(uid, entry->uid));
-}
-
static inline u16 get_pci_device_id(struct device *dev)
{
struct pci_dev *pdev = to_pci_dev(dev);
@@ -159,10 +118,14 @@ static inline u16 get_pci_device_id(struct device *dev)
static inline int get_acpihid_device_id(struct device *dev,
struct acpihid_map_entry **entry)
{
+ struct acpi_device *adev = ACPI_COMPANION(dev);
struct acpihid_map_entry *p;
+ if (!adev)
+ return -ENODEV;
+
list_for_each_entry(p, &acpihid_map, list) {
- if (!match_hid_uid(dev, p)) {
+ if (acpi_dev_hid_uid_match(adev, p->hid, p->uid)) {
if (entry)
*entry = p;
return p->devid;
@@ -188,12 +151,6 @@ static struct protection_domain *to_pdomain(struct iommu_domain *dom)
return container_of(dom, struct protection_domain, domain);
}
-static struct dma_ops_domain* to_dma_ops_domain(struct protection_domain *domain)
-{
- BUG_ON(domain->flags != PD_DMA_OPS_MASK);
- return container_of(domain, struct dma_ops_domain, domain);
-}
-
static struct iommu_dev_data *alloc_dev_data(u16 devid)
{
struct iommu_dev_data *dev_data;
@@ -202,6 +159,7 @@ static struct iommu_dev_data *alloc_dev_data(u16 devid)
if (!dev_data)
return NULL;
+ spin_lock_init(&dev_data->lock);
dev_data->devid = devid;
ratelimit_default_init(&dev_data->rs);
@@ -226,71 +184,58 @@ static struct iommu_dev_data *search_dev_data(u16 devid)
return NULL;
}
-static int __last_alias(struct pci_dev *pdev, u16 alias, void *data)
-{
- *(u16 *)data = alias;
- return 0;
-}
-
-static u16 get_alias(struct device *dev)
+static int clone_alias(struct pci_dev *pdev, u16 alias, void *data)
{
- struct pci_dev *pdev = to_pci_dev(dev);
- u16 devid, ivrs_alias, pci_alias;
+ u16 devid = pci_dev_id(pdev);
- /* The callers make sure that get_device_id() does not fail here */
- devid = get_device_id(dev);
-
- /* For ACPI HID devices, we simply return the devid as such */
- if (!dev_is_pci(dev))
- return devid;
+ if (devid == alias)
+ return 0;
- ivrs_alias = amd_iommu_alias_table[devid];
+ amd_iommu_rlookup_table[alias] =
+ amd_iommu_rlookup_table[devid];
+ memcpy(amd_iommu_dev_table[alias].data,
+ amd_iommu_dev_table[devid].data,
+ sizeof(amd_iommu_dev_table[alias].data));
- pci_for_each_dma_alias(pdev, __last_alias, &pci_alias);
+ return 0;
+}
- if (ivrs_alias == pci_alias)
- return ivrs_alias;
+static void clone_aliases(struct pci_dev *pdev)
+{
+ if (!pdev)
+ return;
/*
- * DMA alias showdown
- *
- * The IVRS is fairly reliable in telling us about aliases, but it
- * can't know about every screwy device. If we don't have an IVRS
- * reported alias, use the PCI reported alias. In that case we may
- * still need to initialize the rlookup and dev_table entries if the
- * alias is to a non-existent device.
+ * The IVRS alias stored in the alias table may not be
+ * part of the PCI DMA aliases if it's bus differs
+ * from the original device.
*/
- if (ivrs_alias == devid) {
- if (!amd_iommu_rlookup_table[pci_alias]) {
- amd_iommu_rlookup_table[pci_alias] =
- amd_iommu_rlookup_table[devid];
- memcpy(amd_iommu_dev_table[pci_alias].data,
- amd_iommu_dev_table[devid].data,
- sizeof(amd_iommu_dev_table[pci_alias].data));
- }
+ clone_alias(pdev, amd_iommu_alias_table[pci_dev_id(pdev)], NULL);
- return pci_alias;
- }
+ pci_for_each_dma_alias(pdev, clone_alias, NULL);
+}
+
+static struct pci_dev *setup_aliases(struct device *dev)
+{
+ struct pci_dev *pdev = to_pci_dev(dev);
+ u16 ivrs_alias;
- pci_info(pdev, "Using IVRS reported alias %02x:%02x.%d "
- "for device [%04x:%04x], kernel reported alias "
- "%02x:%02x.%d\n", PCI_BUS_NUM(ivrs_alias), PCI_SLOT(ivrs_alias),
- PCI_FUNC(ivrs_alias), pdev->vendor, pdev->device,
- PCI_BUS_NUM(pci_alias), PCI_SLOT(pci_alias),
- PCI_FUNC(pci_alias));
+ /* For ACPI HID devices, there are no aliases */
+ if (!dev_is_pci(dev))
+ return NULL;
/*
- * If we don't have a PCI DMA alias and the IVRS alias is on the same
- * bus, then the IVRS table may know about a quirk that we don't.
+ * Add the IVRS alias to the pci aliases if it is on the same
+ * bus. The IVRS table may know about a quirk that we don't.
*/
- if (pci_alias == devid &&
- PCI_BUS_NUM(ivrs_alias) == pdev->bus->number) {
- pci_add_dma_alias(pdev, ivrs_alias & 0xff);
- pci_info(pdev, "Added PCI DMA alias %02x.%d\n",
- PCI_SLOT(ivrs_alias), PCI_FUNC(ivrs_alias));
- }
+ ivrs_alias = amd_iommu_alias_table[pci_dev_id(pdev)];
+ if (ivrs_alias != pci_dev_id(pdev) &&
+ PCI_BUS_NUM(ivrs_alias) == pdev->bus->number)
+ pci_add_dma_alias(pdev, ivrs_alias & 0xff, 1);
+
+ clone_aliases(pdev);
- return ivrs_alias;
+ return pdev;
}
static struct iommu_dev_data *find_dev_data(u16 devid)
@@ -428,7 +373,7 @@ static int iommu_init_device(struct device *dev)
if (!dev_data)
return -ENOMEM;
- dev_data->alias = get_alias(dev);
+ dev_data->pdev = setup_aliases(dev);
/*
* By default we use passthrough mode for IOMMUv2 capable device.
@@ -436,7 +381,7 @@ static int iommu_init_device(struct device *dev)
* invalid address), we ignore the capability for the device so
* it'll be forced to go into translation mode.
*/
- if ((iommu_pass_through || !amd_iommu_force_isolation) &&
+ if ((iommu_default_passthrough() || !amd_iommu_force_isolation) &&
dev_is_pci(dev) && pci_iommuv2_capable(to_pci_dev(dev))) {
struct amd_iommu *iommu;
@@ -453,20 +398,16 @@ static int iommu_init_device(struct device *dev)
static void iommu_ignore_device(struct device *dev)
{
- u16 alias;
int devid;
devid = get_device_id(dev);
if (devid < 0)
return;
- alias = get_alias(dev);
-
+ amd_iommu_rlookup_table[devid] = NULL;
memset(&amd_iommu_dev_table[devid], 0, sizeof(struct dev_table_entry));
- memset(&amd_iommu_dev_table[alias], 0, sizeof(struct dev_table_entry));
- amd_iommu_rlookup_table[devid] = NULL;
- amd_iommu_rlookup_table[alias] = NULL;
+ setup_aliases(dev);
}
static void iommu_uninit_device(struct device *dev)
@@ -501,6 +442,29 @@ static void iommu_uninit_device(struct device *dev)
*/
}
+/*
+ * Helper function to get the first pte of a large mapping
+ */
+static u64 *first_pte_l7(u64 *pte, unsigned long *page_size,
+ unsigned long *count)
+{
+ unsigned long pte_mask, pg_size, cnt;
+ u64 *fpte;
+
+ pg_size = PTE_PAGE_SIZE(*pte);
+ cnt = PAGE_SIZE_PTE_COUNT(pg_size);
+ pte_mask = ~((cnt << 3) - 1);
+ fpte = (u64 *)(((unsigned long)pte) & pte_mask);
+
+ if (page_size)
+ *page_size = pg_size;
+
+ if (count)
+ *count = cnt;
+
+ return fpte;
+}
+
/****************************************************************************
*
* Interrupt handling functions
@@ -560,7 +524,8 @@ static void iommu_print_event(struct amd_iommu *iommu, void *__evt)
retry:
type = (event[1] >> EVENT_TYPE_SHIFT) & EVENT_TYPE_MASK;
devid = (event[0] >> EVENT_DEVID_SHIFT) & EVENT_DEVID_MASK;
- pasid = PPR_PASID(*(u64 *)&event[0]);
+ pasid = (event[0] & EVENT_DOMID_MASK_HI) |
+ (event[1] & EVENT_DOMID_MASK_LO);
flags = (event[1] >> EVENT_FLAGS_SHIFT) & EVENT_FLAGS_MASK;
address = (u64)(((u64)event[3]) << 32) | event[2];
@@ -593,7 +558,7 @@ retry:
address, flags);
break;
case EVENT_TYPE_PAGE_TAB_ERR:
- dev_err(dev, "Event logged [PAGE_TAB_HARDWARE_ERROR device=%02x:%02x.%x domain=0x%04x address=0x%llx flags=0x%04x]\n",
+ dev_err(dev, "Event logged [PAGE_TAB_HARDWARE_ERROR device=%02x:%02x.%x pasid=0x%04x address=0x%llx flags=0x%04x]\n",
PCI_BUS_NUM(devid), PCI_SLOT(devid), PCI_FUNC(devid),
pasid, address, flags);
break;
@@ -616,8 +581,7 @@ retry:
pasid, address, flags);
break;
case EVENT_TYPE_INV_PPR_REQ:
- pasid = ((event[0] >> 16) & 0xFFFF)
- | ((event[1] << 6) & 0xF0000);
+ pasid = PPR_PASID(*((u64 *)__evt));
tag = event[1] & 0x03FF;
dev_err(dev, "Event logged [INVALID_PPR_REQUEST device=%02x:%02x.%x pasid=0x%05x address=0x%llx flags=0x%04x tag=0x%03x]\n",
PCI_BUS_NUM(devid), PCI_SLOT(devid), PCI_FUNC(devid),
@@ -852,17 +816,18 @@ static void copy_cmd_to_buffer(struct amd_iommu *iommu,
struct iommu_cmd *cmd)
{
u8 *target;
-
- target = iommu->cmd_buf + iommu->cmd_buf_tail;
-
- iommu->cmd_buf_tail += sizeof(*cmd);
- iommu->cmd_buf_tail %= CMD_BUFFER_SIZE;
+ u32 tail;
/* Copy command to buffer */
+ tail = iommu->cmd_buf_tail;
+ target = iommu->cmd_buf + tail;
memcpy(target, cmd, sizeof(*cmd));
+ tail = (tail + sizeof(*cmd)) % CMD_BUFFER_SIZE;
+ iommu->cmd_buf_tail = tail;
+
/* Tell the IOMMU about it */
- writel(iommu->cmd_buf_tail, iommu->mmio_base + MMIO_CMD_TAIL_OFFSET);
+ writel(tail, iommu->mmio_base + MMIO_CMD_TAIL_OFFSET);
}
static void build_completion_wait(struct iommu_cmd *cmd, u64 address)
@@ -1143,6 +1108,17 @@ static void amd_iommu_flush_tlb_all(struct amd_iommu *iommu)
iommu_completion_wait(iommu);
}
+static void amd_iommu_flush_tlb_domid(struct amd_iommu *iommu, u32 dom_id)
+{
+ struct iommu_cmd cmd;
+
+ build_inv_iommu_pages(&cmd, 0, CMD_INV_IOMMU_ALL_PAGES_ADDRESS,
+ dom_id, 1);
+ iommu_queue_command(iommu, &cmd);
+
+ iommu_completion_wait(iommu);
+}
+
static void amd_iommu_flush_all(struct amd_iommu *iommu)
{
struct iommu_cmd cmd;
@@ -1201,6 +1177,13 @@ static int device_flush_iotlb(struct iommu_dev_data *dev_data,
return iommu_queue_command(iommu, &cmd);
}
+static int device_flush_dte_alias(struct pci_dev *pdev, u16 alias, void *data)
+{
+ struct amd_iommu *iommu = data;
+
+ return iommu_flush_dte(iommu, alias);
+}
+
/*
* Command send function for invalidating a device table entry
*/
@@ -1211,14 +1194,22 @@ static int device_flush_dte(struct iommu_dev_data *dev_data)
int ret;
iommu = amd_iommu_rlookup_table[dev_data->devid];
- alias = dev_data->alias;
- ret = iommu_flush_dte(iommu, dev_data->devid);
- if (!ret && alias != dev_data->devid)
- ret = iommu_flush_dte(iommu, alias);
+ if (dev_data->pdev)
+ ret = pci_for_each_dma_alias(dev_data->pdev,
+ device_flush_dte_alias, iommu);
+ else
+ ret = iommu_flush_dte(iommu, dev_data->devid);
if (ret)
return ret;
+ alias = amd_iommu_alias_table[dev_data->devid];
+ if (alias != dev_data->devid) {
+ ret = iommu_flush_dte(iommu, alias);
+ if (ret)
+ return ret;
+ }
+
if (dev_data->ats.enabled)
ret = device_flush_iotlb(dev_data, 0, ~0UL);
@@ -1267,12 +1258,6 @@ static void domain_flush_pages(struct protection_domain *domain,
__domain_flush_pages(domain, address, size, 0);
}
-/* Flush the whole IO/TLB for a given protection domain */
-static void domain_flush_tlb(struct protection_domain *domain)
-{
- __domain_flush_pages(domain, 0, CMD_INV_IOMMU_ALL_PAGES_ADDRESS, 0);
-}
-
/* Flush the whole IO/TLB for a given protection domain - including PDE */
static void domain_flush_tlb_pde(struct protection_domain *domain)
{
@@ -1300,8 +1285,12 @@ static void domain_flush_np_cache(struct protection_domain *domain,
dma_addr_t iova, size_t size)
{
if (unlikely(amd_iommu_np_cache)) {
+ unsigned long flags;
+
+ spin_lock_irqsave(&domain->lock, flags);
domain_flush_pages(domain, iova, size);
domain_flush_complete(domain);
+ spin_unlock_irqrestore(&domain->lock, flags);
}
}
@@ -1414,7 +1403,7 @@ static void free_pagetable(struct protection_domain *domain)
BUG_ON(domain->mode < PAGE_MODE_NONE ||
domain->mode > PAGE_MODE_6_LEVEL);
- free_sub_pt(root, domain->mode, freelist);
+ freelist = free_sub_pt(root, domain->mode, freelist);
free_page_list(freelist);
}
@@ -1425,32 +1414,42 @@ static void free_pagetable(struct protection_domain *domain)
* to 64 bits.
*/
static bool increase_address_space(struct protection_domain *domain,
+ unsigned long address,
gfp_t gfp)
{
+ unsigned long flags;
+ bool ret = false;
u64 *pte;
- if (domain->mode == PAGE_MODE_6_LEVEL)
- /* address space already 64 bit large */
- return false;
+ spin_lock_irqsave(&domain->lock, flags);
+
+ if (address <= PM_LEVEL_SIZE(domain->mode) ||
+ WARN_ON_ONCE(domain->mode == PAGE_MODE_6_LEVEL))
+ goto out;
pte = (void *)get_zeroed_page(gfp);
if (!pte)
- return false;
+ goto out;
*pte = PM_LEVEL_PDE(domain->mode,
iommu_virt_to_phys(domain->pt_root));
domain->pt_root = pte;
domain->mode += 1;
- domain->updated = true;
- return true;
+ ret = true;
+
+out:
+ spin_unlock_irqrestore(&domain->lock, flags);
+
+ return ret;
}
static u64 *alloc_pte(struct protection_domain *domain,
unsigned long address,
unsigned long page_size,
u64 **pte_page,
- gfp_t gfp)
+ gfp_t gfp,
+ bool *updated)
{
int level, end_lvl;
u64 *pte, *page;
@@ -1458,7 +1457,7 @@ static u64 *alloc_pte(struct protection_domain *domain,
BUG_ON(!is_power_of_2(page_size));
while (address > PM_LEVEL_SIZE(domain->mode))
- increase_address_space(domain, gfp);
+ *updated = increase_address_space(domain, address, gfp) || *updated;
level = domain->mode - 1;
pte = &domain->pt_root[PM_LEVEL_INDEX(level, address)];
@@ -1472,9 +1471,32 @@ static u64 *alloc_pte(struct protection_domain *domain,
__pte = *pte;
pte_level = PM_PTE_LEVEL(__pte);
- if (!IOMMU_PTE_PRESENT(__pte) ||
+ /*
+ * If we replace a series of large PTEs, we need
+ * to tear down all of them.
+ */
+ if (IOMMU_PTE_PRESENT(__pte) &&
pte_level == PAGE_MODE_7_LEVEL) {
+ unsigned long count, i;
+ u64 *lpte;
+
+ lpte = first_pte_l7(pte, NULL, &count);
+
+ /*
+ * Unmap the replicated PTEs that still match the
+ * original large mapping
+ */
+ for (i = 0; i < count; ++i)
+ cmpxchg64(&lpte[i], __pte, 0ULL);
+
+ *updated = true;
+ continue;
+ }
+
+ if (!IOMMU_PTE_PRESENT(__pte) ||
+ pte_level == PAGE_MODE_NONE) {
page = (u64 *)get_zeroed_page(gfp);
+
if (!page)
return NULL;
@@ -1483,8 +1505,8 @@ static u64 *alloc_pte(struct protection_domain *domain,
/* pte could have been changed somewhere. */
if (cmpxchg64(pte, __pte, __npte) != __pte)
free_page((unsigned long)page);
- else if (pte_level == PAGE_MODE_7_LEVEL)
- domain->updated = true;
+ else if (IOMMU_PTE_PRESENT(__pte))
+ *updated = true;
continue;
}
@@ -1549,17 +1571,12 @@ static u64 *fetch_pte(struct protection_domain *domain,
*page_size = PTE_LEVEL_PAGE_SIZE(level);
}
- if (PM_PTE_LEVEL(*pte) == 0x07) {
- unsigned long pte_mask;
-
- /*
- * If we have a series of large PTEs, make
- * sure to return a pointer to the first one.
- */
- *page_size = pte_mask = PTE_PAGE_SIZE(*pte);
- pte_mask = ~((PAGE_SIZE_PTE_COUNT(pte_mask) << 3) - 1);
- pte = (u64 *)(((unsigned long)pte) & pte_mask);
- }
+ /*
+ * If we have a series of large PTEs, make
+ * sure to return a pointer to the first one.
+ */
+ if (PM_PTE_LEVEL(*pte) == PAGE_MODE_7_LEVEL)
+ pte = first_pte_l7(pte, page_size, NULL);
return pte;
}
@@ -1598,26 +1615,29 @@ static int iommu_map_page(struct protection_domain *dom,
gfp_t gfp)
{
struct page *freelist = NULL;
+ bool updated = false;
u64 __pte, *pte;
- int i, count;
+ int ret, i, count;
BUG_ON(!IS_ALIGNED(bus_addr, page_size));
BUG_ON(!IS_ALIGNED(phys_addr, page_size));
+ ret = -EINVAL;
if (!(prot & IOMMU_PROT_MASK))
- return -EINVAL;
+ goto out;
count = PAGE_SIZE_PTE_COUNT(page_size);
- pte = alloc_pte(dom, bus_addr, page_size, NULL, gfp);
+ pte = alloc_pte(dom, bus_addr, page_size, NULL, gfp, &updated);
+ ret = -ENOMEM;
if (!pte)
- return -ENOMEM;
+ goto out;
for (i = 0; i < count; ++i)
freelist = free_clear_pte(&pte[i], pte[i], freelist);
if (freelist != NULL)
- dom->updated = true;
+ updated = true;
if (count > 1) {
__pte = PAGE_SIZE_PTE(__sme_set(phys_addr), page_size);
@@ -1633,12 +1653,21 @@ static int iommu_map_page(struct protection_domain *dom,
for (i = 0; i < count; ++i)
pte[i] = __pte;
- update_domain(dom);
+ ret = 0;
+
+out:
+ if (updated) {
+ unsigned long flags;
+
+ spin_lock_irqsave(&dom->lock, flags);
+ update_domain(dom);
+ spin_unlock_irqrestore(&dom->lock, flags);
+ }
/* Everything flushed out, free pages now */
free_page_list(freelist);
- return 0;
+ return ret;
}
static unsigned long iommu_unmap_page(struct protection_domain *dom,
@@ -1676,43 +1705,6 @@ static unsigned long iommu_unmap_page(struct protection_domain *dom,
/****************************************************************************
*
- * The next functions belong to the address allocator for the dma_ops
- * interface functions.
- *
- ****************************************************************************/
-
-
-static unsigned long dma_ops_alloc_iova(struct device *dev,
- struct dma_ops_domain *dma_dom,
- unsigned int pages, u64 dma_mask)
-{
- unsigned long pfn = 0;
-
- pages = __roundup_pow_of_two(pages);
-
- if (dma_mask > DMA_BIT_MASK(32))
- pfn = alloc_iova_fast(&dma_dom->iovad, pages,
- IOVA_PFN(DMA_BIT_MASK(32)), false);
-
- if (!pfn)
- pfn = alloc_iova_fast(&dma_dom->iovad, pages,
- IOVA_PFN(dma_mask), true);
-
- return (pfn << PAGE_SHIFT);
-}
-
-static void dma_ops_free_iova(struct dma_ops_domain *dma_dom,
- unsigned long address,
- unsigned int pages)
-{
- pages = __roundup_pow_of_two(pages);
- address >>= PAGE_SHIFT;
-
- free_iova_fast(&dma_dom->iovad, address, pages);
-}
-
-/****************************************************************************
- *
* The next functions belong to the domain allocation. A domain is
* allocated for every IOMMU as the default domain. If device isolation
* is enabled, every device get its own domain. The most important thing
@@ -1787,38 +1779,23 @@ static void free_gcr3_table(struct protection_domain *domain)
free_page((unsigned long)domain->gcr3_tbl);
}
-static void dma_ops_domain_flush_tlb(struct dma_ops_domain *dom)
-{
- domain_flush_tlb(&dom->domain);
- domain_flush_complete(&dom->domain);
-}
-
-static void iova_domain_flush_tlb(struct iova_domain *iovad)
-{
- struct dma_ops_domain *dom;
-
- dom = container_of(iovad, struct dma_ops_domain, iovad);
-
- dma_ops_domain_flush_tlb(dom);
-}
-
/*
* Free a domain, only used if something went wrong in the
* allocation path and we need to free an already allocated page table
*/
-static void dma_ops_domain_free(struct dma_ops_domain *dom)
+static void dma_ops_domain_free(struct protection_domain *domain)
{
- if (!dom)
+ if (!domain)
return;
- put_iova_domain(&dom->iovad);
+ iommu_put_dma_cookie(&domain->domain);
- free_pagetable(&dom->domain);
+ free_pagetable(domain);
- if (dom->domain.id)
- domain_id_free(dom->domain.id);
+ if (domain->id)
+ domain_id_free(domain->id);
- kfree(dom);
+ kfree(domain);
}
/*
@@ -1826,35 +1803,30 @@ static void dma_ops_domain_free(struct dma_ops_domain *dom)
* It also initializes the page table and the address allocator data
* structures required for the dma_ops interface
*/
-static struct dma_ops_domain *dma_ops_domain_alloc(void)
+static struct protection_domain *dma_ops_domain_alloc(void)
{
- struct dma_ops_domain *dma_dom;
+ struct protection_domain *domain;
- dma_dom = kzalloc(sizeof(struct dma_ops_domain), GFP_KERNEL);
- if (!dma_dom)
+ domain = kzalloc(sizeof(struct protection_domain), GFP_KERNEL);
+ if (!domain)
return NULL;
- if (protection_domain_init(&dma_dom->domain))
- goto free_dma_dom;
-
- dma_dom->domain.mode = PAGE_MODE_3_LEVEL;
- dma_dom->domain.pt_root = (void *)get_zeroed_page(GFP_KERNEL);
- dma_dom->domain.flags = PD_DMA_OPS_MASK;
- if (!dma_dom->domain.pt_root)
- goto free_dma_dom;
-
- init_iova_domain(&dma_dom->iovad, PAGE_SIZE, IOVA_START_PFN);
+ if (protection_domain_init(domain))
+ goto free_domain;
- if (init_iova_flush_queue(&dma_dom->iovad, iova_domain_flush_tlb, NULL))
- goto free_dma_dom;
+ domain->mode = PAGE_MODE_3_LEVEL;
+ domain->pt_root = (void *)get_zeroed_page(GFP_KERNEL);
+ domain->flags = PD_DMA_OPS_MASK;
+ if (!domain->pt_root)
+ goto free_domain;
- /* Initialize reserved ranges */
- copy_reserved_iova(&reserved_iova_ranges, &dma_dom->iovad);
+ if (iommu_get_dma_cookie(&domain->domain) == -ENOMEM)
+ goto free_domain;
- return dma_dom;
+ return domain;
-free_dma_dom:
- dma_ops_domain_free(dma_dom);
+free_domain:
+ dma_ops_domain_free(domain);
return NULL;
}
@@ -1873,6 +1845,7 @@ static void set_dte_entry(u16 devid, struct protection_domain *domain,
{
u64 pte_root = 0;
u64 flags = 0;
+ u32 old_domid;
if (domain->mode != PAGE_MODE_NONE)
pte_root = iommu_virt_to_phys(domain->pt_root);
@@ -1922,8 +1895,20 @@ static void set_dte_entry(u16 devid, struct protection_domain *domain,
flags &= ~DEV_DOMID_MASK;
flags |= domain->id;
+ old_domid = amd_iommu_dev_table[devid].data[1] & DEV_DOMID_MASK;
amd_iommu_dev_table[devid].data[1] = flags;
amd_iommu_dev_table[devid].data[0] = pte_root;
+
+ /*
+ * A kdump kernel might be replacing a domain ID that was copied from
+ * the previous kernel--if so, it needs to flush the translation cache
+ * entries for the old domain ID that is being overwritten
+ */
+ if (old_domid) {
+ struct amd_iommu *iommu = amd_iommu_rlookup_table[devid];
+
+ amd_iommu_flush_tlb_domid(iommu, old_domid);
+ }
}
static void clear_dte_entry(u16 devid)
@@ -1939,11 +1924,9 @@ static void do_attach(struct iommu_dev_data *dev_data,
struct protection_domain *domain)
{
struct amd_iommu *iommu;
- u16 alias;
bool ats;
iommu = amd_iommu_rlookup_table[dev_data->devid];
- alias = dev_data->alias;
ats = dev_data->ats.enabled;
/* Update data structures */
@@ -1956,8 +1939,7 @@ static void do_attach(struct iommu_dev_data *dev_data,
/* Update device table */
set_dte_entry(dev_data->devid, domain, ats, dev_data->iommu_v2);
- if (alias != dev_data->devid)
- set_dte_entry(alias, domain, ats, dev_data->iommu_v2);
+ clone_aliases(dev_data->pdev);
device_flush_dte(dev_data);
}
@@ -1966,17 +1948,14 @@ static void do_detach(struct iommu_dev_data *dev_data)
{
struct protection_domain *domain = dev_data->domain;
struct amd_iommu *iommu;
- u16 alias;
iommu = amd_iommu_rlookup_table[dev_data->devid];
- alias = dev_data->alias;
/* Update data structures */
dev_data->domain = NULL;
list_del(&dev_data->list);
clear_dte_entry(dev_data->devid);
- if (alias != dev_data->devid)
- clear_dte_entry(alias);
+ clone_aliases(dev_data->pdev);
/* Flush the DTE entry */
device_flush_dte(dev_data);
@@ -1992,36 +1971,6 @@ static void do_detach(struct iommu_dev_data *dev_data)
domain->dev_cnt -= 1;
}
-/*
- * If a device is not yet associated with a domain, this function makes the
- * device visible in the domain
- */
-static int __attach_device(struct iommu_dev_data *dev_data,
- struct protection_domain *domain)
-{
- int ret;
-
- /* lock domain */
- spin_lock(&domain->lock);
-
- ret = -EBUSY;
- if (dev_data->domain != NULL)
- goto out_unlock;
-
- /* Attach alias group root */
- do_attach(dev_data, domain);
-
- ret = 0;
-
-out_unlock:
-
- /* ready */
- spin_unlock(&domain->lock);
-
- return ret;
-}
-
-
static void pdev_iommuv2_disable(struct pci_dev *pdev)
{
pci_disable_ats(pdev);
@@ -2103,19 +2052,28 @@ static int attach_device(struct device *dev,
unsigned long flags;
int ret;
+ spin_lock_irqsave(&domain->lock, flags);
+
dev_data = get_dev_data(dev);
+ spin_lock(&dev_data->lock);
+
+ ret = -EBUSY;
+ if (dev_data->domain != NULL)
+ goto out;
+
if (!dev_is_pci(dev))
goto skip_ats_check;
pdev = to_pci_dev(dev);
if (domain->flags & PD_IOMMUV2_MASK) {
+ ret = -EINVAL;
if (!dev_data->passthrough)
- return -EINVAL;
+ goto out;
if (dev_data->iommu_v2) {
if (pdev_iommuv2_enable(pdev) != 0)
- return -EINVAL;
+ goto out;
dev_data->ats.enabled = true;
dev_data->ats.qdep = pci_ats_queue_depth(pdev);
@@ -2128,9 +2086,9 @@ static int attach_device(struct device *dev,
}
skip_ats_check:
- spin_lock_irqsave(&amd_iommu_devtable_lock, flags);
- ret = __attach_device(dev_data, domain);
- spin_unlock_irqrestore(&amd_iommu_devtable_lock, flags);
+ ret = 0;
+
+ do_attach(dev_data, domain);
/*
* We might boot into a crash-kernel here. The crashed kernel
@@ -2139,23 +2097,14 @@ skip_ats_check:
*/
domain_flush_tlb_pde(domain);
- return ret;
-}
-
-/*
- * Removes a device from a protection domain (unlocked)
- */
-static void __detach_device(struct iommu_dev_data *dev_data)
-{
- struct protection_domain *domain;
-
- domain = dev_data->domain;
+ domain_flush_complete(domain);
- spin_lock(&domain->lock);
+out:
+ spin_unlock(&dev_data->lock);
- do_detach(dev_data);
+ spin_unlock_irqrestore(&domain->lock, flags);
- spin_unlock(&domain->lock);
+ return ret;
}
/*
@@ -2170,6 +2119,10 @@ static void detach_device(struct device *dev)
dev_data = get_dev_data(dev);
domain = dev_data->domain;
+ spin_lock_irqsave(&domain->lock, flags);
+
+ spin_lock(&dev_data->lock);
+
/*
* First check if the device is still attached. It might already
* be detached from its domain because the generic
@@ -2177,15 +2130,12 @@ static void detach_device(struct device *dev)
* our alias handling.
*/
if (WARN_ON(!dev_data->domain))
- return;
+ goto out;
- /* lock device table */
- spin_lock_irqsave(&amd_iommu_devtable_lock, flags);
- __detach_device(dev_data);
- spin_unlock_irqrestore(&amd_iommu_devtable_lock, flags);
+ do_detach(dev_data);
if (!dev_is_pci(dev))
- return;
+ goto out;
if (domain->flags & PD_IOMMUV2_MASK && dev_data->iommu_v2)
pdev_iommuv2_disable(to_pci_dev(dev));
@@ -2193,6 +2143,11 @@ static void detach_device(struct device *dev)
pci_disable_ats(to_pci_dev(dev));
dev_data->ats.enabled = false;
+
+out:
+ spin_unlock(&dev_data->lock);
+
+ spin_unlock_irqrestore(&domain->lock, flags);
}
static int amd_iommu_add_device(struct device *dev)
@@ -2226,15 +2181,15 @@ static int amd_iommu_add_device(struct device *dev)
BUG_ON(!dev_data);
- if (iommu_pass_through || dev_data->iommu_v2)
+ if (dev_data->iommu_v2)
iommu_request_dm_for_dev(dev);
/* Domains are initialized for this device - have a look what we ended up with */
domain = iommu_get_domain_for_dev(dev);
if (domain->type == IOMMU_DOMAIN_IDENTITY)
dev_data->passthrough = true;
- else
- dev->dma_ops = &amd_iommu_dma_ops;
+ else if (domain->type == IOMMU_DOMAIN_DMA)
+ iommu_setup_dma_ops(dev, IOVA_START_PFN << PAGE_SHIFT, 0);
out:
iommu_completion_wait(iommu);
@@ -2268,43 +2223,32 @@ static struct iommu_group *amd_iommu_device_group(struct device *dev)
return acpihid_device_group(dev);
}
+static int amd_iommu_domain_get_attr(struct iommu_domain *domain,
+ enum iommu_attr attr, void *data)
+{
+ switch (domain->type) {
+ case IOMMU_DOMAIN_UNMANAGED:
+ return -ENODEV;
+ case IOMMU_DOMAIN_DMA:
+ switch (attr) {
+ case DOMAIN_ATTR_DMA_USE_FLUSH_QUEUE:
+ *(int *)data = !amd_iommu_unmap_flush;
+ return 0;
+ default:
+ return -ENODEV;
+ }
+ break;
+ default:
+ return -EINVAL;
+ }
+}
+
/*****************************************************************************
*
* The next functions belong to the dma_ops mapping/unmapping code.
*
*****************************************************************************/
-/*
- * In the dma_ops path we only have the struct device. This function
- * finds the corresponding IOMMU, the protection domain and the
- * requestor id for a given device.
- * If the device is not yet associated with a domain this is also done
- * in this function.
- */
-static struct protection_domain *get_domain(struct device *dev)
-{
- struct protection_domain *domain;
- struct iommu_domain *io_domain;
-
- if (!check_device(dev))
- return ERR_PTR(-EINVAL);
-
- domain = get_dev_data(dev)->domain;
- if (domain == NULL && get_dev_data(dev)->defer_attach) {
- get_dev_data(dev)->defer_attach = false;
- io_domain = iommu_get_domain_for_dev(dev);
- domain = to_pdomain(io_domain);
- attach_device(dev, domain);
- }
- if (domain == NULL)
- return ERR_PTR(-EBUSY);
-
- if (!dma_ops_domain(domain))
- return ERR_PTR(-EBUSY);
-
- return domain;
-}
-
static void update_device_table(struct protection_domain *domain)
{
struct iommu_dev_data *dev_data;
@@ -2312,468 +2256,16 @@ static void update_device_table(struct protection_domain *domain)
list_for_each_entry(dev_data, &domain->dev_list, list) {
set_dte_entry(dev_data->devid, domain, dev_data->ats.enabled,
dev_data->iommu_v2);
-
- if (dev_data->devid == dev_data->alias)
- continue;
-
- /* There is an alias, update device table entry for it */
- set_dte_entry(dev_data->alias, domain, dev_data->ats.enabled,
- dev_data->iommu_v2);
+ clone_aliases(dev_data->pdev);
}
}
static void update_domain(struct protection_domain *domain)
{
- if (!domain->updated)
- return;
-
update_device_table(domain);
domain_flush_devices(domain);
domain_flush_tlb_pde(domain);
-
- domain->updated = false;
-}
-
-static int dir2prot(enum dma_data_direction direction)
-{
- if (direction == DMA_TO_DEVICE)
- return IOMMU_PROT_IR;
- else if (direction == DMA_FROM_DEVICE)
- return IOMMU_PROT_IW;
- else if (direction == DMA_BIDIRECTIONAL)
- return IOMMU_PROT_IW | IOMMU_PROT_IR;
- else
- return 0;
-}
-
-/*
- * This function contains common code for mapping of a physically
- * contiguous memory region into DMA address space. It is used by all
- * mapping functions provided with this IOMMU driver.
- * Must be called with the domain lock held.
- */
-static dma_addr_t __map_single(struct device *dev,
- struct dma_ops_domain *dma_dom,
- phys_addr_t paddr,
- size_t size,
- enum dma_data_direction direction,
- u64 dma_mask)
-{
- dma_addr_t offset = paddr & ~PAGE_MASK;
- dma_addr_t address, start, ret;
- unsigned int pages;
- int prot = 0;
- int i;
-
- pages = iommu_num_pages(paddr, size, PAGE_SIZE);
- paddr &= PAGE_MASK;
-
- address = dma_ops_alloc_iova(dev, dma_dom, pages, dma_mask);
- if (!address)
- goto out;
-
- prot = dir2prot(direction);
-
- start = address;
- for (i = 0; i < pages; ++i) {
- ret = iommu_map_page(&dma_dom->domain, start, paddr,
- PAGE_SIZE, prot, GFP_ATOMIC);
- if (ret)
- goto out_unmap;
-
- paddr += PAGE_SIZE;
- start += PAGE_SIZE;
- }
- address += offset;
-
- domain_flush_np_cache(&dma_dom->domain, address, size);
-
-out:
- return address;
-
-out_unmap:
-
- for (--i; i >= 0; --i) {
- start -= PAGE_SIZE;
- iommu_unmap_page(&dma_dom->domain, start, PAGE_SIZE);
- }
-
- domain_flush_tlb(&dma_dom->domain);
- domain_flush_complete(&dma_dom->domain);
-
- dma_ops_free_iova(dma_dom, address, pages);
-
- return DMA_MAPPING_ERROR;
-}
-
-/*
- * Does the reverse of the __map_single function. Must be called with
- * the domain lock held too
- */
-static void __unmap_single(struct dma_ops_domain *dma_dom,
- dma_addr_t dma_addr,
- size_t size,
- int dir)
-{
- dma_addr_t i, start;
- unsigned int pages;
-
- pages = iommu_num_pages(dma_addr, size, PAGE_SIZE);
- dma_addr &= PAGE_MASK;
- start = dma_addr;
-
- for (i = 0; i < pages; ++i) {
- iommu_unmap_page(&dma_dom->domain, start, PAGE_SIZE);
- start += PAGE_SIZE;
- }
-
- if (amd_iommu_unmap_flush) {
- domain_flush_tlb(&dma_dom->domain);
- domain_flush_complete(&dma_dom->domain);
- dma_ops_free_iova(dma_dom, dma_addr, pages);
- } else {
- pages = __roundup_pow_of_two(pages);
- queue_iova(&dma_dom->iovad, dma_addr >> PAGE_SHIFT, pages, 0);
- }
-}
-
-/*
- * The exported map_single function for dma_ops.
- */
-static dma_addr_t map_page(struct device *dev, struct page *page,
- unsigned long offset, size_t size,
- enum dma_data_direction dir,
- unsigned long attrs)
-{
- phys_addr_t paddr = page_to_phys(page) + offset;
- struct protection_domain *domain;
- struct dma_ops_domain *dma_dom;
- u64 dma_mask;
-
- domain = get_domain(dev);
- if (PTR_ERR(domain) == -EINVAL)
- return (dma_addr_t)paddr;
- else if (IS_ERR(domain))
- return DMA_MAPPING_ERROR;
-
- dma_mask = *dev->dma_mask;
- dma_dom = to_dma_ops_domain(domain);
-
- return __map_single(dev, dma_dom, paddr, size, dir, dma_mask);
-}
-
-/*
- * The exported unmap_single function for dma_ops.
- */
-static void unmap_page(struct device *dev, dma_addr_t dma_addr, size_t size,
- enum dma_data_direction dir, unsigned long attrs)
-{
- struct protection_domain *domain;
- struct dma_ops_domain *dma_dom;
-
- domain = get_domain(dev);
- if (IS_ERR(domain))
- return;
-
- dma_dom = to_dma_ops_domain(domain);
-
- __unmap_single(dma_dom, dma_addr, size, dir);
-}
-
-static int sg_num_pages(struct device *dev,
- struct scatterlist *sglist,
- int nelems)
-{
- unsigned long mask, boundary_size;
- struct scatterlist *s;
- int i, npages = 0;
-
- mask = dma_get_seg_boundary(dev);
- boundary_size = mask + 1 ? ALIGN(mask + 1, PAGE_SIZE) >> PAGE_SHIFT :
- 1UL << (BITS_PER_LONG - PAGE_SHIFT);
-
- for_each_sg(sglist, s, nelems, i) {
- int p, n;
-
- s->dma_address = npages << PAGE_SHIFT;
- p = npages % boundary_size;
- n = iommu_num_pages(sg_phys(s), s->length, PAGE_SIZE);
- if (p + n > boundary_size)
- npages += boundary_size - p;
- npages += n;
- }
-
- return npages;
-}
-
-/*
- * The exported map_sg function for dma_ops (handles scatter-gather
- * lists).
- */
-static int map_sg(struct device *dev, struct scatterlist *sglist,
- int nelems, enum dma_data_direction direction,
- unsigned long attrs)
-{
- int mapped_pages = 0, npages = 0, prot = 0, i;
- struct protection_domain *domain;
- struct dma_ops_domain *dma_dom;
- struct scatterlist *s;
- unsigned long address;
- u64 dma_mask;
- int ret;
-
- domain = get_domain(dev);
- if (IS_ERR(domain))
- return 0;
-
- dma_dom = to_dma_ops_domain(domain);
- dma_mask = *dev->dma_mask;
-
- npages = sg_num_pages(dev, sglist, nelems);
-
- address = dma_ops_alloc_iova(dev, dma_dom, npages, dma_mask);
- if (!address)
- goto out_err;
-
- prot = dir2prot(direction);
-
- /* Map all sg entries */
- for_each_sg(sglist, s, nelems, i) {
- int j, pages = iommu_num_pages(sg_phys(s), s->length, PAGE_SIZE);
-
- for (j = 0; j < pages; ++j) {
- unsigned long bus_addr, phys_addr;
-
- bus_addr = address + s->dma_address + (j << PAGE_SHIFT);
- phys_addr = (sg_phys(s) & PAGE_MASK) + (j << PAGE_SHIFT);
- ret = iommu_map_page(domain, bus_addr, phys_addr, PAGE_SIZE, prot, GFP_ATOMIC);
- if (ret)
- goto out_unmap;
-
- mapped_pages += 1;
- }
- }
-
- /* Everything is mapped - write the right values into s->dma_address */
- for_each_sg(sglist, s, nelems, i) {
- /*
- * Add in the remaining piece of the scatter-gather offset that
- * was masked out when we were determining the physical address
- * via (sg_phys(s) & PAGE_MASK) earlier.
- */
- s->dma_address += address + (s->offset & ~PAGE_MASK);
- s->dma_length = s->length;
- }
-
- if (s)
- domain_flush_np_cache(domain, s->dma_address, s->dma_length);
-
- return nelems;
-
-out_unmap:
- dev_err(dev, "IOMMU mapping error in map_sg (io-pages: %d reason: %d)\n",
- npages, ret);
-
- for_each_sg(sglist, s, nelems, i) {
- int j, pages = iommu_num_pages(sg_phys(s), s->length, PAGE_SIZE);
-
- for (j = 0; j < pages; ++j) {
- unsigned long bus_addr;
-
- bus_addr = address + s->dma_address + (j << PAGE_SHIFT);
- iommu_unmap_page(domain, bus_addr, PAGE_SIZE);
-
- if (--mapped_pages == 0)
- goto out_free_iova;
- }
- }
-
-out_free_iova:
- free_iova_fast(&dma_dom->iovad, address >> PAGE_SHIFT, npages);
-
-out_err:
- return 0;
-}
-
-/*
- * The exported map_sg function for dma_ops (handles scatter-gather
- * lists).
- */
-static void unmap_sg(struct device *dev, struct scatterlist *sglist,
- int nelems, enum dma_data_direction dir,
- unsigned long attrs)
-{
- struct protection_domain *domain;
- struct dma_ops_domain *dma_dom;
- unsigned long startaddr;
- int npages;
-
- domain = get_domain(dev);
- if (IS_ERR(domain))
- return;
-
- startaddr = sg_dma_address(sglist) & PAGE_MASK;
- dma_dom = to_dma_ops_domain(domain);
- npages = sg_num_pages(dev, sglist, nelems);
-
- __unmap_single(dma_dom, startaddr, npages << PAGE_SHIFT, dir);
-}
-
-/*
- * The exported alloc_coherent function for dma_ops.
- */
-static void *alloc_coherent(struct device *dev, size_t size,
- dma_addr_t *dma_addr, gfp_t flag,
- unsigned long attrs)
-{
- u64 dma_mask = dev->coherent_dma_mask;
- struct protection_domain *domain;
- struct dma_ops_domain *dma_dom;
- struct page *page;
-
- domain = get_domain(dev);
- if (PTR_ERR(domain) == -EINVAL) {
- page = alloc_pages(flag, get_order(size));
- *dma_addr = page_to_phys(page);
- return page_address(page);
- } else if (IS_ERR(domain))
- return NULL;
-
- dma_dom = to_dma_ops_domain(domain);
- size = PAGE_ALIGN(size);
- dma_mask = dev->coherent_dma_mask;
- flag &= ~(__GFP_DMA | __GFP_HIGHMEM | __GFP_DMA32);
- flag |= __GFP_ZERO;
-
- page = alloc_pages(flag | __GFP_NOWARN, get_order(size));
- if (!page) {
- if (!gfpflags_allow_blocking(flag))
- return NULL;
-
- page = dma_alloc_from_contiguous(dev, size >> PAGE_SHIFT,
- get_order(size), flag & __GFP_NOWARN);
- if (!page)
- return NULL;
- }
-
- if (!dma_mask)
- dma_mask = *dev->dma_mask;
-
- *dma_addr = __map_single(dev, dma_dom, page_to_phys(page),
- size, DMA_BIDIRECTIONAL, dma_mask);
-
- if (*dma_addr == DMA_MAPPING_ERROR)
- goto out_free;
-
- return page_address(page);
-
-out_free:
-
- if (!dma_release_from_contiguous(dev, page, size >> PAGE_SHIFT))
- __free_pages(page, get_order(size));
-
- return NULL;
-}
-
-/*
- * The exported free_coherent function for dma_ops.
- */
-static void free_coherent(struct device *dev, size_t size,
- void *virt_addr, dma_addr_t dma_addr,
- unsigned long attrs)
-{
- struct protection_domain *domain;
- struct dma_ops_domain *dma_dom;
- struct page *page;
-
- page = virt_to_page(virt_addr);
- size = PAGE_ALIGN(size);
-
- domain = get_domain(dev);
- if (IS_ERR(domain))
- goto free_mem;
-
- dma_dom = to_dma_ops_domain(domain);
-
- __unmap_single(dma_dom, dma_addr, size, DMA_BIDIRECTIONAL);
-
-free_mem:
- if (!dma_release_from_contiguous(dev, page, size >> PAGE_SHIFT))
- __free_pages(page, get_order(size));
-}
-
-/*
- * This function is called by the DMA layer to find out if we can handle a
- * particular device. It is part of the dma_ops.
- */
-static int amd_iommu_dma_supported(struct device *dev, u64 mask)
-{
- if (!dma_direct_supported(dev, mask))
- return 0;
- return check_device(dev);
-}
-
-static const struct dma_map_ops amd_iommu_dma_ops = {
- .alloc = alloc_coherent,
- .free = free_coherent,
- .map_page = map_page,
- .unmap_page = unmap_page,
- .map_sg = map_sg,
- .unmap_sg = unmap_sg,
- .dma_supported = amd_iommu_dma_supported,
-};
-
-static int init_reserved_iova_ranges(void)
-{
- struct pci_dev *pdev = NULL;
- struct iova *val;
-
- init_iova_domain(&reserved_iova_ranges, PAGE_SIZE, IOVA_START_PFN);
-
- lockdep_set_class(&reserved_iova_ranges.iova_rbtree_lock,
- &reserved_rbtree_key);
-
- /* MSI memory range */
- val = reserve_iova(&reserved_iova_ranges,
- IOVA_PFN(MSI_RANGE_START), IOVA_PFN(MSI_RANGE_END));
- if (!val) {
- pr_err("Reserving MSI range failed\n");
- return -ENOMEM;
- }
-
- /* HT memory range */
- val = reserve_iova(&reserved_iova_ranges,
- IOVA_PFN(HT_RANGE_START), IOVA_PFN(HT_RANGE_END));
- if (!val) {
- pr_err("Reserving HT range failed\n");
- return -ENOMEM;
- }
-
- /*
- * Memory used for PCI resources
- * FIXME: Check whether we can reserve the PCI-hole completly
- */
- for_each_pci_dev(pdev) {
- int i;
-
- for (i = 0; i < PCI_NUM_RESOURCES; ++i) {
- struct resource *r = &pdev->resource[i];
-
- if (!(r->flags & IORESOURCE_MEM))
- continue;
-
- val = reserve_iova(&reserved_iova_ranges,
- IOVA_PFN(r->start),
- IOVA_PFN(r->end));
- if (!val) {
- pci_err(pdev, "Reserve pci-resource range %pR failed\n", r);
- return -ENOMEM;
- }
- }
- }
-
- return 0;
}
int __init amd_iommu_init_api(void)
@@ -2784,10 +2276,6 @@ int __init amd_iommu_init_api(void)
if (ret)
return ret;
- ret = init_reserved_iova_ranges();
- if (ret)
- return ret;
-
err = bus_set_iommu(&pci_bus_type, &amd_iommu_ops);
if (err)
return err;
@@ -2805,8 +2293,7 @@ int __init amd_iommu_init_api(void)
int __init amd_iommu_init_dma_ops(void)
{
- swiotlb = (iommu_pass_through || sme_me_mask) ? 1 : 0;
- iommu_detected = 1;
+ swiotlb = (iommu_default_passthrough() || sme_me_mask) ? 1 : 0;
if (amd_iommu_unmap_flush)
pr_info("IO/TLB flush on unmap enabled\n");
@@ -2832,16 +2319,16 @@ static void cleanup_domain(struct protection_domain *domain)
struct iommu_dev_data *entry;
unsigned long flags;
- spin_lock_irqsave(&amd_iommu_devtable_lock, flags);
+ spin_lock_irqsave(&domain->lock, flags);
while (!list_empty(&domain->dev_list)) {
entry = list_first_entry(&domain->dev_list,
struct iommu_dev_data, list);
BUG_ON(!entry->domain);
- __detach_device(entry);
+ do_detach(entry);
}
- spin_unlock_irqrestore(&amd_iommu_devtable_lock, flags);
+ spin_unlock_irqrestore(&domain->lock, flags);
}
static void protection_domain_free(struct protection_domain *domain)
@@ -2858,7 +2345,6 @@ static void protection_domain_free(struct protection_domain *domain)
static int protection_domain_init(struct protection_domain *domain)
{
spin_lock_init(&domain->lock);
- mutex_init(&domain->api_lock);
domain->id = domain_id_alloc();
if (!domain->id)
return -ENOMEM;
@@ -2889,7 +2375,6 @@ out_err:
static struct iommu_domain *amd_iommu_domain_alloc(unsigned type)
{
struct protection_domain *pdomain;
- struct dma_ops_domain *dma_domain;
switch (type) {
case IOMMU_DOMAIN_UNMANAGED:
@@ -2910,12 +2395,11 @@ static struct iommu_domain *amd_iommu_domain_alloc(unsigned type)
break;
case IOMMU_DOMAIN_DMA:
- dma_domain = dma_ops_domain_alloc();
- if (!dma_domain) {
+ pdomain = dma_ops_domain_alloc();
+ if (!pdomain) {
pr_err("Failed to allocate\n");
return NULL;
}
- pdomain = &dma_domain->domain;
break;
case IOMMU_DOMAIN_IDENTITY:
pdomain = protection_domain_alloc();
@@ -2934,7 +2418,6 @@ static struct iommu_domain *amd_iommu_domain_alloc(unsigned type)
static void amd_iommu_domain_free(struct iommu_domain *dom)
{
struct protection_domain *domain;
- struct dma_ops_domain *dma_dom;
domain = to_pdomain(dom);
@@ -2949,8 +2432,7 @@ static void amd_iommu_domain_free(struct iommu_domain *dom)
switch (dom->type) {
case IOMMU_DOMAIN_DMA:
/* Now release the domain */
- dma_dom = to_dma_ops_domain(domain);
- dma_ops_domain_free(dma_dom);
+ dma_ops_domain_free(domain);
break;
default:
if (domain->mode != PAGE_MODE_NONE)
@@ -3006,6 +2488,7 @@ static int amd_iommu_attach_device(struct iommu_domain *dom,
return -EINVAL;
dev_data = dev->archdata.iommu;
+ dev_data->defer_attach = false;
iommu = amd_iommu_rlookup_table[dev_data->devid];
if (!iommu)
@@ -3031,7 +2514,8 @@ static int amd_iommu_attach_device(struct iommu_domain *dom,
}
static int amd_iommu_map(struct iommu_domain *dom, unsigned long iova,
- phys_addr_t paddr, size_t page_size, int iommu_prot)
+ phys_addr_t paddr, size_t page_size, int iommu_prot,
+ gfp_t gfp)
{
struct protection_domain *domain = to_pdomain(dom);
int prot = 0;
@@ -3045,9 +2529,7 @@ static int amd_iommu_map(struct iommu_domain *dom, unsigned long iova,
if (iommu_prot & IOMMU_WRITE)
prot |= IOMMU_PROT_IW;
- mutex_lock(&domain->api_lock);
- ret = iommu_map_page(domain, iova, paddr, page_size, prot, GFP_KERNEL);
- mutex_unlock(&domain->api_lock);
+ ret = iommu_map_page(domain, iova, paddr, page_size, prot, gfp);
domain_flush_np_cache(domain, iova, page_size);
@@ -3055,19 +2537,15 @@ static int amd_iommu_map(struct iommu_domain *dom, unsigned long iova,
}
static size_t amd_iommu_unmap(struct iommu_domain *dom, unsigned long iova,
- size_t page_size)
+ size_t page_size,
+ struct iommu_iotlb_gather *gather)
{
struct protection_domain *domain = to_pdomain(dom);
- size_t unmap_size;
if (domain->mode == PAGE_MODE_NONE)
return 0;
- mutex_lock(&domain->api_lock);
- unmap_size = iommu_unmap_page(domain, iova, page_size);
- mutex_unlock(&domain->api_lock);
-
- return unmap_size;
+ return iommu_unmap_page(domain, iova, page_size);
}
static phys_addr_t amd_iommu_iova_to_phys(struct iommu_domain *dom,
@@ -3159,28 +2637,6 @@ static void amd_iommu_get_resv_regions(struct device *dev,
list_add_tail(&region->list, head);
}
-static void amd_iommu_put_resv_regions(struct device *dev,
- struct list_head *head)
-{
- struct iommu_resv_region *entry, *next;
-
- list_for_each_entry_safe(entry, next, head, list)
- kfree(entry);
-}
-
-static void amd_iommu_apply_resv_region(struct device *dev,
- struct iommu_domain *domain,
- struct iommu_resv_region *region)
-{
- struct dma_ops_domain *dma_dom = to_dma_ops_domain(to_pdomain(domain));
- unsigned long start, end;
-
- start = IOVA_PFN(region->start);
- end = IOVA_PFN(region->start + region->length - 1);
-
- WARN_ON_ONCE(reserve_iova(&dma_dom->iovad, start, end) == NULL);
-}
-
static bool amd_iommu_is_attach_deferred(struct iommu_domain *domain,
struct device *dev)
{
@@ -3191,14 +2647,18 @@ static bool amd_iommu_is_attach_deferred(struct iommu_domain *domain,
static void amd_iommu_flush_iotlb_all(struct iommu_domain *domain)
{
struct protection_domain *dom = to_pdomain(domain);
+ unsigned long flags;
+ spin_lock_irqsave(&dom->lock, flags);
domain_flush_tlb_pde(dom);
domain_flush_complete(dom);
+ spin_unlock_irqrestore(&dom->lock, flags);
}
-static void amd_iommu_iotlb_range_add(struct iommu_domain *domain,
- unsigned long iova, size_t size)
+static void amd_iommu_iotlb_sync(struct iommu_domain *domain,
+ struct iommu_iotlb_gather *gather)
{
+ amd_iommu_flush_iotlb_all(domain);
}
const struct iommu_ops amd_iommu_ops = {
@@ -3213,14 +2673,13 @@ const struct iommu_ops amd_iommu_ops = {
.add_device = amd_iommu_add_device,
.remove_device = amd_iommu_remove_device,
.device_group = amd_iommu_device_group,
+ .domain_get_attr = amd_iommu_domain_get_attr,
.get_resv_regions = amd_iommu_get_resv_regions,
- .put_resv_regions = amd_iommu_put_resv_regions,
- .apply_resv_region = amd_iommu_apply_resv_region,
+ .put_resv_regions = generic_iommu_put_resv_regions,
.is_attach_deferred = amd_iommu_is_attach_deferred,
.pgsize_bitmap = AMD_IOMMU_PGSIZES,
.flush_iotlb_all = amd_iommu_flush_iotlb_all,
- .iotlb_range_add = amd_iommu_iotlb_range_add,
- .iotlb_sync = amd_iommu_flush_iotlb_all,
+ .iotlb_sync = amd_iommu_iotlb_sync,
};
/*****************************************************************************
@@ -3255,7 +2714,6 @@ void amd_iommu_domain_direct_map(struct iommu_domain *dom)
/* Update data structure */
domain->mode = PAGE_MODE_NONE;
- domain->updated = true;
/* Make changes visible to IOMMUs */
update_domain(domain);
@@ -3301,7 +2759,6 @@ int amd_iommu_domain_enable_v2(struct iommu_domain *dom, int pasids)
domain->glx = levels;
domain->flags |= PD_IOMMUV2_MASK;
- domain->updated = true;
update_domain(domain);
@@ -3530,9 +2987,23 @@ EXPORT_SYMBOL(amd_iommu_complete_ppr);
struct iommu_domain *amd_iommu_get_v2_domain(struct pci_dev *pdev)
{
struct protection_domain *pdomain;
+ struct iommu_domain *io_domain;
+ struct device *dev = &pdev->dev;
+
+ if (!check_device(dev))
+ return NULL;
+
+ pdomain = get_dev_data(dev)->domain;
+ if (pdomain == NULL && get_dev_data(dev)->defer_attach) {
+ get_dev_data(dev)->defer_attach = false;
+ io_domain = iommu_get_domain_for_dev(dev);
+ pdomain = to_pdomain(io_domain);
+ attach_device(dev, pdomain);
+ }
+ if (pdomain == NULL)
+ return NULL;
- pdomain = get_domain(&pdev->dev);
- if (IS_ERR(pdomain))
+ if (!dma_ops_domain(pdomain))
return NULL;
/* Only return IOMMUv2 domains */
@@ -3672,7 +3143,20 @@ static void set_remap_table_entry(struct amd_iommu *iommu, u16 devid,
iommu_flush_dte(iommu, devid);
}
-static struct irq_remap_table *alloc_irq_table(u16 devid)
+static int set_remap_table_entry_alias(struct pci_dev *pdev, u16 alias,
+ void *data)
+{
+ struct irq_remap_table *table = data;
+
+ irq_lookup_table[alias] = table;
+ set_dte_irq_entry(alias, table);
+
+ iommu_flush_dte(amd_iommu_rlookup_table[alias], alias);
+
+ return 0;
+}
+
+static struct irq_remap_table *alloc_irq_table(u16 devid, struct pci_dev *pdev)
{
struct irq_remap_table *table = NULL;
struct irq_remap_table *new_table = NULL;
@@ -3718,7 +3202,12 @@ static struct irq_remap_table *alloc_irq_table(u16 devid)
table = new_table;
new_table = NULL;
- set_remap_table_entry(iommu, devid, table);
+ if (pdev)
+ pci_for_each_dma_alias(pdev, set_remap_table_entry_alias,
+ table);
+ else
+ set_remap_table_entry(iommu, devid, table);
+
if (devid != alias)
set_remap_table_entry(iommu, alias, table);
@@ -3735,7 +3224,8 @@ out_unlock:
return table;
}
-static int alloc_irq_index(u16 devid, int count, bool align)
+static int alloc_irq_index(u16 devid, int count, bool align,
+ struct pci_dev *pdev)
{
struct irq_remap_table *table;
int index, c, alignment = 1;
@@ -3745,7 +3235,7 @@ static int alloc_irq_index(u16 devid, int count, bool align)
if (!iommu)
return -ENODEV;
- table = alloc_irq_table(devid);
+ table = alloc_irq_table(devid, pdev);
if (!table)
return -ENODEV;
@@ -4178,7 +3668,7 @@ static int irq_remapping_alloc(struct irq_domain *domain, unsigned int virq,
struct irq_remap_table *table;
struct amd_iommu *iommu;
- table = alloc_irq_table(devid);
+ table = alloc_irq_table(devid, NULL);
if (table) {
if (!table->min_index) {
/*
@@ -4195,11 +3685,15 @@ static int irq_remapping_alloc(struct irq_domain *domain, unsigned int virq,
} else {
index = -ENOMEM;
}
- } else {
+ } else if (info->type == X86_IRQ_ALLOC_TYPE_MSI ||
+ info->type == X86_IRQ_ALLOC_TYPE_MSIX) {
bool align = (info->type == X86_IRQ_ALLOC_TYPE_MSI);
- index = alloc_irq_index(devid, nr_irqs, align);
+ index = alloc_irq_index(devid, nr_irqs, align, info->msi_dev);
+ } else {
+ index = alloc_irq_index(devid, nr_irqs, false, NULL);
}
+
if (index < 0) {
pr_warn("Failed to allocate IRTE\n");
ret = index;
@@ -4313,13 +3807,62 @@ static const struct irq_domain_ops amd_ir_domain_ops = {
.deactivate = irq_remapping_deactivate,
};
+int amd_iommu_activate_guest_mode(void *data)
+{
+ struct amd_ir_data *ir_data = (struct amd_ir_data *)data;
+ struct irte_ga *entry = (struct irte_ga *) ir_data->entry;
+
+ if (!AMD_IOMMU_GUEST_IR_VAPIC(amd_iommu_guest_ir) ||
+ !entry || entry->lo.fields_vapic.guest_mode)
+ return 0;
+
+ entry->lo.val = 0;
+ entry->hi.val = 0;
+
+ entry->lo.fields_vapic.guest_mode = 1;
+ entry->lo.fields_vapic.ga_log_intr = 1;
+ entry->hi.fields.ga_root_ptr = ir_data->ga_root_ptr;
+ entry->hi.fields.vector = ir_data->ga_vector;
+ entry->lo.fields_vapic.ga_tag = ir_data->ga_tag;
+
+ return modify_irte_ga(ir_data->irq_2_irte.devid,
+ ir_data->irq_2_irte.index, entry, NULL);
+}
+EXPORT_SYMBOL(amd_iommu_activate_guest_mode);
+
+int amd_iommu_deactivate_guest_mode(void *data)
+{
+ struct amd_ir_data *ir_data = (struct amd_ir_data *)data;
+ struct irte_ga *entry = (struct irte_ga *) ir_data->entry;
+ struct irq_cfg *cfg = ir_data->cfg;
+
+ if (!AMD_IOMMU_GUEST_IR_VAPIC(amd_iommu_guest_ir) ||
+ !entry || !entry->lo.fields_vapic.guest_mode)
+ return 0;
+
+ entry->lo.val = 0;
+ entry->hi.val = 0;
+
+ entry->lo.fields_remap.dm = apic->irq_dest_mode;
+ entry->lo.fields_remap.int_type = apic->irq_delivery_mode;
+ entry->hi.fields.vector = cfg->vector;
+ entry->lo.fields_remap.destination =
+ APICID_TO_IRTE_DEST_LO(cfg->dest_apicid);
+ entry->hi.fields.destination =
+ APICID_TO_IRTE_DEST_HI(cfg->dest_apicid);
+
+ return modify_irte_ga(ir_data->irq_2_irte.devid,
+ ir_data->irq_2_irte.index, entry, NULL);
+}
+EXPORT_SYMBOL(amd_iommu_deactivate_guest_mode);
+
static int amd_ir_set_vcpu_affinity(struct irq_data *data, void *vcpu_info)
{
+ int ret;
struct amd_iommu *iommu;
struct amd_iommu_pi_data *pi_data = vcpu_info;
struct vcpu_data *vcpu_pi_info = pi_data->vcpu_data;
struct amd_ir_data *ir_data = data->chip_data;
- struct irte_ga *irte = (struct irte_ga *) ir_data->entry;
struct irq_2_irte *irte_info = &ir_data->irq_2_irte;
struct iommu_dev_data *dev_data = search_dev_data(irte_info->devid);
@@ -4330,6 +3873,7 @@ static int amd_ir_set_vcpu_affinity(struct irq_data *data, void *vcpu_info)
if (!dev_data || !dev_data->use_vapic)
return 0;
+ ir_data->cfg = irqd_cfg(data);
pi_data->ir_data = ir_data;
/* Note:
@@ -4348,37 +3892,24 @@ static int amd_ir_set_vcpu_affinity(struct irq_data *data, void *vcpu_info)
pi_data->prev_ga_tag = ir_data->cached_ga_tag;
if (pi_data->is_guest_mode) {
- /* Setting */
- irte->hi.fields.ga_root_ptr = (pi_data->base >> 12);
- irte->hi.fields.vector = vcpu_pi_info->vector;
- irte->lo.fields_vapic.ga_log_intr = 1;
- irte->lo.fields_vapic.guest_mode = 1;
- irte->lo.fields_vapic.ga_tag = pi_data->ga_tag;
-
- ir_data->cached_ga_tag = pi_data->ga_tag;
+ ir_data->ga_root_ptr = (pi_data->base >> 12);
+ ir_data->ga_vector = vcpu_pi_info->vector;
+ ir_data->ga_tag = pi_data->ga_tag;
+ ret = amd_iommu_activate_guest_mode(ir_data);
+ if (!ret)
+ ir_data->cached_ga_tag = pi_data->ga_tag;
} else {
- /* Un-Setting */
- struct irq_cfg *cfg = irqd_cfg(data);
-
- irte->hi.val = 0;
- irte->lo.val = 0;
- irte->hi.fields.vector = cfg->vector;
- irte->lo.fields_remap.guest_mode = 0;
- irte->lo.fields_remap.destination =
- APICID_TO_IRTE_DEST_LO(cfg->dest_apicid);
- irte->hi.fields.destination =
- APICID_TO_IRTE_DEST_HI(cfg->dest_apicid);
- irte->lo.fields_remap.int_type = apic->irq_delivery_mode;
- irte->lo.fields_remap.dm = apic->irq_dest_mode;
+ ret = amd_iommu_deactivate_guest_mode(ir_data);
/*
* This communicates the ga_tag back to the caller
* so that it can do all the necessary clean up.
*/
- ir_data->cached_ga_tag = 0;
+ if (!ret)
+ ir_data->cached_ga_tag = 0;
}
- return modify_irte_ga(irte_info->devid, irte_info->index, irte, ir_data);
+ return ret;
}
OpenPOWER on IntegriCloud