From 89df3a96baeaf5d565183e9e9fc35c9974c20d68 Mon Sep 17 00:00:00 2001 From: Will Deacon Date: Thu, 29 Oct 2015 13:48:56 +0000 Subject: iommu/arm-smmu: Remove #define for non-existent PRIQ_0_OF field PRIQ_0_OF has been removed from the SMMUv3 architecture, so remove its corresponding (and unused) #define from the driver. Signed-off-by: Will Deacon --- drivers/iommu/arm-smmu-v3.c | 1 - 1 file changed, 1 deletion(-) (limited to 'drivers/iommu') diff --git a/drivers/iommu/arm-smmu-v3.c b/drivers/iommu/arm-smmu-v3.c index 4e5118a4cd30..e0032c098b32 100644 --- a/drivers/iommu/arm-smmu-v3.c +++ b/drivers/iommu/arm-smmu-v3.c @@ -378,7 +378,6 @@ #define PRIQ_0_SID_MASK 0xffffffffUL #define PRIQ_0_SSID_SHIFT 32 #define PRIQ_0_SSID_MASK 0xfffffUL -#define PRIQ_0_OF (1UL << 57) #define PRIQ_0_PERM_PRIV (1UL << 58) #define PRIQ_0_PERM_EXEC (1UL << 59) #define PRIQ_0_PERM_READ (1UL << 60) -- cgit v1.2.1 From 04fa26c71be5d7cf1c63f23f6345dad209f361d7 Mon Sep 17 00:00:00 2001 From: Will Deacon Date: Fri, 30 Oct 2015 18:12:41 +0000 Subject: iommu/arm-smmu: Convert DMA buffer allocations to the managed API The ARM SMMUv3 driver uses dma_{alloc,free}_coherent to manage its queues and configuration data structures. This patch converts the driver to the managed (dmam_*) API, so that resources are freed automatically on device teardown. This greatly simplifies the failure paths and allows us to remove a bunch of handcrafted freeing code. Signed-off-by: Will Deacon --- drivers/iommu/arm-smmu-v3.c | 132 +++++++------------------------------------- 1 file changed, 21 insertions(+), 111 deletions(-) (limited to 'drivers/iommu') diff --git a/drivers/iommu/arm-smmu-v3.c b/drivers/iommu/arm-smmu-v3.c index e0032c098b32..86480480895d 100644 --- a/drivers/iommu/arm-smmu-v3.c +++ b/drivers/iommu/arm-smmu-v3.c @@ -1122,8 +1122,8 @@ static int arm_smmu_init_l2_strtab(struct arm_smmu_device *smmu, u32 sid) strtab = &cfg->strtab[(sid >> STRTAB_SPLIT) * STRTAB_L1_DESC_DWORDS]; desc->span = STRTAB_SPLIT + 1; - desc->l2ptr = dma_zalloc_coherent(smmu->dev, size, &desc->l2ptr_dma, - GFP_KERNEL); + desc->l2ptr = dmam_alloc_coherent(smmu->dev, size, &desc->l2ptr_dma, + GFP_KERNEL | __GFP_ZERO); if (!desc->l2ptr) { dev_err(smmu->dev, "failed to allocate l2 stream table for SID %u\n", @@ -1428,10 +1428,10 @@ static void arm_smmu_domain_free(struct iommu_domain *domain) struct arm_smmu_s1_cfg *cfg = &smmu_domain->s1_cfg; if (cfg->cdptr) { - dma_free_coherent(smmu_domain->smmu->dev, - CTXDESC_CD_DWORDS << 3, - cfg->cdptr, - cfg->cdptr_dma); + dmam_free_coherent(smmu_domain->smmu->dev, + CTXDESC_CD_DWORDS << 3, + cfg->cdptr, + cfg->cdptr_dma); arm_smmu_bitmap_free(smmu->asid_map, cfg->cd.asid); } @@ -1456,8 +1456,9 @@ static int arm_smmu_domain_finalise_s1(struct arm_smmu_domain *smmu_domain, if (IS_ERR_VALUE(asid)) return asid; - cfg->cdptr = dma_zalloc_coherent(smmu->dev, CTXDESC_CD_DWORDS << 3, - &cfg->cdptr_dma, GFP_KERNEL); + cfg->cdptr = dmam_alloc_coherent(smmu->dev, CTXDESC_CD_DWORDS << 3, + &cfg->cdptr_dma, + GFP_KERNEL | __GFP_ZERO); if (!cfg->cdptr) { dev_warn(smmu->dev, "failed to allocate context descriptor\n"); ret = -ENOMEM; @@ -1936,7 +1937,7 @@ static int arm_smmu_init_one_queue(struct arm_smmu_device *smmu, { size_t qsz = ((1 << q->max_n_shift) * dwords) << 3; - q->base = dma_alloc_coherent(smmu->dev, qsz, &q->base_dma, GFP_KERNEL); + q->base = dmam_alloc_coherent(smmu->dev, qsz, &q->base_dma, GFP_KERNEL); if (!q->base) { dev_err(smmu->dev, "failed to allocate queue (0x%zx bytes)\n", qsz); @@ -1956,23 +1957,6 @@ static int arm_smmu_init_one_queue(struct arm_smmu_device *smmu, return 0; } -static void arm_smmu_free_one_queue(struct arm_smmu_device *smmu, - struct arm_smmu_queue *q) -{ - size_t qsz = ((1 << q->max_n_shift) * q->ent_dwords) << 3; - - dma_free_coherent(smmu->dev, qsz, q->base, q->base_dma); -} - -static void arm_smmu_free_queues(struct arm_smmu_device *smmu) -{ - arm_smmu_free_one_queue(smmu, &smmu->cmdq.q); - arm_smmu_free_one_queue(smmu, &smmu->evtq.q); - - if (smmu->features & ARM_SMMU_FEAT_PRI) - arm_smmu_free_one_queue(smmu, &smmu->priq.q); -} - static int arm_smmu_init_queues(struct arm_smmu_device *smmu) { int ret; @@ -1982,49 +1966,20 @@ static int arm_smmu_init_queues(struct arm_smmu_device *smmu) ret = arm_smmu_init_one_queue(smmu, &smmu->cmdq.q, ARM_SMMU_CMDQ_PROD, ARM_SMMU_CMDQ_CONS, CMDQ_ENT_DWORDS); if (ret) - goto out; + return ret; /* evtq */ ret = arm_smmu_init_one_queue(smmu, &smmu->evtq.q, ARM_SMMU_EVTQ_PROD, ARM_SMMU_EVTQ_CONS, EVTQ_ENT_DWORDS); if (ret) - goto out_free_cmdq; + return ret; /* priq */ if (!(smmu->features & ARM_SMMU_FEAT_PRI)) return 0; - ret = arm_smmu_init_one_queue(smmu, &smmu->priq.q, ARM_SMMU_PRIQ_PROD, - ARM_SMMU_PRIQ_CONS, PRIQ_ENT_DWORDS); - if (ret) - goto out_free_evtq; - - return 0; - -out_free_evtq: - arm_smmu_free_one_queue(smmu, &smmu->evtq.q); -out_free_cmdq: - arm_smmu_free_one_queue(smmu, &smmu->cmdq.q); -out: - return ret; -} - -static void arm_smmu_free_l2_strtab(struct arm_smmu_device *smmu) -{ - int i; - size_t size; - struct arm_smmu_strtab_cfg *cfg = &smmu->strtab_cfg; - - size = 1 << (STRTAB_SPLIT + ilog2(STRTAB_STE_DWORDS) + 3); - for (i = 0; i < cfg->num_l1_ents; ++i) { - struct arm_smmu_strtab_l1_desc *desc = &cfg->l1_desc[i]; - - if (!desc->l2ptr) - continue; - - dma_free_coherent(smmu->dev, size, desc->l2ptr, - desc->l2ptr_dma); - } + return arm_smmu_init_one_queue(smmu, &smmu->priq.q, ARM_SMMU_PRIQ_PROD, + ARM_SMMU_PRIQ_CONS, PRIQ_ENT_DWORDS); } static int arm_smmu_init_l1_strtab(struct arm_smmu_device *smmu) @@ -2053,7 +2008,6 @@ static int arm_smmu_init_strtab_2lvl(struct arm_smmu_device *smmu) void *strtab; u64 reg; u32 size, l1size; - int ret; struct arm_smmu_strtab_cfg *cfg = &smmu->strtab_cfg; /* @@ -2076,8 +2030,8 @@ static int arm_smmu_init_strtab_2lvl(struct arm_smmu_device *smmu) size, smmu->sid_bits); l1size = cfg->num_l1_ents * (STRTAB_L1_DESC_DWORDS << 3); - strtab = dma_zalloc_coherent(smmu->dev, l1size, &cfg->strtab_dma, - GFP_KERNEL); + strtab = dmam_alloc_coherent(smmu->dev, l1size, &cfg->strtab_dma, + GFP_KERNEL | __GFP_ZERO); if (!strtab) { dev_err(smmu->dev, "failed to allocate l1 stream table (%u bytes)\n", @@ -2094,13 +2048,7 @@ static int arm_smmu_init_strtab_2lvl(struct arm_smmu_device *smmu) << STRTAB_BASE_CFG_SPLIT_SHIFT; cfg->strtab_base_cfg = reg; - ret = arm_smmu_init_l1_strtab(smmu); - if (ret) - dma_free_coherent(smmu->dev, - l1size, - strtab, - cfg->strtab_dma); - return ret; + return arm_smmu_init_l1_strtab(smmu); } static int arm_smmu_init_strtab_linear(struct arm_smmu_device *smmu) @@ -2111,8 +2059,8 @@ static int arm_smmu_init_strtab_linear(struct arm_smmu_device *smmu) struct arm_smmu_strtab_cfg *cfg = &smmu->strtab_cfg; size = (1 << smmu->sid_bits) * (STRTAB_STE_DWORDS << 3); - strtab = dma_zalloc_coherent(smmu->dev, size, &cfg->strtab_dma, - GFP_KERNEL); + strtab = dmam_alloc_coherent(smmu->dev, size, &cfg->strtab_dma, + GFP_KERNEL | __GFP_ZERO); if (!strtab) { dev_err(smmu->dev, "failed to allocate linear stream table (%u bytes)\n", @@ -2156,21 +2104,6 @@ static int arm_smmu_init_strtab(struct arm_smmu_device *smmu) return 0; } -static void arm_smmu_free_strtab(struct arm_smmu_device *smmu) -{ - struct arm_smmu_strtab_cfg *cfg = &smmu->strtab_cfg; - u32 size = cfg->num_l1_ents; - - if (smmu->features & ARM_SMMU_FEAT_2_LVL_STRTAB) { - arm_smmu_free_l2_strtab(smmu); - size *= STRTAB_L1_DESC_DWORDS << 3; - } else { - size *= STRTAB_STE_DWORDS * 3; - } - - dma_free_coherent(smmu->dev, size, cfg->strtab, cfg->strtab_dma); -} - static int arm_smmu_init_structures(struct arm_smmu_device *smmu) { int ret; @@ -2179,21 +2112,7 @@ static int arm_smmu_init_structures(struct arm_smmu_device *smmu) if (ret) return ret; - ret = arm_smmu_init_strtab(smmu); - if (ret) - goto out_free_queues; - - return 0; - -out_free_queues: - arm_smmu_free_queues(smmu); - return ret; -} - -static void arm_smmu_free_structures(struct arm_smmu_device *smmu) -{ - arm_smmu_free_strtab(smmu); - arm_smmu_free_queues(smmu); + return arm_smmu_init_strtab(smmu); } static int arm_smmu_write_reg_sync(struct arm_smmu_device *smmu, u32 val, @@ -2698,15 +2617,7 @@ static int arm_smmu_device_dt_probe(struct platform_device *pdev) platform_set_drvdata(pdev, smmu); /* Reset the device */ - ret = arm_smmu_device_reset(smmu); - if (ret) - goto out_free_structures; - - return 0; - -out_free_structures: - arm_smmu_free_structures(smmu); - return ret; + return arm_smmu_device_reset(smmu); } static int arm_smmu_device_remove(struct platform_device *pdev) @@ -2714,7 +2625,6 @@ static int arm_smmu_device_remove(struct platform_device *pdev) struct arm_smmu_device *smmu = platform_get_drvdata(pdev); arm_smmu_device_disable(smmu); - arm_smmu_free_structures(smmu); return 0; } -- cgit v1.2.1 From 44830b0cbdba29789f2a569d08dbaa3d1605c94c Mon Sep 17 00:00:00 2001 From: Markus Elfring Date: Fri, 6 Nov 2015 18:32:41 +0100 Subject: iommu/arm-smmu: Delete an unnecessary check before free_io_pgtable_ops() The free_io_pgtable_ops() function tests whether its argument is NULL and then returns immediately. Thus the test around the call is not needed. This issue was detected by using the Coccinelle software. Signed-off-by: Markus Elfring Signed-off-by: Will Deacon --- drivers/iommu/arm-smmu.c | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) (limited to 'drivers/iommu') diff --git a/drivers/iommu/arm-smmu.c b/drivers/iommu/arm-smmu.c index 47dc7a793f5c..1ce4b85d5216 100644 --- a/drivers/iommu/arm-smmu.c +++ b/drivers/iommu/arm-smmu.c @@ -945,9 +945,7 @@ static void arm_smmu_destroy_domain_context(struct iommu_domain *domain) free_irq(irq, domain); } - if (smmu_domain->pgtbl_ops) - free_io_pgtable_ops(smmu_domain->pgtbl_ops); - + free_io_pgtable_ops(smmu_domain->pgtbl_ops); __arm_smmu_free_bitmap(smmu->context_map, cfg->cbndx); } -- cgit v1.2.1 From a0eacd89e35e55aad284cc2e6865bf2dcf7037ba Mon Sep 17 00:00:00 2001 From: Will Deacon Date: Wed, 18 Nov 2015 18:15:51 +0000 Subject: iommu/arm-smmu: Use incoming shareability attributes in bypass mode When we initialise a bypass STE, we memset the structure to zero and set the Valid and Config fields to indicate that the stream should bypass the SMMU. Unfortunately, this results in an SHCFG field of 0 which means that the shareability of any incoming transactions is overridden with non-shareable, leading to potential coherence problems down the line. This patch fixes the issue by initialising bypass STEs to use the incoming shareability attributes. When translation is in effect at either stage 1 or stage 2, the shareability is determined by the page tables. Signed-off-by: Will Deacon --- drivers/iommu/arm-smmu-v3.c | 5 +++++ 1 file changed, 5 insertions(+) (limited to 'drivers/iommu') diff --git a/drivers/iommu/arm-smmu-v3.c b/drivers/iommu/arm-smmu-v3.c index 86480480895d..2e3e235f509c 100644 --- a/drivers/iommu/arm-smmu-v3.c +++ b/drivers/iommu/arm-smmu-v3.c @@ -253,6 +253,9 @@ #define STRTAB_STE_1_STRW_EL2 2UL #define STRTAB_STE_1_STRW_SHIFT 30 +#define STRTAB_STE_1_SHCFG_INCOMING 1UL +#define STRTAB_STE_1_SHCFG_SHIFT 44 + #define STRTAB_STE_2_S2VMID_SHIFT 0 #define STRTAB_STE_2_S2VMID_MASK 0xffffUL #define STRTAB_STE_2_VTCR_SHIFT 32 @@ -1041,6 +1044,8 @@ static void arm_smmu_write_strtab_ent(struct arm_smmu_device *smmu, u32 sid, val |= disable_bypass ? STRTAB_STE_0_CFG_ABORT : STRTAB_STE_0_CFG_BYPASS; dst[0] = cpu_to_le64(val); + dst[1] = cpu_to_le64(STRTAB_STE_1_SHCFG_INCOMING + << STRTAB_STE_1_SHCFG_SHIFT); dst[2] = 0; /* Nuke the VMID */ if (ste_live) arm_smmu_sync_ste_for_sid(smmu, sid); -- cgit v1.2.1 From 9a4a9d8c34bc0b0102e8a9dd67ee3910b0bfaeb4 Mon Sep 17 00:00:00 2001 From: Peng Fan Date: Fri, 20 Nov 2015 16:56:18 +0800 Subject: iommu/arm-smmu: Correct group reference count The basic flow for add a device: arm_smmu_add_device |->iommu_group_get_for_dev |->iommu_group_get return group; (1) |->ops->device_group : Init/increase reference count to/by 1. |->iommu_group_add_device : Increase reference count by 1. return group (2) |->return 0; Since we are adding one device, the flow is (2) and the group reference count will be increased by 2. So, we need to add iommu_group_put at the end of arm_smmu_add_device to decrease the count by 1. Also take the failure path into consideration when fail to add a device. Signed-off-by: Peng Fan Cc: Will Deacon Signed-off-by: Will Deacon --- drivers/iommu/arm-smmu-v3.c | 18 +++++++++++------- drivers/iommu/arm-smmu.c | 1 + 2 files changed, 12 insertions(+), 7 deletions(-) (limited to 'drivers/iommu') diff --git a/drivers/iommu/arm-smmu-v3.c b/drivers/iommu/arm-smmu-v3.c index 2e3e235f509c..3ea4d576bf08 100644 --- a/drivers/iommu/arm-smmu-v3.c +++ b/drivers/iommu/arm-smmu-v3.c @@ -1809,13 +1809,13 @@ static int arm_smmu_add_device(struct device *dev) smmu = arm_smmu_get_for_pci_dev(pdev); if (!smmu) { ret = -ENOENT; - goto out_put_group; + goto out_remove_dev; } smmu_group = kzalloc(sizeof(*smmu_group), GFP_KERNEL); if (!smmu_group) { ret = -ENOMEM; - goto out_put_group; + goto out_remove_dev; } smmu_group->ste.valid = true; @@ -1831,20 +1831,20 @@ static int arm_smmu_add_device(struct device *dev) for (i = 0; i < smmu_group->num_sids; ++i) { /* If we already know about this SID, then we're done */ if (smmu_group->sids[i] == sid) - return 0; + goto out_put_group; } /* Check the SID is in range of the SMMU and our stream table */ if (!arm_smmu_sid_in_range(smmu, sid)) { ret = -ERANGE; - goto out_put_group; + goto out_remove_dev; } /* Ensure l2 strtab is initialised */ if (smmu->features & ARM_SMMU_FEAT_2_LVL_STRTAB) { ret = arm_smmu_init_l2_strtab(smmu, sid); if (ret) - goto out_put_group; + goto out_remove_dev; } /* Resize the SID array for the group */ @@ -1854,15 +1854,19 @@ static int arm_smmu_add_device(struct device *dev) if (!sids) { smmu_group->num_sids--; ret = -ENOMEM; - goto out_put_group; + goto out_remove_dev; } /* Add the new SID */ sids[smmu_group->num_sids - 1] = sid; smmu_group->sids = sids; - return 0; out_put_group: + iommu_group_put(group); + return 0; + +out_remove_dev: + iommu_group_remove_device(dev); iommu_group_put(group); return ret; } diff --git a/drivers/iommu/arm-smmu.c b/drivers/iommu/arm-smmu.c index 1ce4b85d5216..6ed169bcb39d 100644 --- a/drivers/iommu/arm-smmu.c +++ b/drivers/iommu/arm-smmu.c @@ -1355,6 +1355,7 @@ static int arm_smmu_add_device(struct device *dev) if (IS_ERR(group)) return PTR_ERR(group); + iommu_group_put(group); return 0; } -- cgit v1.2.1 From a0d5c04c6053d8c47cca37384ae472f6b2ee0dee Mon Sep 17 00:00:00 2001 From: Will Deacon Date: Fri, 4 Dec 2015 12:00:29 +0000 Subject: iommu/arm-smmu: Handle unknown CERROR values gracefully Whilst the architecture only defines a few of the possible CERROR values, we should handle unknown values gracefully rather than go out of bounds trying to print out an error description. Signed-off-by: Will Deacon --- drivers/iommu/arm-smmu-v3.c | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) (limited to 'drivers/iommu') diff --git a/drivers/iommu/arm-smmu-v3.c b/drivers/iommu/arm-smmu-v3.c index 3ea4d576bf08..4c5ef4e5da98 100644 --- a/drivers/iommu/arm-smmu-v3.c +++ b/drivers/iommu/arm-smmu-v3.c @@ -857,15 +857,17 @@ static void arm_smmu_cmdq_skip_err(struct arm_smmu_device *smmu) }; dev_err(smmu->dev, "CMDQ error (cons 0x%08x): %s\n", cons, - cerror_str[idx]); + idx < ARRAY_SIZE(cerror_str) ? cerror_str[idx] : "Unknown"); switch (idx) { - case CMDQ_ERR_CERROR_ILL_IDX: - break; case CMDQ_ERR_CERROR_ABT_IDX: dev_err(smmu->dev, "retrying command fetch\n"); case CMDQ_ERR_CERROR_NONE_IDX: return; + case CMDQ_ERR_CERROR_ILL_IDX: + /* Fallthrough */ + default: + break; } /* -- cgit v1.2.1 From 2eb97c78613082f308c0b39366c034cb589b8ee9 Mon Sep 17 00:00:00 2001 From: Robin Murphy Date: Fri, 4 Dec 2015 17:52:58 +0000 Subject: iommu/io-pgtable-arm: Avoid dereferencing bogus PTEs In the case of corrupted page tables, or when an invalid size is given, __arm_lpae_unmap() may recurse beyond the maximum number of levels. Unfortunately the detection of this error condition only happens *after* calculating a nonsense offset from something which might not be a valid table pointer and dereferencing that to see if it is a valid PTE. Make things a little more robust by checking the level is valid before doing anything which depends on it being so. Reviewed-by: Laurent Pinchart Signed-off-by: Robin Murphy Signed-off-by: Will Deacon --- drivers/iommu/io-pgtable-arm.c | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) (limited to 'drivers/iommu') diff --git a/drivers/iommu/io-pgtable-arm.c b/drivers/iommu/io-pgtable-arm.c index 7df97777662d..366a354c689d 100644 --- a/drivers/iommu/io-pgtable-arm.c +++ b/drivers/iommu/io-pgtable-arm.c @@ -486,11 +486,13 @@ static int __arm_lpae_unmap(struct arm_lpae_io_pgtable *data, void *cookie = data->iop.cookie; size_t blk_size = ARM_LPAE_BLOCK_SIZE(lvl, data); + /* Something went horribly wrong and we ran out of page table */ + if (WARN_ON(lvl == ARM_LPAE_MAX_LEVELS)) + return 0; + ptep += ARM_LPAE_LVL_IDX(iova, lvl, data); pte = *ptep; - - /* Something went horribly wrong and we ran out of page table */ - if (WARN_ON(!pte || (lvl == ARM_LPAE_MAX_LEVELS))) + if (WARN_ON(!pte)) return 0; /* If the size matches this level, we're in the right place */ -- cgit v1.2.1 From 06c610e8f32ba2fe41d57e1718611c2ec5024878 Mon Sep 17 00:00:00 2001 From: Robin Murphy Date: Mon, 7 Dec 2015 18:18:53 +0000 Subject: iommu/io-pgtable: Indicate granule for TLB maintenance IOMMU hardware with range-based TLB maintenance commands can work happily with the iova and size arguments passed via the tlb_add_flush callback, but for IOMMUs which require separate commands per entry in the range, it is not straightforward to infer the necessary granularity when it comes to issuing the actual commands. Add an additional argument indicating the granularity for the benefit of drivers needing to know, and update the ARM LPAE code appropriately (for non-leaf invalidations we currently just assume the worst-case page granularity rather than walking the table to check). Signed-off-by: Robin Murphy Signed-off-by: Will Deacon --- drivers/iommu/arm-smmu-v3.c | 2 +- drivers/iommu/arm-smmu.c | 2 +- drivers/iommu/io-pgtable-arm.c | 27 +++++++++++++++------------ drivers/iommu/io-pgtable.h | 4 ++-- drivers/iommu/ipmmu-vmsa.c | 4 ++-- 5 files changed, 21 insertions(+), 18 deletions(-) (limited to 'drivers/iommu') diff --git a/drivers/iommu/arm-smmu-v3.c b/drivers/iommu/arm-smmu-v3.c index 4c5ef4e5da98..735ad2c58dd8 100644 --- a/drivers/iommu/arm-smmu-v3.c +++ b/drivers/iommu/arm-smmu-v3.c @@ -1341,7 +1341,7 @@ static void arm_smmu_tlb_inv_context(void *cookie) } static void arm_smmu_tlb_inv_range_nosync(unsigned long iova, size_t size, - bool leaf, void *cookie) + size_t granule, bool leaf, void *cookie) { struct arm_smmu_domain *smmu_domain = cookie; struct arm_smmu_device *smmu = smmu_domain->smmu; diff --git a/drivers/iommu/arm-smmu.c b/drivers/iommu/arm-smmu.c index 6ed169bcb39d..7e04bf5640ae 100644 --- a/drivers/iommu/arm-smmu.c +++ b/drivers/iommu/arm-smmu.c @@ -582,7 +582,7 @@ static void arm_smmu_tlb_inv_context(void *cookie) } static void arm_smmu_tlb_inv_range_nosync(unsigned long iova, size_t size, - bool leaf, void *cookie) + size_t granule, bool leaf, void *cookie) { struct arm_smmu_domain *smmu_domain = cookie; struct arm_smmu_cfg *cfg = &smmu_domain->cfg; diff --git a/drivers/iommu/io-pgtable-arm.c b/drivers/iommu/io-pgtable-arm.c index 366a354c689d..7a5c772f7be2 100644 --- a/drivers/iommu/io-pgtable-arm.c +++ b/drivers/iommu/io-pgtable-arm.c @@ -58,8 +58,10 @@ ((((d)->levels - ((l) - ARM_LPAE_START_LVL(d) + 1)) \ * (d)->bits_per_level) + (d)->pg_shift) +#define ARM_LPAE_GRANULE(d) (1UL << (d)->pg_shift) + #define ARM_LPAE_PAGES_PER_PGD(d) \ - DIV_ROUND_UP((d)->pgd_size, 1UL << (d)->pg_shift) + DIV_ROUND_UP((d)->pgd_size, ARM_LPAE_GRANULE(d)) /* * Calculate the index at level l used to map virtual address a using the @@ -169,7 +171,7 @@ /* IOPTE accessors */ #define iopte_deref(pte,d) \ (__va((pte) & ((1ULL << ARM_LPAE_MAX_ADDR_BITS) - 1) \ - & ~((1ULL << (d)->pg_shift) - 1))) + & ~(ARM_LPAE_GRANULE(d) - 1ULL))) #define iopte_type(pte,l) \ (((pte) >> ARM_LPAE_PTE_TYPE_SHIFT) & ARM_LPAE_PTE_TYPE_MASK) @@ -326,7 +328,7 @@ static int __arm_lpae_map(struct arm_lpae_io_pgtable *data, unsigned long iova, /* Grab a pointer to the next level */ pte = *ptep; if (!pte) { - cptep = __arm_lpae_alloc_pages(1UL << data->pg_shift, + cptep = __arm_lpae_alloc_pages(ARM_LPAE_GRANULE(data), GFP_ATOMIC, cfg); if (!cptep) return -ENOMEM; @@ -412,7 +414,7 @@ static void __arm_lpae_free_pgtable(struct arm_lpae_io_pgtable *data, int lvl, if (lvl == ARM_LPAE_START_LVL(data)) table_size = data->pgd_size; else - table_size = 1UL << data->pg_shift; + table_size = ARM_LPAE_GRANULE(data); start = ptep; end = (void *)ptep + table_size; @@ -473,7 +475,7 @@ static int arm_lpae_split_blk_unmap(struct arm_lpae_io_pgtable *data, __arm_lpae_set_pte(ptep, table, cfg); iova &= ~(blk_size - 1); - cfg->tlb->tlb_add_flush(iova, blk_size, true, data->iop.cookie); + cfg->tlb->tlb_add_flush(iova, blk_size, blk_size, true, data->iop.cookie); return size; } @@ -501,12 +503,13 @@ static int __arm_lpae_unmap(struct arm_lpae_io_pgtable *data, if (!iopte_leaf(pte, lvl)) { /* Also flush any partial walks */ - tlb->tlb_add_flush(iova, size, false, cookie); + tlb->tlb_add_flush(iova, size, ARM_LPAE_GRANULE(data), + false, cookie); tlb->tlb_sync(cookie); ptep = iopte_deref(pte, data); __arm_lpae_free_pgtable(data, lvl + 1, ptep); } else { - tlb->tlb_add_flush(iova, size, true, cookie); + tlb->tlb_add_flush(iova, size, size, true, cookie); } return size; @@ -572,7 +575,7 @@ static phys_addr_t arm_lpae_iova_to_phys(struct io_pgtable_ops *ops, return 0; found_translation: - iova &= ((1 << data->pg_shift) - 1); + iova &= (ARM_LPAE_GRANULE(data) - 1); return ((phys_addr_t)iopte_to_pfn(pte,data) << data->pg_shift) | iova; } @@ -670,7 +673,7 @@ arm_64_lpae_alloc_pgtable_s1(struct io_pgtable_cfg *cfg, void *cookie) (ARM_LPAE_TCR_RGN_WBWA << ARM_LPAE_TCR_IRGN0_SHIFT) | (ARM_LPAE_TCR_RGN_WBWA << ARM_LPAE_TCR_ORGN0_SHIFT); - switch (1 << data->pg_shift) { + switch (ARM_LPAE_GRANULE(data)) { case SZ_4K: reg |= ARM_LPAE_TCR_TG0_4K; break; @@ -771,7 +774,7 @@ arm_64_lpae_alloc_pgtable_s2(struct io_pgtable_cfg *cfg, void *cookie) sl = ARM_LPAE_START_LVL(data); - switch (1 << data->pg_shift) { + switch (ARM_LPAE_GRANULE(data)) { case SZ_4K: reg |= ARM_LPAE_TCR_TG0_4K; sl++; /* SL0 format is different for 4K granule size */ @@ -891,8 +894,8 @@ static void dummy_tlb_flush_all(void *cookie) WARN_ON(cookie != cfg_cookie); } -static void dummy_tlb_add_flush(unsigned long iova, size_t size, bool leaf, - void *cookie) +static void dummy_tlb_add_flush(unsigned long iova, size_t size, + size_t granule, bool leaf, void *cookie) { WARN_ON(cookie != cfg_cookie); WARN_ON(!(size & cfg_cookie->pgsize_bitmap)); diff --git a/drivers/iommu/io-pgtable.h b/drivers/iommu/io-pgtable.h index ac9e2341a633..2e18469afe3c 100644 --- a/drivers/iommu/io-pgtable.h +++ b/drivers/iommu/io-pgtable.h @@ -26,8 +26,8 @@ enum io_pgtable_fmt { */ struct iommu_gather_ops { void (*tlb_flush_all)(void *cookie); - void (*tlb_add_flush)(unsigned long iova, size_t size, bool leaf, - void *cookie); + void (*tlb_add_flush)(unsigned long iova, size_t size, size_t granule, + bool leaf, void *cookie); void (*tlb_sync)(void *cookie); }; diff --git a/drivers/iommu/ipmmu-vmsa.c b/drivers/iommu/ipmmu-vmsa.c index 8cf605fa9946..5b1166d407c4 100644 --- a/drivers/iommu/ipmmu-vmsa.c +++ b/drivers/iommu/ipmmu-vmsa.c @@ -277,8 +277,8 @@ static void ipmmu_tlb_flush_all(void *cookie) ipmmu_tlb_invalidate(domain); } -static void ipmmu_tlb_add_flush(unsigned long iova, size_t size, bool leaf, - void *cookie) +static void ipmmu_tlb_add_flush(unsigned long iova, size_t size, + size_t granule, bool leaf, void *cookie) { /* The hardware doesn't support selective TLB flush. */ } -- cgit v1.2.1 From 75df1386557c25188bd2383bbe8dd14a5ac81c06 Mon Sep 17 00:00:00 2001 From: Robin Murphy Date: Mon, 7 Dec 2015 18:18:52 +0000 Subject: iommu/arm-smmu: Invalidate TLBs properly When invalidating an IOVA range potentially spanning multiple pages, such as when removing an entire intermediate-level table, we currently only issue an invalidation for the first IOVA of that range. Since the architecture specifies that address-based TLB maintenance operations target a single entry, an SMMU could feasibly retain live entries for subsequent pages within that unmapped range, which is not good. Make sure we hit every possible entry by iterating over the whole range at the granularity provided by the pagetable implementation. Signed-off-by: Robin Murphy [will: added missing semicolons...] Signed-off-by: Will Deacon --- drivers/iommu/arm-smmu-v3.c | 5 ++++- drivers/iommu/arm-smmu.c | 16 +++++++++++++--- 2 files changed, 17 insertions(+), 4 deletions(-) (limited to 'drivers/iommu') diff --git a/drivers/iommu/arm-smmu-v3.c b/drivers/iommu/arm-smmu-v3.c index 735ad2c58dd8..4991e79465ee 100644 --- a/drivers/iommu/arm-smmu-v3.c +++ b/drivers/iommu/arm-smmu-v3.c @@ -1360,7 +1360,10 @@ static void arm_smmu_tlb_inv_range_nosync(unsigned long iova, size_t size, cmd.tlbi.vmid = smmu_domain->s2_cfg.vmid; } - arm_smmu_cmdq_issue_cmd(smmu, &cmd); + do { + arm_smmu_cmdq_issue_cmd(smmu, &cmd); + cmd.tlbi.addr += granule; + } while (size -= granule); } static struct iommu_gather_ops arm_smmu_gather_ops = { diff --git a/drivers/iommu/arm-smmu.c b/drivers/iommu/arm-smmu.c index 7e04bf5640ae..59ee4b8a3236 100644 --- a/drivers/iommu/arm-smmu.c +++ b/drivers/iommu/arm-smmu.c @@ -597,12 +597,18 @@ static void arm_smmu_tlb_inv_range_nosync(unsigned long iova, size_t size, if (!IS_ENABLED(CONFIG_64BIT) || smmu->version == ARM_SMMU_V1) { iova &= ~12UL; iova |= ARM_SMMU_CB_ASID(cfg); - writel_relaxed(iova, reg); + do { + writel_relaxed(iova, reg); + iova += granule; + } while (size -= granule); #ifdef CONFIG_64BIT } else { iova >>= 12; iova |= (u64)ARM_SMMU_CB_ASID(cfg) << 48; - writeq_relaxed(iova, reg); + do { + writeq_relaxed(iova, reg); + iova += granule >> 12; + } while (size -= granule); #endif } #ifdef CONFIG_64BIT @@ -610,7 +616,11 @@ static void arm_smmu_tlb_inv_range_nosync(unsigned long iova, size_t size, reg = ARM_SMMU_CB_BASE(smmu) + ARM_SMMU_CB(smmu, cfg->cbndx); reg += leaf ? ARM_SMMU_CB_S2_TLBIIPAS2L : ARM_SMMU_CB_S2_TLBIIPAS2; - writeq_relaxed(iova >> 12, reg); + iova >>= 12; + do { + writeq_relaxed(iova, reg); + iova += granule >> 12; + } while (size -= granule); #endif } else { reg = ARM_SMMU_GR0(smmu) + ARM_SMMU_GR0_TLBIVMID; -- cgit v1.2.1 From fdc38967633ec23b3b24dfc487dfb7b90d1a0215 Mon Sep 17 00:00:00 2001 From: Robin Murphy Date: Fri, 4 Dec 2015 17:53:01 +0000 Subject: iommu/io-pgtable: Make io_pgtable_ops_to_pgtable() macro common There is no need to keep a useful accessor for a public structure hidden away in a private implementation. Move it out alongside the structure definition so that other implementations may reuse it. Acked-by: Laurent Pinchart Signed-off-by: Robin Murphy Signed-off-by: Will Deacon --- drivers/iommu/io-pgtable-arm.c | 3 --- drivers/iommu/io-pgtable.h | 2 ++ 2 files changed, 2 insertions(+), 3 deletions(-) (limited to 'drivers/iommu') diff --git a/drivers/iommu/io-pgtable-arm.c b/drivers/iommu/io-pgtable-arm.c index 7a5c772f7be2..937ba23e48d7 100644 --- a/drivers/iommu/io-pgtable-arm.c +++ b/drivers/iommu/io-pgtable-arm.c @@ -38,9 +38,6 @@ #define io_pgtable_to_data(x) \ container_of((x), struct arm_lpae_io_pgtable, iop) -#define io_pgtable_ops_to_pgtable(x) \ - container_of((x), struct io_pgtable, ops) - #define io_pgtable_ops_to_data(x) \ io_pgtable_to_data(io_pgtable_ops_to_pgtable(x)) diff --git a/drivers/iommu/io-pgtable.h b/drivers/iommu/io-pgtable.h index 2e18469afe3c..36673c83de58 100644 --- a/drivers/iommu/io-pgtable.h +++ b/drivers/iommu/io-pgtable.h @@ -131,6 +131,8 @@ struct io_pgtable { struct io_pgtable_ops ops; }; +#define io_pgtable_ops_to_pgtable(x) container_of((x), struct io_pgtable, ops) + /** * struct io_pgtable_init_fns - Alloc/free a set of page tables for a * particular format. -- cgit v1.2.1 From 324ba1082323a51a3ad282c20e3d3b11845cf030 Mon Sep 17 00:00:00 2001 From: Prem Mallappa Date: Mon, 14 Dec 2015 22:01:14 +0530 Subject: iommu/arm-smmu: Fix write to GERRORN register When acknowledging global errors, the GERRORN register should be written with the original GERROR value so that active errors are toggled. This patch fixed the driver to write the original GERROR value to GERRORN, instead of an active error mask. Signed-off-by: Prem Mallappa [will: reworked use of active bits and fixed commit log] Signed-off-by: Will Deacon --- drivers/iommu/arm-smmu-v3.c | 24 ++++++++++++------------ 1 file changed, 12 insertions(+), 12 deletions(-) (limited to 'drivers/iommu') diff --git a/drivers/iommu/arm-smmu-v3.c b/drivers/iommu/arm-smmu-v3.c index 4991e79465ee..488f763877d2 100644 --- a/drivers/iommu/arm-smmu-v3.c +++ b/drivers/iommu/arm-smmu-v3.c @@ -1256,50 +1256,50 @@ static int arm_smmu_device_disable(struct arm_smmu_device *smmu); static irqreturn_t arm_smmu_gerror_handler(int irq, void *dev) { - u32 gerror, gerrorn; + u32 gerror, gerrorn, active; struct arm_smmu_device *smmu = dev; gerror = readl_relaxed(smmu->base + ARM_SMMU_GERROR); gerrorn = readl_relaxed(smmu->base + ARM_SMMU_GERRORN); - gerror ^= gerrorn; - if (!(gerror & GERROR_ERR_MASK)) + active = gerror ^ gerrorn; + if (!(active & GERROR_ERR_MASK)) return IRQ_NONE; /* No errors pending */ dev_warn(smmu->dev, "unexpected global error reported (0x%08x), this could be serious\n", - gerror); + active); - if (gerror & GERROR_SFM_ERR) { + if (active & GERROR_SFM_ERR) { dev_err(smmu->dev, "device has entered Service Failure Mode!\n"); arm_smmu_device_disable(smmu); } - if (gerror & GERROR_MSI_GERROR_ABT_ERR) + if (active & GERROR_MSI_GERROR_ABT_ERR) dev_warn(smmu->dev, "GERROR MSI write aborted\n"); - if (gerror & GERROR_MSI_PRIQ_ABT_ERR) { + if (active & GERROR_MSI_PRIQ_ABT_ERR) { dev_warn(smmu->dev, "PRIQ MSI write aborted\n"); arm_smmu_priq_handler(irq, smmu->dev); } - if (gerror & GERROR_MSI_EVTQ_ABT_ERR) { + if (active & GERROR_MSI_EVTQ_ABT_ERR) { dev_warn(smmu->dev, "EVTQ MSI write aborted\n"); arm_smmu_evtq_handler(irq, smmu->dev); } - if (gerror & GERROR_MSI_CMDQ_ABT_ERR) { + if (active & GERROR_MSI_CMDQ_ABT_ERR) { dev_warn(smmu->dev, "CMDQ MSI write aborted\n"); arm_smmu_cmdq_sync_handler(irq, smmu->dev); } - if (gerror & GERROR_PRIQ_ABT_ERR) + if (active & GERROR_PRIQ_ABT_ERR) dev_err(smmu->dev, "PRIQ write aborted -- events may have been lost\n"); - if (gerror & GERROR_EVTQ_ABT_ERR) + if (active & GERROR_EVTQ_ABT_ERR) dev_err(smmu->dev, "EVTQ write aborted -- events may have been lost\n"); - if (gerror & GERROR_CMDQ_ERR) + if (active & GERROR_CMDQ_ERR) arm_smmu_cmdq_skip_err(smmu); writel(gerror, smmu->base + ARM_SMMU_GERRORN); -- cgit v1.2.1 From 6380be0535fd60c0a346ec0ae447f0f6c9e3ea83 Mon Sep 17 00:00:00 2001 From: Prem Mallappa Date: Mon, 14 Dec 2015 22:01:23 +0530 Subject: iommu/arm-smmu: Use STE.S1STALLD only when supported It is ILLEGAL to set STE.S1STALLD to 1 if stage 1 is enabled and either the stall or terminate models are not supported. This patch fixes the STALLD check and ensures that we don't set STALLD in the STE when it is not supported. Signed-off-by: Prem Mallappa [will: consistently use IDR0_STALL_MODEL_* prefix] Signed-off-by: Will Deacon --- drivers/iommu/arm-smmu-v3.c | 15 ++++++++++++--- 1 file changed, 12 insertions(+), 3 deletions(-) (limited to 'drivers/iommu') diff --git a/drivers/iommu/arm-smmu-v3.c b/drivers/iommu/arm-smmu-v3.c index 488f763877d2..20875341c865 100644 --- a/drivers/iommu/arm-smmu-v3.c +++ b/drivers/iommu/arm-smmu-v3.c @@ -40,7 +40,10 @@ #define IDR0_ST_LVL_SHIFT 27 #define IDR0_ST_LVL_MASK 0x3 #define IDR0_ST_LVL_2LVL (1 << IDR0_ST_LVL_SHIFT) -#define IDR0_STALL_MODEL (3 << 24) +#define IDR0_STALL_MODEL_SHIFT 24 +#define IDR0_STALL_MODEL_MASK 0x3 +#define IDR0_STALL_MODEL_STALL (0 << IDR0_STALL_MODEL_SHIFT) +#define IDR0_STALL_MODEL_FORCE (2 << IDR0_STALL_MODEL_SHIFT) #define IDR0_TTENDIAN_SHIFT 21 #define IDR0_TTENDIAN_MASK 0x3 #define IDR0_TTENDIAN_LE (2 << IDR0_TTENDIAN_SHIFT) @@ -1062,12 +1065,14 @@ static void arm_smmu_write_strtab_ent(struct arm_smmu_device *smmu, u32 sid, STRTAB_STE_1_S1C_CACHE_WBRA << STRTAB_STE_1_S1COR_SHIFT | STRTAB_STE_1_S1C_SH_ISH << STRTAB_STE_1_S1CSH_SHIFT | - STRTAB_STE_1_S1STALLD | #ifdef CONFIG_PCI_ATS STRTAB_STE_1_EATS_TRANS << STRTAB_STE_1_EATS_SHIFT | #endif STRTAB_STE_1_STRW_NSEL1 << STRTAB_STE_1_STRW_SHIFT); + if (smmu->features & ARM_SMMU_FEAT_STALLS) + dst[1] |= cpu_to_le64(STRTAB_STE_1_S1STALLD); + val |= (ste->s1_cfg->cdptr_dma & STRTAB_STE_0_S1CTXPTR_MASK << STRTAB_STE_0_S1CTXPTR_SHIFT) | STRTAB_STE_0_CFG_S1_TRANS; @@ -2464,8 +2469,12 @@ static int arm_smmu_device_probe(struct arm_smmu_device *smmu) dev_warn(smmu->dev, "IDR0.COHACC overridden by dma-coherent property (%s)\n", coherent ? "true" : "false"); - if (reg & IDR0_STALL_MODEL) + switch (reg & IDR0_STALL_MODEL_MASK << IDR0_STALL_MODEL_SHIFT) { + case IDR0_STALL_MODEL_STALL: + /* Fallthrough */ + case IDR0_STALL_MODEL_FORCE: smmu->features |= ARM_SMMU_FEAT_STALLS; + } if (reg & IDR0_S1P) smmu->features |= ARM_SMMU_FEAT_TRANS_S1; -- cgit v1.2.1 From 12c2ab09571e8aae3a87da2a4a452632a5fac1e5 Mon Sep 17 00:00:00 2001 From: Will Deacon Date: Tue, 15 Dec 2015 16:08:12 +0000 Subject: iommu/io-pgtable-arm: Ensure we free the final level on teardown When tearing down page tables, we return early for the final level since we know that we won't have any table pointers to follow. Unfortunately, this also means that we forget to free the final level, so we end up leaking memory. Fix the issue by always freeing the current level, but just don't bother to iterate over the ptes if we're at the final level. Cc: Reported-by: Zhang Bo Signed-off-by: Will Deacon --- drivers/iommu/io-pgtable-arm.c | 11 ++++++----- 1 file changed, 6 insertions(+), 5 deletions(-) (limited to 'drivers/iommu') diff --git a/drivers/iommu/io-pgtable-arm.c b/drivers/iommu/io-pgtable-arm.c index 937ba23e48d7..8bbcbfe7695c 100644 --- a/drivers/iommu/io-pgtable-arm.c +++ b/drivers/iommu/io-pgtable-arm.c @@ -404,17 +404,18 @@ static void __arm_lpae_free_pgtable(struct arm_lpae_io_pgtable *data, int lvl, arm_lpae_iopte *start, *end; unsigned long table_size; - /* Only leaf entries at the last level */ - if (lvl == ARM_LPAE_MAX_LEVELS - 1) - return; - if (lvl == ARM_LPAE_START_LVL(data)) table_size = data->pgd_size; else table_size = ARM_LPAE_GRANULE(data); start = ptep; - end = (void *)ptep + table_size; + + /* Only leaf entries at the last level */ + if (lvl == ARM_LPAE_MAX_LEVELS - 1) + end = ptep; + else + end = (void *)ptep + table_size; while (ptep != end) { arm_lpae_iopte pte = *ptep++; -- cgit v1.2.1