diff options
Diffstat (limited to 'drivers/iommu/arm-smmu.c')
| -rw-r--r-- | drivers/iommu/arm-smmu.c | 223 | 
1 files changed, 127 insertions, 96 deletions
| diff --git a/drivers/iommu/arm-smmu.c b/drivers/iommu/arm-smmu.c index 7c503a6bc585..4f1a350d9529 100644 --- a/drivers/iommu/arm-smmu.c +++ b/drivers/iommu/arm-smmu.c @@ -36,6 +36,7 @@  #include <linux/pci.h>  #include <linux/platform_device.h>  #include <linux/pm_runtime.h> +#include <linux/ratelimit.h>  #include <linux/slab.h>  #include <linux/amba/bus.h> @@ -122,7 +123,7 @@ static inline int arm_smmu_rpm_get(struct arm_smmu_device *smmu)  static inline void arm_smmu_rpm_put(struct arm_smmu_device *smmu)  {  	if (pm_runtime_enabled(smmu->dev)) -		pm_runtime_put(smmu->dev); +		pm_runtime_put_autosuspend(smmu->dev);  }  static struct arm_smmu_domain *to_smmu_domain(struct iommu_domain *dom) @@ -244,6 +245,9 @@ static void __arm_smmu_tlb_sync(struct arm_smmu_device *smmu, int page,  	unsigned int spin_cnt, delay;  	u32 reg; +	if (smmu->impl && unlikely(smmu->impl->tlb_sync)) +		return smmu->impl->tlb_sync(smmu, page, sync, status); +  	arm_smmu_writel(smmu, page, sync, QCOM_DUMMY_VAL);  	for (delay = 1; delay < TLB_LOOP_TIMEOUT; delay *= 2) {  		for (spin_cnt = TLB_SPIN_COUNT; spin_cnt > 0; spin_cnt--) { @@ -268,9 +272,8 @@ static void arm_smmu_tlb_sync_global(struct arm_smmu_device *smmu)  	spin_unlock_irqrestore(&smmu->global_sync_lock, flags);  } -static void arm_smmu_tlb_sync_context(void *cookie) +static void arm_smmu_tlb_sync_context(struct arm_smmu_domain *smmu_domain)  { -	struct arm_smmu_domain *smmu_domain = cookie;  	struct arm_smmu_device *smmu = smmu_domain->smmu;  	unsigned long flags; @@ -280,13 +283,6 @@ static void arm_smmu_tlb_sync_context(void *cookie)  	spin_unlock_irqrestore(&smmu_domain->cb_lock, flags);  } -static void arm_smmu_tlb_sync_vmid(void *cookie) -{ -	struct arm_smmu_domain *smmu_domain = cookie; - -	arm_smmu_tlb_sync_global(smmu_domain->smmu); -} -  static void arm_smmu_tlb_inv_context_s1(void *cookie)  {  	struct arm_smmu_domain *smmu_domain = cookie; @@ -297,7 +293,7 @@ static void arm_smmu_tlb_inv_context_s1(void *cookie)  	wmb();  	arm_smmu_cb_write(smmu_domain->smmu, smmu_domain->cfg.cbndx,  			  ARM_SMMU_CB_S1_TLBIASID, smmu_domain->cfg.asid); -	arm_smmu_tlb_sync_context(cookie); +	arm_smmu_tlb_sync_context(smmu_domain);  }  static void arm_smmu_tlb_inv_context_s2(void *cookie) @@ -312,18 +308,16 @@ static void arm_smmu_tlb_inv_context_s2(void *cookie)  }  static void arm_smmu_tlb_inv_range_s1(unsigned long iova, size_t size, -				      size_t granule, bool leaf, void *cookie) +				      size_t granule, void *cookie, int reg)  {  	struct arm_smmu_domain *smmu_domain = cookie;  	struct arm_smmu_device *smmu = smmu_domain->smmu;  	struct arm_smmu_cfg *cfg = &smmu_domain->cfg; -	int reg, idx = cfg->cbndx; +	int idx = cfg->cbndx;  	if (smmu->features & ARM_SMMU_FEAT_COHERENT_WALK)  		wmb(); -	reg = leaf ? ARM_SMMU_CB_S1_TLBIVAL : ARM_SMMU_CB_S1_TLBIVA; -  	if (cfg->fmt != ARM_SMMU_CTX_FMT_AARCH64) {  		iova = (iova >> 12) << 12;  		iova |= cfg->asid; @@ -342,16 +336,15 @@ static void arm_smmu_tlb_inv_range_s1(unsigned long iova, size_t size,  }  static void arm_smmu_tlb_inv_range_s2(unsigned long iova, size_t size, -				      size_t granule, bool leaf, void *cookie) +				      size_t granule, void *cookie, int reg)  {  	struct arm_smmu_domain *smmu_domain = cookie;  	struct arm_smmu_device *smmu = smmu_domain->smmu; -	int reg, idx = smmu_domain->cfg.cbndx; +	int idx = smmu_domain->cfg.cbndx;  	if (smmu->features & ARM_SMMU_FEAT_COHERENT_WALK)  		wmb(); -	reg = leaf ? ARM_SMMU_CB_S2_TLBIIPAS2L : ARM_SMMU_CB_S2_TLBIIPAS2;  	iova >>= 12;  	do {  		if (smmu_domain->cfg.fmt == ARM_SMMU_CTX_FMT_AARCH64) @@ -362,85 +355,98 @@ static void arm_smmu_tlb_inv_range_s2(unsigned long iova, size_t size,  	} while (size -= granule);  } -/* - * On MMU-401 at least, the cost of firing off multiple TLBIVMIDs appears - * almost negligible, but the benefit of getting the first one in as far ahead - * of the sync as possible is significant, hence we don't just make this a - * no-op and set .tlb_sync to arm_smmu_tlb_inv_context_s2() as you might think. - */ -static void arm_smmu_tlb_inv_vmid_nosync(unsigned long iova, size_t size, -					 size_t granule, bool leaf, void *cookie) +static void arm_smmu_tlb_inv_walk_s1(unsigned long iova, size_t size, +				     size_t granule, void *cookie)  { -	struct arm_smmu_domain *smmu_domain = cookie; -	struct arm_smmu_device *smmu = smmu_domain->smmu; - -	if (smmu->features & ARM_SMMU_FEAT_COHERENT_WALK) -		wmb(); +	arm_smmu_tlb_inv_range_s1(iova, size, granule, cookie, +				  ARM_SMMU_CB_S1_TLBIVA); +	arm_smmu_tlb_sync_context(cookie); +} -	arm_smmu_gr0_write(smmu, ARM_SMMU_GR0_TLBIVMID, smmu_domain->cfg.vmid); +static void arm_smmu_tlb_inv_leaf_s1(unsigned long iova, size_t size, +				     size_t granule, void *cookie) +{ +	arm_smmu_tlb_inv_range_s1(iova, size, granule, cookie, +				  ARM_SMMU_CB_S1_TLBIVAL); +	arm_smmu_tlb_sync_context(cookie);  } -static void arm_smmu_tlb_inv_walk(unsigned long iova, size_t size, -				  size_t granule, void *cookie) +static void arm_smmu_tlb_add_page_s1(struct iommu_iotlb_gather *gather, +				     unsigned long iova, size_t granule, +				     void *cookie)  { -	struct arm_smmu_domain *smmu_domain = cookie; -	const struct arm_smmu_flush_ops *ops = smmu_domain->flush_ops; +	arm_smmu_tlb_inv_range_s1(iova, granule, granule, cookie, +				  ARM_SMMU_CB_S1_TLBIVAL); +} -	ops->tlb_inv_range(iova, size, granule, false, cookie); -	ops->tlb_sync(cookie); +static void arm_smmu_tlb_inv_walk_s2(unsigned long iova, size_t size, +				     size_t granule, void *cookie) +{ +	arm_smmu_tlb_inv_range_s2(iova, size, granule, cookie, +				  ARM_SMMU_CB_S2_TLBIIPAS2); +	arm_smmu_tlb_sync_context(cookie);  } -static void arm_smmu_tlb_inv_leaf(unsigned long iova, size_t size, -				  size_t granule, void *cookie) +static void arm_smmu_tlb_inv_leaf_s2(unsigned long iova, size_t size, +				     size_t granule, void *cookie)  { -	struct arm_smmu_domain *smmu_domain = cookie; -	const struct arm_smmu_flush_ops *ops = smmu_domain->flush_ops; +	arm_smmu_tlb_inv_range_s2(iova, size, granule, cookie, +				  ARM_SMMU_CB_S2_TLBIIPAS2L); +	arm_smmu_tlb_sync_context(cookie); +} -	ops->tlb_inv_range(iova, size, granule, true, cookie); -	ops->tlb_sync(cookie); +static void arm_smmu_tlb_add_page_s2(struct iommu_iotlb_gather *gather, +				     unsigned long iova, size_t granule, +				     void *cookie) +{ +	arm_smmu_tlb_inv_range_s2(iova, granule, granule, cookie, +				  ARM_SMMU_CB_S2_TLBIIPAS2L);  } -static void arm_smmu_tlb_add_page(struct iommu_iotlb_gather *gather, -				  unsigned long iova, size_t granule, -				  void *cookie) +static void arm_smmu_tlb_inv_any_s2_v1(unsigned long iova, size_t size, +				       size_t granule, void *cookie) +{ +	arm_smmu_tlb_inv_context_s2(cookie); +} +/* + * On MMU-401 at least, the cost of firing off multiple TLBIVMIDs appears + * almost negligible, but the benefit of getting the first one in as far ahead + * of the sync as possible is significant, hence we don't just make this a + * no-op and call arm_smmu_tlb_inv_context_s2() from .iotlb_sync as you might + * think. + */ +static void arm_smmu_tlb_add_page_s2_v1(struct iommu_iotlb_gather *gather, +					unsigned long iova, size_t granule, +					void *cookie)  {  	struct arm_smmu_domain *smmu_domain = cookie; -	const struct arm_smmu_flush_ops *ops = smmu_domain->flush_ops; +	struct arm_smmu_device *smmu = smmu_domain->smmu; + +	if (smmu->features & ARM_SMMU_FEAT_COHERENT_WALK) +		wmb(); -	ops->tlb_inv_range(iova, granule, granule, true, cookie); +	arm_smmu_gr0_write(smmu, ARM_SMMU_GR0_TLBIVMID, smmu_domain->cfg.vmid);  } -static const struct arm_smmu_flush_ops arm_smmu_s1_tlb_ops = { -	.tlb = { -		.tlb_flush_all	= arm_smmu_tlb_inv_context_s1, -		.tlb_flush_walk	= arm_smmu_tlb_inv_walk, -		.tlb_flush_leaf	= arm_smmu_tlb_inv_leaf, -		.tlb_add_page	= arm_smmu_tlb_add_page, -	}, -	.tlb_inv_range		= arm_smmu_tlb_inv_range_s1, -	.tlb_sync		= arm_smmu_tlb_sync_context, +static const struct iommu_flush_ops arm_smmu_s1_tlb_ops = { +	.tlb_flush_all	= arm_smmu_tlb_inv_context_s1, +	.tlb_flush_walk	= arm_smmu_tlb_inv_walk_s1, +	.tlb_flush_leaf	= arm_smmu_tlb_inv_leaf_s1, +	.tlb_add_page	= arm_smmu_tlb_add_page_s1,  }; -static const struct arm_smmu_flush_ops arm_smmu_s2_tlb_ops_v2 = { -	.tlb = { -		.tlb_flush_all	= arm_smmu_tlb_inv_context_s2, -		.tlb_flush_walk	= arm_smmu_tlb_inv_walk, -		.tlb_flush_leaf	= arm_smmu_tlb_inv_leaf, -		.tlb_add_page	= arm_smmu_tlb_add_page, -	}, -	.tlb_inv_range		= arm_smmu_tlb_inv_range_s2, -	.tlb_sync		= arm_smmu_tlb_sync_context, +static const struct iommu_flush_ops arm_smmu_s2_tlb_ops_v2 = { +	.tlb_flush_all	= arm_smmu_tlb_inv_context_s2, +	.tlb_flush_walk	= arm_smmu_tlb_inv_walk_s2, +	.tlb_flush_leaf	= arm_smmu_tlb_inv_leaf_s2, +	.tlb_add_page	= arm_smmu_tlb_add_page_s2,  }; -static const struct arm_smmu_flush_ops arm_smmu_s2_tlb_ops_v1 = { -	.tlb = { -		.tlb_flush_all	= arm_smmu_tlb_inv_context_s2, -		.tlb_flush_walk	= arm_smmu_tlb_inv_walk, -		.tlb_flush_leaf	= arm_smmu_tlb_inv_leaf, -		.tlb_add_page	= arm_smmu_tlb_add_page, -	}, -	.tlb_inv_range		= arm_smmu_tlb_inv_vmid_nosync, -	.tlb_sync		= arm_smmu_tlb_sync_vmid, +static const struct iommu_flush_ops arm_smmu_s2_tlb_ops_v1 = { +	.tlb_flush_all	= arm_smmu_tlb_inv_context_s2, +	.tlb_flush_walk	= arm_smmu_tlb_inv_any_s2_v1, +	.tlb_flush_leaf	= arm_smmu_tlb_inv_any_s2_v1, +	.tlb_add_page	= arm_smmu_tlb_add_page_s2_v1,  };  static irqreturn_t arm_smmu_context_fault(int irq, void *dev) @@ -472,6 +478,8 @@ static irqreturn_t arm_smmu_global_fault(int irq, void *dev)  {  	u32 gfsr, gfsynr0, gfsynr1, gfsynr2;  	struct arm_smmu_device *smmu = dev; +	static DEFINE_RATELIMIT_STATE(rs, DEFAULT_RATELIMIT_INTERVAL, +				      DEFAULT_RATELIMIT_BURST);  	gfsr = arm_smmu_gr0_read(smmu, ARM_SMMU_GR0_sGFSR);  	gfsynr0 = arm_smmu_gr0_read(smmu, ARM_SMMU_GR0_sGFSYNR0); @@ -481,11 +489,19 @@ static irqreturn_t arm_smmu_global_fault(int irq, void *dev)  	if (!gfsr)  		return IRQ_NONE; -	dev_err_ratelimited(smmu->dev, -		"Unexpected global fault, this could be serious\n"); -	dev_err_ratelimited(smmu->dev, -		"\tGFSR 0x%08x, GFSYNR0 0x%08x, GFSYNR1 0x%08x, GFSYNR2 0x%08x\n", -		gfsr, gfsynr0, gfsynr1, gfsynr2); +	if (__ratelimit(&rs)) { +		if (IS_ENABLED(CONFIG_ARM_SMMU_DISABLE_BYPASS_BY_DEFAULT) && +		    (gfsr & sGFSR_USF)) +			dev_err(smmu->dev, +				"Blocked unknown Stream ID 0x%hx; boot with \"arm-smmu.disable_bypass=0\" to allow, but this may have security implications\n", +				(u16)gfsynr1); +		else +			dev_err(smmu->dev, +				"Unexpected global fault, this could be serious\n"); +		dev_err(smmu->dev, +			"\tGFSR 0x%08x, GFSYNR0 0x%08x, GFSYNR1 0x%08x, GFSYNR2 0x%08x\n", +			gfsr, gfsynr0, gfsynr1, gfsynr2); +	}  	arm_smmu_gr0_write(smmu, ARM_SMMU_GR0_sGFSR, gfsr);  	return IRQ_HANDLED; @@ -536,8 +552,8 @@ static void arm_smmu_init_context_bank(struct arm_smmu_domain *smmu_domain,  			cb->mair[0] = pgtbl_cfg->arm_v7s_cfg.prrr;  			cb->mair[1] = pgtbl_cfg->arm_v7s_cfg.nmrr;  		} else { -			cb->mair[0] = pgtbl_cfg->arm_lpae_s1_cfg.mair[0]; -			cb->mair[1] = pgtbl_cfg->arm_lpae_s1_cfg.mair[1]; +			cb->mair[0] = pgtbl_cfg->arm_lpae_s1_cfg.mair; +			cb->mair[1] = pgtbl_cfg->arm_lpae_s1_cfg.mair >> 32;  		}  	}  } @@ -770,7 +786,7 @@ static int arm_smmu_init_domain_context(struct iommu_domain *domain,  		.ias		= ias,  		.oas		= oas,  		.coherent_walk	= smmu->features & ARM_SMMU_FEAT_COHERENT_WALK, -		.tlb		= &smmu_domain->flush_ops->tlb, +		.tlb		= smmu_domain->flush_ops,  		.iommu_dev	= smmu->dev,  	}; @@ -1039,8 +1055,6 @@ static int arm_smmu_master_alloc_smes(struct device *dev)  	}  	group = iommu_group_get_for_dev(dev); -	if (!group) -		group = ERR_PTR(-ENOMEM);  	if (IS_ERR(group)) {  		ret = PTR_ERR(group);  		goto out_err; @@ -1154,13 +1168,27 @@ static int arm_smmu_attach_dev(struct iommu_domain *domain, struct device *dev)  	/* Looks ok, so add the device to the domain */  	ret = arm_smmu_domain_add_master(smmu_domain, fwspec); +	/* +	 * Setup an autosuspend delay to avoid bouncing runpm state. +	 * Otherwise, if a driver for a suspended consumer device +	 * unmaps buffers, it will runpm resume/suspend for each one. +	 * +	 * For example, when used by a GPU device, when an application +	 * or game exits, it can trigger unmapping 100s or 1000s of +	 * buffers.  With a runpm cycle for each buffer, that adds up +	 * to 5-10sec worth of reprogramming the context bank, while +	 * the system appears to be locked up to the user. +	 */ +	pm_runtime_set_autosuspend_delay(smmu->dev, 20); +	pm_runtime_use_autosuspend(smmu->dev); +  rpm_put:  	arm_smmu_rpm_put(smmu);  	return ret;  }  static int arm_smmu_map(struct iommu_domain *domain, unsigned long iova, -			phys_addr_t paddr, size_t size, int prot) +			phys_addr_t paddr, size_t size, int prot, gfp_t gfp)  {  	struct io_pgtable_ops *ops = to_smmu_domain(domain)->pgtbl_ops;  	struct arm_smmu_device *smmu = to_smmu_domain(domain)->smmu; @@ -1200,7 +1228,7 @@ static void arm_smmu_flush_iotlb_all(struct iommu_domain *domain)  	if (smmu_domain->flush_ops) {  		arm_smmu_rpm_get(smmu); -		smmu_domain->flush_ops->tlb.tlb_flush_all(smmu_domain); +		smmu_domain->flush_ops->tlb_flush_all(smmu_domain);  		arm_smmu_rpm_put(smmu);  	}  } @@ -1211,11 +1239,16 @@ static void arm_smmu_iotlb_sync(struct iommu_domain *domain,  	struct arm_smmu_domain *smmu_domain = to_smmu_domain(domain);  	struct arm_smmu_device *smmu = smmu_domain->smmu; -	if (smmu_domain->flush_ops) { -		arm_smmu_rpm_get(smmu); -		smmu_domain->flush_ops->tlb_sync(smmu_domain); -		arm_smmu_rpm_put(smmu); -	} +	if (!smmu) +		return; + +	arm_smmu_rpm_get(smmu); +	if (smmu->version == ARM_SMMU_V2 || +	    smmu_domain->stage == ARM_SMMU_DOMAIN_S1) +		arm_smmu_tlb_sync_context(smmu_domain); +	else +		arm_smmu_tlb_sync_global(smmu); +	arm_smmu_rpm_put(smmu);  }  static phys_addr_t arm_smmu_iova_to_phys_hard(struct iommu_domain *domain, @@ -2062,10 +2095,8 @@ static int arm_smmu_device_probe(struct platform_device *pdev)  	for (i = 0; i < num_irqs; ++i) {  		int irq = platform_get_irq(pdev, i); -		if (irq < 0) { -			dev_err(dev, "failed to get irq index %d\n", i); +		if (irq < 0)  			return -ENODEV; -		}  		smmu->irqs[i] = irq;  	} | 

