diff options
Diffstat (limited to 'drivers/gpu/host1x/job.c')
-rw-r--r-- | drivers/gpu/host1x/job.c | 140 |
1 files changed, 112 insertions, 28 deletions
diff --git a/drivers/gpu/host1x/job.c b/drivers/gpu/host1x/job.c index eaa5c3352c13..a10643aa89aa 100644 --- a/drivers/gpu/host1x/job.c +++ b/drivers/gpu/host1x/job.c @@ -8,6 +8,7 @@ #include <linux/dma-mapping.h> #include <linux/err.h> #include <linux/host1x.h> +#include <linux/iommu.h> #include <linux/kref.h> #include <linux/module.h> #include <linux/scatterlist.h> @@ -99,15 +100,19 @@ EXPORT_SYMBOL(host1x_job_add_gather); static unsigned int pin_job(struct host1x *host, struct host1x_job *job) { + struct host1x_client *client = job->client; + struct device *dev = client->dev; + struct iommu_domain *domain; unsigned int i; int err; + domain = iommu_get_domain_for_dev(dev); job->num_unpins = 0; for (i = 0; i < job->num_relocs; i++) { struct host1x_reloc *reloc = &job->relocs[i]; + dma_addr_t phys_addr, *phys; struct sg_table *sgt; - dma_addr_t phys_addr; reloc->target.bo = host1x_bo_get(reloc->target.bo); if (!reloc->target.bo) { @@ -115,7 +120,62 @@ static unsigned int pin_job(struct host1x *host, struct host1x_job *job) goto unpin; } - phys_addr = host1x_bo_pin(reloc->target.bo, &sgt); + /* + * If the client device is not attached to an IOMMU, the + * physical address of the buffer object can be used. + * + * Similarly, when an IOMMU domain is shared between all + * host1x clients, the IOVA is already available, so no + * need to map the buffer object again. + * + * XXX Note that this isn't always safe to do because it + * relies on an assumption that no cache maintenance is + * needed on the buffer objects. + */ + if (!domain || client->group) + phys = &phys_addr; + else + phys = NULL; + + sgt = host1x_bo_pin(dev, reloc->target.bo, phys); + if (IS_ERR(sgt)) { + err = PTR_ERR(sgt); + goto unpin; + } + + if (sgt) { + unsigned long mask = HOST1X_RELOC_READ | + HOST1X_RELOC_WRITE; + enum dma_data_direction dir; + + switch (reloc->flags & mask) { + case HOST1X_RELOC_READ: + dir = DMA_TO_DEVICE; + break; + + case HOST1X_RELOC_WRITE: + dir = DMA_FROM_DEVICE; + break; + + case HOST1X_RELOC_READ | HOST1X_RELOC_WRITE: + dir = DMA_BIDIRECTIONAL; + break; + + default: + err = -EINVAL; + goto unpin; + } + + err = dma_map_sg(dev, sgt->sgl, sgt->nents, dir); + if (!err) { + err = -ENOMEM; + goto unpin; + } + + job->unpins[job->num_unpins].dev = dev; + job->unpins[job->num_unpins].dir = dir; + phys_addr = sg_dma_address(sgt->sgl); + } job->addr_phys[job->num_unpins] = phys_addr; job->unpins[job->num_unpins].bo = reloc->target.bo; @@ -131,6 +191,7 @@ static unsigned int pin_job(struct host1x *host, struct host1x_job *job) dma_addr_t phys_addr; unsigned long shift; struct iova *alloc; + dma_addr_t *phys; unsigned int j; g->bo = host1x_bo_get(g->bo); @@ -139,7 +200,21 @@ static unsigned int pin_job(struct host1x *host, struct host1x_job *job) goto unpin; } - phys_addr = host1x_bo_pin(g->bo, &sgt); + /** + * If the host1x is not attached to an IOMMU, there is no need + * to map the buffer object for the host1x, since the physical + * address can simply be used. + */ + if (!iommu_get_domain_for_dev(host->dev)) + phys = &phys_addr; + else + phys = NULL; + + sgt = host1x_bo_pin(host->dev, g->bo, phys); + if (IS_ERR(sgt)) { + err = PTR_ERR(sgt); + goto unpin; + } if (!IS_ENABLED(CONFIG_TEGRA_HOST1X_FIREWALL) && host->domain) { for_each_sg(sgt->sgl, sg, sgt->nents, j) @@ -163,14 +238,23 @@ static unsigned int pin_job(struct host1x *host, struct host1x_job *job) goto unpin; } - job->addr_phys[job->num_unpins] = - iova_dma_addr(&host->iova, alloc); job->unpins[job->num_unpins].size = gather_size; - } else { - job->addr_phys[job->num_unpins] = phys_addr; + phys_addr = iova_dma_addr(&host->iova, alloc); + } else if (sgt) { + err = dma_map_sg(host->dev, sgt->sgl, sgt->nents, + DMA_TO_DEVICE); + if (!err) { + err = -ENOMEM; + goto unpin; + } + + job->unpins[job->num_unpins].dir = DMA_TO_DEVICE; + job->unpins[job->num_unpins].dev = host->dev; + phys_addr = sg_dma_address(sgt->sgl); } - job->gather_addr_phys[i] = job->addr_phys[job->num_unpins]; + job->addr_phys[job->num_unpins] = phys_addr; + job->gather_addr_phys[i] = phys_addr; job->unpins[job->num_unpins].bo = g->bo; job->unpins[job->num_unpins].sgt = sgt; @@ -186,8 +270,7 @@ unpin: static int do_relocs(struct host1x_job *job, struct host1x_job_gather *g) { - u32 last_page = ~0; - void *cmdbuf_page_addr = NULL; + void *cmdbuf_addr = NULL; struct host1x_bo *cmdbuf = g->bo; unsigned int i; @@ -209,28 +292,22 @@ static int do_relocs(struct host1x_job *job, struct host1x_job_gather *g) goto patch_reloc; } - if (last_page != reloc->cmdbuf.offset >> PAGE_SHIFT) { - if (cmdbuf_page_addr) - host1x_bo_kunmap(cmdbuf, last_page, - cmdbuf_page_addr); - - cmdbuf_page_addr = host1x_bo_kmap(cmdbuf, - reloc->cmdbuf.offset >> PAGE_SHIFT); - last_page = reloc->cmdbuf.offset >> PAGE_SHIFT; + if (!cmdbuf_addr) { + cmdbuf_addr = host1x_bo_mmap(cmdbuf); - if (unlikely(!cmdbuf_page_addr)) { + if (unlikely(!cmdbuf_addr)) { pr_err("Could not map cmdbuf for relocation\n"); return -ENOMEM; } } - target = cmdbuf_page_addr + (reloc->cmdbuf.offset & ~PAGE_MASK); + target = cmdbuf_addr + reloc->cmdbuf.offset; patch_reloc: *target = reloc_addr; } - if (cmdbuf_page_addr) - host1x_bo_kunmap(cmdbuf, last_page, cmdbuf_page_addr); + if (cmdbuf_addr) + host1x_bo_munmap(cmdbuf, cmdbuf_addr); return 0; } @@ -436,7 +513,8 @@ out: return err; } -static inline int copy_gathers(struct host1x_job *job, struct device *dev) +static inline int copy_gathers(struct device *host, struct host1x_job *job, + struct device *dev) { struct host1x_firewall fw; size_t size = 0; @@ -459,12 +537,12 @@ static inline int copy_gathers(struct host1x_job *job, struct device *dev) * Try a non-blocking allocation from a higher priority pools first, * as awaiting for the allocation here is a major performance hit. */ - job->gather_copy_mapped = dma_alloc_wc(dev, size, &job->gather_copy, + job->gather_copy_mapped = dma_alloc_wc(host, size, &job->gather_copy, GFP_NOWAIT); /* the higher priority allocation failed, try the generic-blocking */ if (!job->gather_copy_mapped) - job->gather_copy_mapped = dma_alloc_wc(dev, size, + job->gather_copy_mapped = dma_alloc_wc(host, size, &job->gather_copy, GFP_KERNEL); if (!job->gather_copy_mapped) @@ -512,7 +590,7 @@ int host1x_job_pin(struct host1x_job *job, struct device *dev) goto out; if (IS_ENABLED(CONFIG_TEGRA_HOST1X_FIREWALL)) { - err = copy_gathers(job, dev); + err = copy_gathers(host->dev, job, dev); if (err) goto out; } @@ -557,6 +635,8 @@ void host1x_job_unpin(struct host1x_job *job) for (i = 0; i < job->num_unpins; i++) { struct host1x_job_unpin_data *unpin = &job->unpins[i]; + struct device *dev = unpin->dev ?: host->dev; + struct sg_table *sgt = unpin->sgt; if (!IS_ENABLED(CONFIG_TEGRA_HOST1X_FIREWALL) && unpin->size && host->domain) { @@ -566,14 +646,18 @@ void host1x_job_unpin(struct host1x_job *job) iova_pfn(&host->iova, job->addr_phys[i])); } - host1x_bo_unpin(unpin->bo, unpin->sgt); + if (unpin->dev && sgt) + dma_unmap_sg(unpin->dev, sgt->sgl, sgt->nents, + unpin->dir); + + host1x_bo_unpin(dev, unpin->bo, sgt); host1x_bo_put(unpin->bo); } job->num_unpins = 0; if (job->gather_copy_size) - dma_free_wc(job->channel->dev, job->gather_copy_size, + dma_free_wc(host->dev, job->gather_copy_size, job->gather_copy_mapped, job->gather_copy); } EXPORT_SYMBOL(host1x_job_unpin); |