summaryrefslogtreecommitdiffstats
path: root/drivers/gpu/host1x/job.c
diff options
context:
space:
mode:
Diffstat (limited to 'drivers/gpu/host1x/job.c')
-rw-r--r--drivers/gpu/host1x/job.c140
1 files changed, 112 insertions, 28 deletions
diff --git a/drivers/gpu/host1x/job.c b/drivers/gpu/host1x/job.c
index eaa5c3352c13..a10643aa89aa 100644
--- a/drivers/gpu/host1x/job.c
+++ b/drivers/gpu/host1x/job.c
@@ -8,6 +8,7 @@
#include <linux/dma-mapping.h>
#include <linux/err.h>
#include <linux/host1x.h>
+#include <linux/iommu.h>
#include <linux/kref.h>
#include <linux/module.h>
#include <linux/scatterlist.h>
@@ -99,15 +100,19 @@ EXPORT_SYMBOL(host1x_job_add_gather);
static unsigned int pin_job(struct host1x *host, struct host1x_job *job)
{
+ struct host1x_client *client = job->client;
+ struct device *dev = client->dev;
+ struct iommu_domain *domain;
unsigned int i;
int err;
+ domain = iommu_get_domain_for_dev(dev);
job->num_unpins = 0;
for (i = 0; i < job->num_relocs; i++) {
struct host1x_reloc *reloc = &job->relocs[i];
+ dma_addr_t phys_addr, *phys;
struct sg_table *sgt;
- dma_addr_t phys_addr;
reloc->target.bo = host1x_bo_get(reloc->target.bo);
if (!reloc->target.bo) {
@@ -115,7 +120,62 @@ static unsigned int pin_job(struct host1x *host, struct host1x_job *job)
goto unpin;
}
- phys_addr = host1x_bo_pin(reloc->target.bo, &sgt);
+ /*
+ * If the client device is not attached to an IOMMU, the
+ * physical address of the buffer object can be used.
+ *
+ * Similarly, when an IOMMU domain is shared between all
+ * host1x clients, the IOVA is already available, so no
+ * need to map the buffer object again.
+ *
+ * XXX Note that this isn't always safe to do because it
+ * relies on an assumption that no cache maintenance is
+ * needed on the buffer objects.
+ */
+ if (!domain || client->group)
+ phys = &phys_addr;
+ else
+ phys = NULL;
+
+ sgt = host1x_bo_pin(dev, reloc->target.bo, phys);
+ if (IS_ERR(sgt)) {
+ err = PTR_ERR(sgt);
+ goto unpin;
+ }
+
+ if (sgt) {
+ unsigned long mask = HOST1X_RELOC_READ |
+ HOST1X_RELOC_WRITE;
+ enum dma_data_direction dir;
+
+ switch (reloc->flags & mask) {
+ case HOST1X_RELOC_READ:
+ dir = DMA_TO_DEVICE;
+ break;
+
+ case HOST1X_RELOC_WRITE:
+ dir = DMA_FROM_DEVICE;
+ break;
+
+ case HOST1X_RELOC_READ | HOST1X_RELOC_WRITE:
+ dir = DMA_BIDIRECTIONAL;
+ break;
+
+ default:
+ err = -EINVAL;
+ goto unpin;
+ }
+
+ err = dma_map_sg(dev, sgt->sgl, sgt->nents, dir);
+ if (!err) {
+ err = -ENOMEM;
+ goto unpin;
+ }
+
+ job->unpins[job->num_unpins].dev = dev;
+ job->unpins[job->num_unpins].dir = dir;
+ phys_addr = sg_dma_address(sgt->sgl);
+ }
job->addr_phys[job->num_unpins] = phys_addr;
job->unpins[job->num_unpins].bo = reloc->target.bo;
@@ -131,6 +191,7 @@ static unsigned int pin_job(struct host1x *host, struct host1x_job *job)
dma_addr_t phys_addr;
unsigned long shift;
struct iova *alloc;
+ dma_addr_t *phys;
unsigned int j;
g->bo = host1x_bo_get(g->bo);
@@ -139,7 +200,21 @@ static unsigned int pin_job(struct host1x *host, struct host1x_job *job)
goto unpin;
}
- phys_addr = host1x_bo_pin(g->bo, &sgt);
+ /**
+ * If the host1x is not attached to an IOMMU, there is no need
+ * to map the buffer object for the host1x, since the physical
+ * address can simply be used.
+ */
+ if (!iommu_get_domain_for_dev(host->dev))
+ phys = &phys_addr;
+ else
+ phys = NULL;
+
+ sgt = host1x_bo_pin(host->dev, g->bo, phys);
+ if (IS_ERR(sgt)) {
+ err = PTR_ERR(sgt);
+ goto unpin;
+ }
if (!IS_ENABLED(CONFIG_TEGRA_HOST1X_FIREWALL) && host->domain) {
for_each_sg(sgt->sgl, sg, sgt->nents, j)
@@ -163,14 +238,23 @@ static unsigned int pin_job(struct host1x *host, struct host1x_job *job)
goto unpin;
}
- job->addr_phys[job->num_unpins] =
- iova_dma_addr(&host->iova, alloc);
job->unpins[job->num_unpins].size = gather_size;
- } else {
- job->addr_phys[job->num_unpins] = phys_addr;
+ phys_addr = iova_dma_addr(&host->iova, alloc);
+ } else if (sgt) {
+ err = dma_map_sg(host->dev, sgt->sgl, sgt->nents,
+ DMA_TO_DEVICE);
+ if (!err) {
+ err = -ENOMEM;
+ goto unpin;
+ }
+
+ job->unpins[job->num_unpins].dir = DMA_TO_DEVICE;
+ job->unpins[job->num_unpins].dev = host->dev;
+ phys_addr = sg_dma_address(sgt->sgl);
}
- job->gather_addr_phys[i] = job->addr_phys[job->num_unpins];
+ job->addr_phys[job->num_unpins] = phys_addr;
+ job->gather_addr_phys[i] = phys_addr;
job->unpins[job->num_unpins].bo = g->bo;
job->unpins[job->num_unpins].sgt = sgt;
@@ -186,8 +270,7 @@ unpin:
static int do_relocs(struct host1x_job *job, struct host1x_job_gather *g)
{
- u32 last_page = ~0;
- void *cmdbuf_page_addr = NULL;
+ void *cmdbuf_addr = NULL;
struct host1x_bo *cmdbuf = g->bo;
unsigned int i;
@@ -209,28 +292,22 @@ static int do_relocs(struct host1x_job *job, struct host1x_job_gather *g)
goto patch_reloc;
}
- if (last_page != reloc->cmdbuf.offset >> PAGE_SHIFT) {
- if (cmdbuf_page_addr)
- host1x_bo_kunmap(cmdbuf, last_page,
- cmdbuf_page_addr);
-
- cmdbuf_page_addr = host1x_bo_kmap(cmdbuf,
- reloc->cmdbuf.offset >> PAGE_SHIFT);
- last_page = reloc->cmdbuf.offset >> PAGE_SHIFT;
+ if (!cmdbuf_addr) {
+ cmdbuf_addr = host1x_bo_mmap(cmdbuf);
- if (unlikely(!cmdbuf_page_addr)) {
+ if (unlikely(!cmdbuf_addr)) {
pr_err("Could not map cmdbuf for relocation\n");
return -ENOMEM;
}
}
- target = cmdbuf_page_addr + (reloc->cmdbuf.offset & ~PAGE_MASK);
+ target = cmdbuf_addr + reloc->cmdbuf.offset;
patch_reloc:
*target = reloc_addr;
}
- if (cmdbuf_page_addr)
- host1x_bo_kunmap(cmdbuf, last_page, cmdbuf_page_addr);
+ if (cmdbuf_addr)
+ host1x_bo_munmap(cmdbuf, cmdbuf_addr);
return 0;
}
@@ -436,7 +513,8 @@ out:
return err;
}
-static inline int copy_gathers(struct host1x_job *job, struct device *dev)
+static inline int copy_gathers(struct device *host, struct host1x_job *job,
+ struct device *dev)
{
struct host1x_firewall fw;
size_t size = 0;
@@ -459,12 +537,12 @@ static inline int copy_gathers(struct host1x_job *job, struct device *dev)
* Try a non-blocking allocation from a higher priority pools first,
* as awaiting for the allocation here is a major performance hit.
*/
- job->gather_copy_mapped = dma_alloc_wc(dev, size, &job->gather_copy,
+ job->gather_copy_mapped = dma_alloc_wc(host, size, &job->gather_copy,
GFP_NOWAIT);
/* the higher priority allocation failed, try the generic-blocking */
if (!job->gather_copy_mapped)
- job->gather_copy_mapped = dma_alloc_wc(dev, size,
+ job->gather_copy_mapped = dma_alloc_wc(host, size,
&job->gather_copy,
GFP_KERNEL);
if (!job->gather_copy_mapped)
@@ -512,7 +590,7 @@ int host1x_job_pin(struct host1x_job *job, struct device *dev)
goto out;
if (IS_ENABLED(CONFIG_TEGRA_HOST1X_FIREWALL)) {
- err = copy_gathers(job, dev);
+ err = copy_gathers(host->dev, job, dev);
if (err)
goto out;
}
@@ -557,6 +635,8 @@ void host1x_job_unpin(struct host1x_job *job)
for (i = 0; i < job->num_unpins; i++) {
struct host1x_job_unpin_data *unpin = &job->unpins[i];
+ struct device *dev = unpin->dev ?: host->dev;
+ struct sg_table *sgt = unpin->sgt;
if (!IS_ENABLED(CONFIG_TEGRA_HOST1X_FIREWALL) &&
unpin->size && host->domain) {
@@ -566,14 +646,18 @@ void host1x_job_unpin(struct host1x_job *job)
iova_pfn(&host->iova, job->addr_phys[i]));
}
- host1x_bo_unpin(unpin->bo, unpin->sgt);
+ if (unpin->dev && sgt)
+ dma_unmap_sg(unpin->dev, sgt->sgl, sgt->nents,
+ unpin->dir);
+
+ host1x_bo_unpin(dev, unpin->bo, sgt);
host1x_bo_put(unpin->bo);
}
job->num_unpins = 0;
if (job->gather_copy_size)
- dma_free_wc(job->channel->dev, job->gather_copy_size,
+ dma_free_wc(host->dev, job->gather_copy_size,
job->gather_copy_mapped, job->gather_copy);
}
EXPORT_SYMBOL(host1x_job_unpin);
OpenPOWER on IntegriCloud