summaryrefslogtreecommitdiffstats
path: root/drivers/gpu/drm/i915/i915_gem_execbuffer.c
diff options
context:
space:
mode:
Diffstat (limited to 'drivers/gpu/drm/i915/i915_gem_execbuffer.c')
-rw-r--r--drivers/gpu/drm/i915/i915_gem_execbuffer.c81
1 files changed, 58 insertions, 23 deletions
diff --git a/drivers/gpu/drm/i915/i915_gem_execbuffer.c b/drivers/gpu/drm/i915/i915_gem_execbuffer.c
index 0414228cd2b5..22df17c8ca9b 100644
--- a/drivers/gpu/drm/i915/i915_gem_execbuffer.c
+++ b/drivers/gpu/drm/i915/i915_gem_execbuffer.c
@@ -81,6 +81,35 @@ enum {
* but this remains just a hint as the kernel may choose a new location for
* any object in the future.
*
+ * At the level of talking to the hardware, submitting a batchbuffer for the
+ * GPU to execute is to add content to a buffer from which the HW
+ * command streamer is reading.
+ *
+ * 1. Add a command to load the HW context. For Logical Ring Contexts, i.e.
+ * Execlists, this command is not placed on the same buffer as the
+ * remaining items.
+ *
+ * 2. Add a command to invalidate caches to the buffer.
+ *
+ * 3. Add a batchbuffer start command to the buffer; the start command is
+ * essentially a token together with the GPU address of the batchbuffer
+ * to be executed.
+ *
+ * 4. Add a pipeline flush to the buffer.
+ *
+ * 5. Add a memory write command to the buffer to record when the GPU
+ * is done executing the batchbuffer. The memory write writes the
+ * global sequence number of the request, ``i915_request::global_seqno``;
+ * the i915 driver uses the current value in the register to determine
+ * if the GPU has completed the batchbuffer.
+ *
+ * 6. Add a user interrupt command to the buffer. This command instructs
+ * the GPU to issue an interrupt when the command, pipeline flush and
+ * memory write are completed.
+ *
+ * 7. Inform the hardware of the additional commands added to the buffer
+ * (by updating the tail pointer).
+ *
* Processing an execbuf ioctl is conceptually split up into a few phases.
*
* 1. Validation - Ensure all the pointers, handles and flags are valid.
@@ -460,7 +489,9 @@ eb_validate_vma(struct i915_execbuffer *eb,
}
static int
-eb_add_vma(struct i915_execbuffer *eb, unsigned int i, struct i915_vma *vma)
+eb_add_vma(struct i915_execbuffer *eb,
+ unsigned int i, unsigned batch_idx,
+ struct i915_vma *vma)
{
struct drm_i915_gem_exec_object2 *entry = &eb->exec[i];
int err;
@@ -493,6 +524,24 @@ eb_add_vma(struct i915_execbuffer *eb, unsigned int i, struct i915_vma *vma)
eb->flags[i] = entry->flags;
vma->exec_flags = &eb->flags[i];
+ /*
+ * SNA is doing fancy tricks with compressing batch buffers, which leads
+ * to negative relocation deltas. Usually that works out ok since the
+ * relocate address is still positive, except when the batch is placed
+ * very low in the GTT. Ensure this doesn't happen.
+ *
+ * Note that actual hangs have only been observed on gen7, but for
+ * paranoia do it everywhere.
+ */
+ if (i == batch_idx) {
+ if (!(eb->flags[i] & EXEC_OBJECT_PINNED))
+ eb->flags[i] |= __EXEC_OBJECT_NEEDS_BIAS;
+ if (eb->reloc_cache.has_fence)
+ eb->flags[i] |= EXEC_OBJECT_NEEDS_FENCE;
+
+ eb->batch = vma;
+ }
+
err = 0;
if (eb_pin_vma(eb, entry, vma)) {
if (entry->offset != vma->node.start) {
@@ -687,7 +736,7 @@ static int eb_lookup_vmas(struct i915_execbuffer *eb)
{
struct radix_tree_root *handles_vma = &eb->ctx->handles_vma;
struct drm_i915_gem_object *obj;
- unsigned int i;
+ unsigned int i, batch;
int err;
if (unlikely(i915_gem_context_is_closed(eb->ctx)))
@@ -699,6 +748,8 @@ static int eb_lookup_vmas(struct i915_execbuffer *eb)
INIT_LIST_HEAD(&eb->relocs);
INIT_LIST_HEAD(&eb->unbound);
+ batch = eb_batch_index(eb);
+
for (i = 0; i < eb->buffer_count; i++) {
u32 handle = eb->exec[i].handle;
struct i915_lut_handle *lut;
@@ -733,40 +784,24 @@ static int eb_lookup_vmas(struct i915_execbuffer *eb)
}
/* transfer ref to ctx */
- vma->open_count++;
+ if (!vma->open_count++)
+ i915_vma_reopen(vma);
list_add(&lut->obj_link, &obj->lut_list);
list_add(&lut->ctx_link, &eb->ctx->handles_list);
lut->ctx = eb->ctx;
lut->handle = handle;
add_vma:
- err = eb_add_vma(eb, i, vma);
+ err = eb_add_vma(eb, i, batch, vma);
if (unlikely(err))
goto err_vma;
GEM_BUG_ON(vma != eb->vma[i]);
GEM_BUG_ON(vma->exec_flags != &eb->flags[i]);
+ GEM_BUG_ON(drm_mm_node_allocated(&vma->node) &&
+ eb_vma_misplaced(&eb->exec[i], vma, eb->flags[i]));
}
- /* take note of the batch buffer before we might reorder the lists */
- i = eb_batch_index(eb);
- eb->batch = eb->vma[i];
- GEM_BUG_ON(eb->batch->exec_flags != &eb->flags[i]);
-
- /*
- * SNA is doing fancy tricks with compressing batch buffers, which leads
- * to negative relocation deltas. Usually that works out ok since the
- * relocate address is still positive, except when the batch is placed
- * very low in the GTT. Ensure this doesn't happen.
- *
- * Note that actual hangs have only been observed on gen7, but for
- * paranoia do it everywhere.
- */
- if (!(eb->flags[i] & EXEC_OBJECT_PINNED))
- eb->flags[i] |= __EXEC_OBJECT_NEEDS_BIAS;
- if (eb->reloc_cache.has_fence)
- eb->flags[i] |= EXEC_OBJECT_NEEDS_FENCE;
-
eb->args->flags |= __EXEC_VALIDATED;
return eb_reserve(eb);
OpenPOWER on IntegriCloud