1 files changed, 58 insertions, 23 deletions
diff --git a/drivers/gpu/drm/i915/i915_gem_execbuffer.c b/drivers/gpu/drm/i915/i915_gem_execbuffer.c
index 0414228cd2b5..22df17c8ca9b 100644
--- a/drivers/gpu/drm/i915/i915_gem_execbuffer.c
+++ b/drivers/gpu/drm/i915/i915_gem_execbuffer.c
@@ -81,6 +81,35 @@ enum {
  * but this remains just a hint as the kernel may choose a new location for
  * any object in the future.
  *
+ * At the level of talking to the hardware, submitting a batchbuffer for the
+ * GPU to execute is to add content to a buffer from which the HW
+ * command streamer is reading.
+ *
+ * 1. Add a command to load the HW context. For Logical Ring Contexts, i.e.
+ *    Execlists, this command is not placed on the same buffer as the
+ *    remaining items.
+ *
+ * 2. Add a command to invalidate caches to the buffer.
+ *
+ * 3. Add a batchbuffer start command to the buffer; the start command is
+ *    essentially a token together with the GPU address of the batchbuffer
+ *    to be executed.
+ *
+ * 4. Add a pipeline flush to the buffer.
+ *
+ * 5. Add a memory write command to the buffer to record when the GPU
+ *    is done executing the batchbuffer. The memory write writes the
+ *    global sequence number of the request, ``i915_request::global_seqno``;
+ *    the i915 driver uses the current value in the register to determine
+ *    if the GPU has completed the batchbuffer.
+ *
+ * 6. Add a user interrupt command to the buffer. This command instructs
+ *    the GPU to issue an interrupt when the command, pipeline flush and
+ *    memory write are completed.
+ *
+ * 7. Inform the hardware of the additional commands added to the buffer
+ *    (by updating the tail pointer).
+ *
  * Processing an execbuf ioctl is conceptually split up into a few phases.
  *
  * 1. Validation - Ensure all the pointers, handles and flags are valid.
@@ -460,7 +489,9 @@ eb_validate_vma(struct i915_execbuffer *eb,
 }
 
 static int
-eb_add_vma(struct i915_execbuffer *eb, unsigned int i, struct i915_vma *vma)
+eb_add_vma(struct i915_execbuffer *eb,
+	   unsigned int i, unsigned batch_idx,
+	   struct i915_vma *vma)
 {
 	struct drm_i915_gem_exec_object2 *entry = &eb->exec[i];
 	int err;
@@ -493,6 +524,24 @@ eb_add_vma(struct i915_execbuffer *eb, unsigned int i, struct i915_vma *vma)
 	eb->flags[i] = entry->flags;
 	vma->exec_flags = &eb->flags[i];
 
+	/*
+	 * SNA is doing fancy tricks with compressing batch buffers, which leads
+	 * to negative relocation deltas. Usually that works out ok since the
+	 * relocate address is still positive, except when the batch is placed
+	 * very low in the GTT. Ensure this doesn't happen.
+	 *
+	 * Note that actual hangs have only been observed on gen7, but for
+	 * paranoia do it everywhere.
+	 */
+	if (i == batch_idx) {
+		if (!(eb->flags[i] & EXEC_OBJECT_PINNED))
+			eb->flags[i] |= __EXEC_OBJECT_NEEDS_BIAS;
+		if (eb->reloc_cache.has_fence)
+			eb->flags[i] |= EXEC_OBJECT_NEEDS_FENCE;
+
+		eb->batch = vma;
+	}
+
 	err = 0;
 	if (eb_pin_vma(eb, entry, vma)) {
 		if (entry->offset != vma->node.start) {
@@ -687,7 +736,7 @@ static int eb_lookup_vmas(struct i915_execbuffer *eb)
 {
 	struct radix_tree_root *handles_vma = &eb->ctx->handles_vma;
 	struct drm_i915_gem_object *obj;
-	unsigned int i;
+	unsigned int i, batch;
 	int err;
 
 	if (unlikely(i915_gem_context_is_closed(eb->ctx)))
@@ -699,6 +748,8 @@ static int eb_lookup_vmas(struct i915_execbuffer *eb)
 	INIT_LIST_HEAD(&eb->relocs);
 	INIT_LIST_HEAD(&eb->unbound);
 
+	batch = eb_batch_index(eb);
+
 	for (i = 0; i < eb->buffer_count; i++) {
 		u32 handle = eb->exec[i].handle;
 		struct i915_lut_handle *lut;
@@ -733,40 +784,24 @@ static int eb_lookup_vmas(struct i915_execbuffer *eb)
 		}
 
 		/* transfer ref to ctx */
-		vma->open_count++;
+		if (!vma->open_count++)
+			i915_vma_reopen(vma);
 		list_add(&lut->obj_link, &obj->lut_list);
 		list_add(&lut->ctx_link, &eb->ctx->handles_list);
 		lut->ctx = eb->ctx;
 		lut->handle = handle;
 
 add_vma:
-		err = eb_add_vma(eb, i, vma);
+		err = eb_add_vma(eb, i, batch, vma);
 		if (unlikely(err))
 			goto err_vma;
 
 		GEM_BUG_ON(vma != eb->vma[i]);
 		GEM_BUG_ON(vma->exec_flags != &eb->flags[i]);
+		GEM_BUG_ON(drm_mm_node_allocated(&vma->node) &&
+			   eb_vma_misplaced(&eb->exec[i], vma, eb->flags[i]));
 	}
 
-	/* take note of the batch buffer before we might reorder the lists */
-	i = eb_batch_index(eb);
-	eb->batch = eb->vma[i];
-	GEM_BUG_ON(eb->batch->exec_flags != &eb->flags[i]);
-
-	/*
-	 * SNA is doing fancy tricks with compressing batch buffers, which leads
-	 * to negative relocation deltas. Usually that works out ok since the
-	 * relocate address is still positive, except when the batch is placed
-	 * very low in the GTT. Ensure this doesn't happen.
-	 *
-	 * Note that actual hangs have only been observed on gen7, but for
-	 * paranoia do it everywhere.
-	 */
-	if (!(eb->flags[i] & EXEC_OBJECT_PINNED))
-		eb->flags[i] |= __EXEC_OBJECT_NEEDS_BIAS;
-	if (eb->reloc_cache.has_fence)
-		eb->flags[i] |= EXEC_OBJECT_NEEDS_FENCE;
-
 	eb->args->flags |= __EXEC_VALIDATED;
 	return eb_reserve(eb);