1 files changed, 186 insertions, 75 deletions
diff --git a/drivers/gpu/drm/i915/intel_lrc.c b/drivers/gpu/drm/i915/intel_lrc.c
index 697af5add78b..c7c85134a84a 100644
--- a/drivers/gpu/drm/i915/intel_lrc.c
+++ b/drivers/gpu/drm/i915/intel_lrc.c
@@ -139,6 +139,7 @@
 #include "i915_gem_render_state.h"
 #include "intel_lrc_reg.h"
 #include "intel_mocs.h"
+#include "intel_workarounds.h"
 
 #define RING_EXECLIST_QFULL		(1 << 0x2)
 #define RING_EXECLIST1_VALID		(1 << 0x3)
@@ -183,7 +184,8 @@ static inline bool need_preempt(const struct intel_engine_cs *engine,
 				const struct i915_request *last,
 				int prio)
 {
-	return engine->i915->preempt_context && prio > max(rq_prio(last), 0);
+	return (intel_engine_has_preemption(engine) &&
+		__execlists_need_preempt(prio, rq_prio(last)));
 }
 
 /**
@@ -374,6 +376,19 @@ execlists_context_status_change(struct i915_request *rq, unsigned long status)
 				   status, rq);
 }
 
+inline void
+execlists_user_begin(struct intel_engine_execlists *execlists,
+		     const struct execlist_port *port)
+{
+	execlists_set_active_once(execlists, EXECLISTS_ACTIVE_USER);
+}
+
+inline void
+execlists_user_end(struct intel_engine_execlists *execlists)
+{
+	execlists_clear_active(execlists, EXECLISTS_ACTIVE_USER);
+}
+
 static inline void
 execlists_context_schedule_in(struct i915_request *rq)
 {
@@ -454,10 +469,12 @@ static void execlists_submit_ports(struct intel_engine_cs *engine)
 			desc = execlists_update_context(rq);
 			GEM_DEBUG_EXEC(port[n].context_id = upper_32_bits(desc));
 
-			GEM_TRACE("%s in[%d]:  ctx=%d.%d, seqno=%x, prio=%d\n",
+			GEM_TRACE("%s in[%d]:  ctx=%d.%d, global=%d (fence %llx:%d) (current %d), prio=%d\n",
 				  engine->name, n,
 				  port[n].context_id, count,
 				  rq->global_seqno,
+				  rq->fence.context, rq->fence.seqno,
+				  intel_engine_get_seqno(engine),
 				  rq_prio(rq));
 		} else {
 			GEM_BUG_ON(!n);
@@ -577,6 +594,8 @@ static void execlists_dequeue(struct intel_engine_cs *engine)
 		 * know the next preemption status we see corresponds
 		 * to this ELSP update.
 		 */
+		GEM_BUG_ON(!execlists_is_active(execlists,
+						EXECLISTS_ACTIVE_USER));
 		GEM_BUG_ON(!port_count(&port[0]));
 		if (port_count(&port[0]) > 1)
 			goto unlock;
@@ -695,8 +714,27 @@ static void execlists_dequeue(struct intel_engine_cs *engine)
 		if (p->priority != I915_PRIORITY_NORMAL)
 			kmem_cache_free(engine->i915->priorities, p);
 	}
+
 done:
-	execlists->queue_priority = rb ? to_priolist(rb)->priority : INT_MIN;
+	/*
+	 * Here be a bit of magic! Or sleight-of-hand, whichever you prefer.
+	 *
+	 * We choose queue_priority such that if we add a request of greater
+	 * priority than this, we kick the submission tasklet to decide on
+	 * the right order of submitting the requests to hardware. We must
+	 * also be prepared to reorder requests as they are in-flight on the
+	 * HW. We derive the queue_priority then as the first "hole" in
+	 * the HW submission ports and if there are no available slots,
+	 * the priority of the lowest executing request, i.e. last.
+	 *
+	 * When we do receive a higher priority request ready to run from the
+	 * user, see queue_request(), the queue_priority is bumped to that
+	 * request triggering preemption on the next dequeue (or subsequent
+	 * interrupt for secondary ports).
+	 */
+	execlists->queue_priority =
+		port != execlists->port ? rq_prio(last) : INT_MIN;
+
 	execlists->first = rb;
 	if (submit)
 		port_assign(port, last);
@@ -708,7 +746,7 @@ unlock:
 	spin_unlock_irq(&engine->timeline->lock);
 
 	if (submit) {
-		execlists_set_active(execlists, EXECLISTS_ACTIVE_USER);
+		execlists_user_begin(execlists, execlists->port);
 		execlists_submit_ports(engine);
 	}
 
@@ -725,6 +763,13 @@ execlists_cancel_port_requests(struct intel_engine_execlists * const execlists)
 	while (num_ports-- && port_isset(port)) {
 		struct i915_request *rq = port_request(port);
 
+		GEM_TRACE("%s:port%u global=%d (fence %llx:%d), (current %d)\n",
+			  rq->engine->name,
+			  (unsigned int)(port - execlists->port),
+			  rq->global_seqno,
+			  rq->fence.context, rq->fence.seqno,
+			  intel_engine_get_seqno(rq->engine));
+
 		GEM_BUG_ON(!execlists->active);
 		intel_engine_context_out(rq->engine);
 
@@ -738,6 +783,59 @@ execlists_cancel_port_requests(struct intel_engine_execlists * const execlists)
 		memset(port, 0, sizeof(*port));
 		port++;
 	}
+
+	execlists_user_end(execlists);
+}
+
+static void clear_gtiir(struct intel_engine_cs *engine)
+{
+	static const u8 gtiir[] = {
+		[RCS]  = 0,
+		[BCS]  = 0,
+		[VCS]  = 1,
+		[VCS2] = 1,
+		[VECS] = 3,
+	};
+	struct drm_i915_private *dev_priv = engine->i915;
+	int i;
+
+	/* TODO: correctly reset irqs for gen11 */
+	if (WARN_ON_ONCE(INTEL_GEN(engine->i915) >= 11))
+		return;
+
+	GEM_BUG_ON(engine->id >= ARRAY_SIZE(gtiir));
+
+	/*
+	 * Clear any pending interrupt state.
+	 *
+	 * We do it twice out of paranoia that some of the IIR are
+	 * double buffered, and so if we only reset it once there may
+	 * still be an interrupt pending.
+	 */
+	for (i = 0; i < 2; i++) {
+		I915_WRITE(GEN8_GT_IIR(gtiir[engine->id]),
+			   engine->irq_keep_mask);
+		POSTING_READ(GEN8_GT_IIR(gtiir[engine->id]));
+	}
+	GEM_BUG_ON(I915_READ(GEN8_GT_IIR(gtiir[engine->id])) &
+		   engine->irq_keep_mask);
+}
+
+static void reset_irq(struct intel_engine_cs *engine)
+{
+	/* Mark all CS interrupts as complete */
+	smp_store_mb(engine->execlists.active, 0);
+	synchronize_hardirq(engine->i915->drm.irq);
+
+	clear_gtiir(engine);
+
+	/*
+	 * The port is checked prior to scheduling a tasklet, but
+	 * just in case we have suspended the tasklet to do the
+	 * wedging make sure that when it wakes, it decides there
+	 * is no work to do by clearing the irq_posted bit.
+	 */
+	clear_bit(ENGINE_IRQ_EXECLIST, &engine->irq_posted);
 }
 
 static void execlists_cancel_requests(struct intel_engine_cs *engine)
@@ -747,7 +845,8 @@ static void execlists_cancel_requests(struct intel_engine_cs *engine)
 	struct rb_node *rb;
 	unsigned long flags;
 
-	GEM_TRACE("%s\n", engine->name);
+	GEM_TRACE("%s current %d\n",
+		  engine->name, intel_engine_get_seqno(engine));
 
 	/*
 	 * Before we call engine->cancel_requests(), we should have exclusive
@@ -767,6 +866,7 @@ static void execlists_cancel_requests(struct intel_engine_cs *engine)
 
 	/* Cancel the requests on the HW and clear the ELSP tracker. */
 	execlists_cancel_port_requests(execlists);
+	reset_irq(engine);
 
 	spin_lock(&engine->timeline->lock);
 
@@ -805,17 +905,6 @@ static void execlists_cancel_requests(struct intel_engine_cs *engine)
 
 	spin_unlock(&engine->timeline->lock);
 
-	/*
-	 * The port is checked prior to scheduling a tasklet, but
-	 * just in case we have suspended the tasklet to do the
-	 * wedging make sure that when it wakes, it decides there
-	 * is no work to do by clearing the irq_posted bit.
-	 */
-	clear_bit(ENGINE_IRQ_EXECLIST, &engine->irq_posted);
-
-	/* Mark all CS interrupts as complete */
-	execlists->active = 0;
-
 	local_irq_restore(flags);
 }
 
@@ -827,7 +916,7 @@ static void execlists_submission_tasklet(unsigned long data)
 {
 	struct intel_engine_cs * const engine = (struct intel_engine_cs *)data;
 	struct intel_engine_execlists * const execlists = &engine->execlists;
-	struct execlist_port * const port = execlists->port;
+	struct execlist_port *port = execlists->port;
 	struct drm_i915_private *dev_priv = engine->i915;
 	bool fw = false;
 
@@ -954,10 +1043,13 @@ static void execlists_submission_tasklet(unsigned long data)
 							EXECLISTS_ACTIVE_USER));
 
 			rq = port_unpack(port, &count);
-			GEM_TRACE("%s out[0]: ctx=%d.%d, seqno=%x, prio=%d\n",
+			GEM_TRACE("%s out[0]: ctx=%d.%d, global=%d (fence %llx:%d) (current %d), prio=%d\n",
 				  engine->name,
 				  port->context_id, count,
 				  rq ? rq->global_seqno : 0,
+				  rq ? rq->fence.context : 0,
+				  rq ? rq->fence.seqno : 0,
+				  intel_engine_get_seqno(engine),
 				  rq ? rq_prio(rq) : 0);
 
 			/* Check the context/desc id for this event matches */
@@ -965,10 +1057,28 @@ static void execlists_submission_tasklet(unsigned long data)
 
 			GEM_BUG_ON(count == 0);
 			if (--count == 0) {
+				/*
+				 * On the final event corresponding to the
+				 * submission of this context, we expect either
+				 * an element-switch event or a completion
+				 * event (and on completion, the active-idle
+				 * marker). No more preemptions, lite-restore
+				 * or otherwise.
+				 */
 				GEM_BUG_ON(status & GEN8_CTX_STATUS_PREEMPTED);
 				GEM_BUG_ON(port_isset(&port[1]) &&
 					   !(status & GEN8_CTX_STATUS_ELEMENT_SWITCH));
+				GEM_BUG_ON(!port_isset(&port[1]) &&
+					   !(status & GEN8_CTX_STATUS_ACTIVE_IDLE));
+
+				/*
+				 * We rely on the hardware being strongly
+				 * ordered, that the breadcrumb write is
+				 * coherent (visible from the CPU) before the
+				 * user interrupt and CSB is processed.
+				 */
 				GEM_BUG_ON(!i915_request_completed(rq));
+
 				execlists_context_schedule_out(rq);
 				trace_i915_request_out(rq);
 				i915_request_put(rq);
@@ -976,17 +1086,14 @@ static void execlists_submission_tasklet(unsigned long data)
 				GEM_TRACE("%s completed ctx=%d\n",
 					  engine->name, port->context_id);
 
-				execlists_port_complete(execlists, port);
+				port = execlists_port_complete(execlists, port);
+				if (port_isset(port))
+					execlists_user_begin(execlists, port);
+				else
+					execlists_user_end(execlists);
 			} else {
 				port_set(port, port_pack(rq, count));
 			}
-
-			/* After the final element, the hw should be idle */
-			GEM_BUG_ON(port_count(port) == 0 &&
-				   !(status & GEN8_CTX_STATUS_ACTIVE_IDLE));
-			if (port_count(port) == 0)
-				execlists_clear_active(execlists,
-						       EXECLISTS_ACTIVE_USER);
 		}
 
 		if (head != execlists->csb_head) {
@@ -1001,6 +1108,11 @@ static void execlists_submission_tasklet(unsigned long data)
 
 	if (fw)
 		intel_uncore_forcewake_put(dev_priv, execlists->fw_domains);
+
+	/* If the engine is now idle, so should be the flag; and vice versa. */
+	GEM_BUG_ON(execlists_is_active(&engine->execlists,
+				       EXECLISTS_ACTIVE_USER) ==
+		   !port_isset(engine->execlists.port));
 }
 
 static void queue_request(struct intel_engine_cs *engine,
@@ -1010,12 +1122,16 @@ static void queue_request(struct intel_engine_cs *engine,
 	list_add_tail(&pt->link, &lookup_priolist(engine, pt, prio)->requests);
 }
 
+static void __submit_queue(struct intel_engine_cs *engine, int prio)
+{
+	engine->execlists.queue_priority = prio;
+	tasklet_hi_schedule(&engine->execlists.tasklet);
+}
+
 static void submit_queue(struct intel_engine_cs *engine, int prio)
 {
-	if (prio > engine->execlists.queue_priority) {
-		engine->execlists.queue_priority = prio;
-		tasklet_hi_schedule(&engine->execlists.tasklet);
-	}
+	if (prio > engine->execlists.queue_priority)
+		__submit_queue(engine, prio);
 }
 
 static void execlists_submit_request(struct i915_request *request)
@@ -1148,7 +1264,10 @@ static void execlists_schedule(struct i915_request *request, int prio)
 			__list_del_entry(&pt->link);
 			queue_request(engine, pt, prio);
 		}
-		submit_queue(engine, prio);
+
+		if (prio > engine->execlists.queue_priority &&
+		    i915_sw_fence_done(&pt_to_request(pt)->submit))
+			__submit_queue(engine, prio);
 	}
 
 	spin_unlock_irq(&engine->timeline->lock);
@@ -1215,6 +1334,7 @@ execlists_context_pin(struct intel_engine_cs *engine,
 	ce->lrc_reg_state = vaddr + LRC_STATE_PN * PAGE_SIZE;
 	ce->lrc_reg_state[CTX_RING_BUFFER_START+1] =
 		i915_ggtt_offset(ce->ring->vma);
+	ce->lrc_reg_state[CTX_RING_HEAD+1] = ce->ring->head;
 
 	ce->state->obj->pin_global++;
 	i915_gem_context_get(ctx);
@@ -1565,14 +1685,6 @@ static int intel_init_workaround_bb(struct intel_engine_cs *engine)
 	return ret;
 }
 
-static u8 gtiir[] = {
-	[RCS] = 0,
-	[BCS] = 0,
-	[VCS] = 1,
-	[VCS2] = 1,
-	[VECS] = 3,
-};
-
 static void enable_execlists(struct intel_engine_cs *engine)
 {
 	struct drm_i915_private *dev_priv = engine->i915;
@@ -1632,6 +1744,10 @@ static int gen8_init_render_ring(struct intel_engine_cs *engine)
 	if (ret)
 		return ret;
 
+	ret = intel_whitelist_workarounds_apply(engine);
+	if (ret)
+		return ret;
+
 	/* We need to disable the AsyncFlip performance optimisations in order
 	 * to use MI_WAIT_FOR_EVENT within the CS. It should already be
 	 * programmed to '1' on all products.
@@ -1642,7 +1758,7 @@ static int gen8_init_render_ring(struct intel_engine_cs *engine)
 
 	I915_WRITE(INSTPM, _MASKED_BIT_ENABLE(INSTPM_FORCE_ORDERING));
 
-	return init_workarounds_ring(engine);
+	return 0;
 }
 
 static int gen9_init_render_ring(struct intel_engine_cs *engine)
@@ -1653,32 +1769,11 @@ static int gen9_init_render_ring(struct intel_engine_cs *engine)
 	if (ret)
 		return ret;
 
-	return init_workarounds_ring(engine);
-}
-
-static void reset_irq(struct intel_engine_cs *engine)
-{
-	struct drm_i915_private *dev_priv = engine->i915;
-	int i;
-
-	GEM_BUG_ON(engine->id >= ARRAY_SIZE(gtiir));
-
-	/*
-	 * Clear any pending interrupt state.
-	 *
-	 * We do it twice out of paranoia that some of the IIR are double
-	 * buffered, and if we only reset it once there may still be
-	 * an interrupt pending.
-	 */
-	for (i = 0; i < 2; i++) {
-		I915_WRITE(GEN8_GT_IIR(gtiir[engine->id]),
-			   GT_CONTEXT_SWITCH_INTERRUPT << engine->irq_shift);
-		POSTING_READ(GEN8_GT_IIR(gtiir[engine->id]));
-	}
-	GEM_BUG_ON(I915_READ(GEN8_GT_IIR(gtiir[engine->id])) &
-		   (GT_CONTEXT_SWITCH_INTERRUPT << engine->irq_shift));
+	ret = intel_whitelist_workarounds_apply(engine);
+	if (ret)
+		return ret;
 
-	clear_bit(ENGINE_IRQ_EXECLIST, &engine->irq_posted);
+	return 0;
 }
 
 static void reset_common_ring(struct intel_engine_cs *engine,
@@ -1688,14 +1783,13 @@ static void reset_common_ring(struct intel_engine_cs *engine,
 	struct intel_context *ce;
 	unsigned long flags;
 
-	GEM_TRACE("%s seqno=%x\n",
-		  engine->name, request ? request->global_seqno : 0);
+	GEM_TRACE("%s request global=%x, current=%d\n",
+		  engine->name, request ? request->global_seqno : 0,
+		  intel_engine_get_seqno(engine));
 
 	/* See execlists_cancel_requests() for the irq/spinlock split. */
 	local_irq_save(flags);
 
-	reset_irq(engine);
-
 	/*
 	 * Catch up with any missed context-switch interrupts.
 	 *
@@ -1706,15 +1800,13 @@ static void reset_common_ring(struct intel_engine_cs *engine,
 	 * requests were completed.
 	 */
 	execlists_cancel_port_requests(execlists);
+	reset_irq(engine);
 
 	/* Push back any incomplete requests for replay after the reset. */
 	spin_lock(&engine->timeline->lock);
 	__unwind_incomplete_requests(engine);
 	spin_unlock(&engine->timeline->lock);
 
-	/* Mark all CS interrupts as complete */
-	execlists->active = 0;
-
 	local_irq_restore(flags);
 
 	/*
@@ -2006,7 +2098,7 @@ static int gen8_init_rcs_context(struct i915_request *rq)
 {
 	int ret;
 
-	ret = intel_ring_workarounds_emit(rq);
+	ret = intel_ctx_workarounds_emit(rq);
 	if (ret)
 		return ret;
 
@@ -2066,11 +2158,13 @@ static void execlists_set_default_submission(struct intel_engine_cs *engine)
 	engine->unpark = NULL;
 
 	engine->flags |= I915_ENGINE_SUPPORTS_STATS;
+	if (engine->i915->preempt_context)
+		engine->flags |= I915_ENGINE_HAS_PREEMPTION;
 
 	engine->i915->caps.scheduler =
 		I915_SCHEDULER_CAP_ENABLED |
 		I915_SCHEDULER_CAP_PRIORITY;
-	if (engine->i915->preempt_context)
+	if (intel_engine_has_preemption(engine))
 		engine->i915->caps.scheduler |= I915_SCHEDULER_CAP_PREEMPTION;
 }
 
@@ -2109,7 +2203,20 @@ logical_ring_default_vfuncs(struct intel_engine_cs *engine)
 static inline void
 logical_ring_default_irqs(struct intel_engine_cs *engine)
 {
-	unsigned shift = engine->irq_shift;
+	unsigned int shift = 0;
+
+	if (INTEL_GEN(engine->i915) < 11) {
+		const u8 irq_shifts[] = {
+			[RCS]  = GEN8_RCS_IRQ_SHIFT,
+			[BCS]  = GEN8_BCS_IRQ_SHIFT,
+			[VCS]  = GEN8_VCS1_IRQ_SHIFT,
+			[VCS2] = GEN8_VCS2_IRQ_SHIFT,
+			[VECS] = GEN8_VECS_IRQ_SHIFT,
+		};
+
+		shift = irq_shifts[engine->id];
+	}
+
 	engine->irq_enable_mask = GT_RENDER_USER_INTERRUPT << shift;
 	engine->irq_keep_mask = GT_CONTEXT_SWITCH_INTERRUPT << shift;
 }
@@ -2542,3 +2649,7 @@ void intel_lr_context_resume(struct drm_i915_private *dev_priv)
 		}
 	}
 }
+
+#if IS_ENABLED(CONFIG_DRM_I915_SELFTEST)
+#include "selftests/intel_lrc.c"
+#endif