1 files changed, 2593 insertions, 332 deletions
diff --git a/drivers/gpu/drm/i915/gt/selftest_lrc.c b/drivers/gpu/drm/i915/gt/selftest_lrc.c
index 401e8b539297..65718ca2326e 100644
--- a/drivers/gpu/drm/i915/gt/selftest_lrc.c
+++ b/drivers/gpu/drm/i915/gt/selftest_lrc.c
@@ -7,6 +7,7 @@
 #include <linux/prime_numbers.h>
 
 #include "gem/i915_gem_pm.h"
+#include "gt/intel_engine_heartbeat.h"
 #include "gt/intel_reset.h"
 
 #include "i915_selftest.h"
@@ -19,75 +20,776 @@
 #include "gem/selftests/igt_gem_utils.h"
 #include "gem/selftests/mock_context.h"
 
+#define CS_GPR(engine, n) ((engine)->mmio_base + 0x600 + (n) * 4)
+#define NUM_GPR_DW (16 * 2) /* each GPR is 2 dwords */
+
+static struct i915_vma *create_scratch(struct intel_gt *gt)
+{
+	struct drm_i915_gem_object *obj;
+	struct i915_vma *vma;
+	int err;
+
+	obj = i915_gem_object_create_internal(gt->i915, PAGE_SIZE);
+	if (IS_ERR(obj))
+		return ERR_CAST(obj);
+
+	i915_gem_object_set_cache_coherency(obj, I915_CACHING_CACHED);
+
+	vma = i915_vma_instance(obj, &gt->ggtt->vm, NULL);
+	if (IS_ERR(vma)) {
+		i915_gem_object_put(obj);
+		return vma;
+	}
+
+	err = i915_vma_pin(vma, 0, 0, PIN_GLOBAL);
+	if (err) {
+		i915_gem_object_put(obj);
+		return ERR_PTR(err);
+	}
+
+	return vma;
+}
+
+static void engine_heartbeat_disable(struct intel_engine_cs *engine,
+				     unsigned long *saved)
+{
+	*saved = engine->props.heartbeat_interval_ms;
+	engine->props.heartbeat_interval_ms = 0;
+
+	intel_engine_pm_get(engine);
+	intel_engine_park_heartbeat(engine);
+}
+
+static void engine_heartbeat_enable(struct intel_engine_cs *engine,
+				    unsigned long saved)
+{
+	intel_engine_pm_put(engine);
+
+	engine->props.heartbeat_interval_ms = saved;
+}
+
 static int live_sanitycheck(void *arg)
 {
-	struct drm_i915_private *i915 = arg;
+	struct intel_gt *gt = arg;
 	struct intel_engine_cs *engine;
-	struct i915_gem_context *ctx;
 	enum intel_engine_id id;
 	struct igt_spinner spin;
-	intel_wakeref_t wakeref;
-	int err = -ENOMEM;
+	int err = 0;
 
-	if (!HAS_LOGICAL_RING_CONTEXTS(i915))
+	if (!HAS_LOGICAL_RING_CONTEXTS(gt->i915))
 		return 0;
 
-	mutex_lock(&i915->drm.struct_mutex);
-	wakeref = intel_runtime_pm_get(&i915->runtime_pm);
-
-	if (igt_spinner_init(&spin, i915))
-		goto err_unlock;
-
-	ctx = kernel_context(i915);
-	if (!ctx)
-		goto err_spin;
+	if (igt_spinner_init(&spin, gt))
+		return -ENOMEM;
 
-	for_each_engine(engine, i915, id) {
+	for_each_engine(engine, gt, id) {
+		struct intel_context *ce;
 		struct i915_request *rq;
 
-		rq = igt_spinner_create_request(&spin, ctx, engine, MI_NOOP);
+		ce = intel_context_create(engine);
+		if (IS_ERR(ce)) {
+			err = PTR_ERR(ce);
+			break;
+		}
+
+		rq = igt_spinner_create_request(&spin, ce, MI_NOOP);
 		if (IS_ERR(rq)) {
 			err = PTR_ERR(rq);
-			goto err_ctx;
+			goto out_ctx;
 		}
 
 		i915_request_add(rq);
 		if (!igt_wait_for_spinner(&spin, rq)) {
 			GEM_TRACE("spinner failed to start\n");
 			GEM_TRACE_DUMP();
-			i915_gem_set_wedged(i915);
+			intel_gt_set_wedged(gt);
 			err = -EIO;
-			goto err_ctx;
+			goto out_ctx;
 		}
 
 		igt_spinner_end(&spin);
-		if (igt_flush_test(i915, I915_WAIT_LOCKED)) {
+		if (igt_flush_test(gt->i915)) {
 			err = -EIO;
-			goto err_ctx;
+			goto out_ctx;
 		}
+
+out_ctx:
+		intel_context_put(ce);
+		if (err)
+			break;
 	}
 
+	igt_spinner_fini(&spin);
+	return err;
+}
+
+static int live_unlite_restore(struct intel_gt *gt, int prio)
+{
+	struct intel_engine_cs *engine;
+	enum intel_engine_id id;
+	struct igt_spinner spin;
+	int err = -ENOMEM;
+
+	/*
+	 * Check that we can correctly context switch between 2 instances
+	 * on the same engine from the same parent context.
+	 */
+
+	if (igt_spinner_init(&spin, gt))
+		return err;
+
 	err = 0;
-err_ctx:
-	kernel_context_close(ctx);
-err_spin:
+	for_each_engine(engine, gt, id) {
+		struct intel_context *ce[2] = {};
+		struct i915_request *rq[2];
+		struct igt_live_test t;
+		unsigned long saved;
+		int n;
+
+		if (prio && !intel_engine_has_preemption(engine))
+			continue;
+
+		if (!intel_engine_can_store_dword(engine))
+			continue;
+
+		if (igt_live_test_begin(&t, gt->i915, __func__, engine->name)) {
+			err = -EIO;
+			break;
+		}
+		engine_heartbeat_disable(engine, &saved);
+
+		for (n = 0; n < ARRAY_SIZE(ce); n++) {
+			struct intel_context *tmp;
+
+			tmp = intel_context_create(engine);
+			if (IS_ERR(tmp)) {
+				err = PTR_ERR(tmp);
+				goto err_ce;
+			}
+
+			err = intel_context_pin(tmp);
+			if (err) {
+				intel_context_put(tmp);
+				goto err_ce;
+			}
+
+			/*
+			 * Setup the pair of contexts such that if we
+			 * lite-restore using the RING_TAIL from ce[1] it
+			 * will execute garbage from ce[0]->ring.
+			 */
+			memset(tmp->ring->vaddr,
+			       POISON_INUSE, /* IPEHR: 0x5a5a5a5a [hung!] */
+			       tmp->ring->vma->size);
+
+			ce[n] = tmp;
+		}
+		GEM_BUG_ON(!ce[1]->ring->size);
+		intel_ring_reset(ce[1]->ring, ce[1]->ring->size / 2);
+		__execlists_update_reg_state(ce[1], engine);
+
+		rq[0] = igt_spinner_create_request(&spin, ce[0], MI_ARB_CHECK);
+		if (IS_ERR(rq[0])) {
+			err = PTR_ERR(rq[0]);
+			goto err_ce;
+		}
+
+		i915_request_get(rq[0]);
+		i915_request_add(rq[0]);
+		GEM_BUG_ON(rq[0]->postfix > ce[1]->ring->emit);
+
+		if (!igt_wait_for_spinner(&spin, rq[0])) {
+			i915_request_put(rq[0]);
+			goto err_ce;
+		}
+
+		rq[1] = i915_request_create(ce[1]);
+		if (IS_ERR(rq[1])) {
+			err = PTR_ERR(rq[1]);
+			i915_request_put(rq[0]);
+			goto err_ce;
+		}
+
+		if (!prio) {
+			/*
+			 * Ensure we do the switch to ce[1] on completion.
+			 *
+			 * rq[0] is already submitted, so this should reduce
+			 * to a no-op (a wait on a request on the same engine
+			 * uses the submit fence, not the completion fence),
+			 * but it will install a dependency on rq[1] for rq[0]
+			 * that will prevent the pair being reordered by
+			 * timeslicing.
+			 */
+			i915_request_await_dma_fence(rq[1], &rq[0]->fence);
+		}
+
+		i915_request_get(rq[1]);
+		i915_request_add(rq[1]);
+		GEM_BUG_ON(rq[1]->postfix <= rq[0]->postfix);
+		i915_request_put(rq[0]);
+
+		if (prio) {
+			struct i915_sched_attr attr = {
+				.priority = prio,
+			};
+
+			/* Alternatively preempt the spinner with ce[1] */
+			engine->schedule(rq[1], &attr);
+		}
+
+		/* And switch back to ce[0] for good measure */
+		rq[0] = i915_request_create(ce[0]);
+		if (IS_ERR(rq[0])) {
+			err = PTR_ERR(rq[0]);
+			i915_request_put(rq[1]);
+			goto err_ce;
+		}
+
+		i915_request_await_dma_fence(rq[0], &rq[1]->fence);
+		i915_request_get(rq[0]);
+		i915_request_add(rq[0]);
+		GEM_BUG_ON(rq[0]->postfix > rq[1]->postfix);
+		i915_request_put(rq[1]);
+		i915_request_put(rq[0]);
+
+err_ce:
+		tasklet_kill(&engine->execlists.tasklet); /* flush submission */
+		igt_spinner_end(&spin);
+		for (n = 0; n < ARRAY_SIZE(ce); n++) {
+			if (IS_ERR_OR_NULL(ce[n]))
+				break;
+
+			intel_context_unpin(ce[n]);
+			intel_context_put(ce[n]);
+		}
+
+		engine_heartbeat_enable(engine, saved);
+		if (igt_live_test_end(&t))
+			err = -EIO;
+		if (err)
+			break;
+	}
+
+	igt_spinner_fini(&spin);
+	return err;
+}
+
+static int live_unlite_switch(void *arg)
+{
+	return live_unlite_restore(arg, 0);
+}
+
+static int live_unlite_preempt(void *arg)
+{
+	return live_unlite_restore(arg, I915_USER_PRIORITY(I915_PRIORITY_MAX));
+}
+
+static int live_hold_reset(void *arg)
+{
+	struct intel_gt *gt = arg;
+	struct intel_engine_cs *engine;
+	enum intel_engine_id id;
+	struct igt_spinner spin;
+	int err = 0;
+
+	/*
+	 * In order to support offline error capture for fast preempt reset,
+	 * we need to decouple the guilty request and ensure that it and its
+	 * descendents are not executed while the capture is in progress.
+	 */
+
+	if (!intel_has_reset_engine(gt))
+		return 0;
+
+	if (igt_spinner_init(&spin, gt))
+		return -ENOMEM;
+
+	for_each_engine(engine, gt, id) {
+		struct intel_context *ce;
+		unsigned long heartbeat;
+		struct i915_request *rq;
+
+		ce = intel_context_create(engine);
+		if (IS_ERR(ce)) {
+			err = PTR_ERR(ce);
+			break;
+		}
+
+		engine_heartbeat_disable(engine, &heartbeat);
+
+		rq = igt_spinner_create_request(&spin, ce, MI_ARB_CHECK);
+		if (IS_ERR(rq)) {
+			err = PTR_ERR(rq);
+			goto out;
+		}
+		i915_request_add(rq);
+
+		if (!igt_wait_for_spinner(&spin, rq)) {
+			intel_gt_set_wedged(gt);
+			err = -ETIME;
+			goto out;
+		}
+
+		/* We have our request executing, now remove it and reset */
+
+		if (test_and_set_bit(I915_RESET_ENGINE + id,
+				     &gt->reset.flags)) {
+			intel_gt_set_wedged(gt);
+			err = -EBUSY;
+			goto out;
+		}
+		tasklet_disable(&engine->execlists.tasklet);
+
+		engine->execlists.tasklet.func(engine->execlists.tasklet.data);
+		GEM_BUG_ON(execlists_active(&engine->execlists) != rq);
+
+		i915_request_get(rq);
+		execlists_hold(engine, rq);
+		GEM_BUG_ON(!i915_request_on_hold(rq));
+
+		intel_engine_reset(engine, NULL);
+		GEM_BUG_ON(rq->fence.error != -EIO);
+
+		tasklet_enable(&engine->execlists.tasklet);
+		clear_and_wake_up_bit(I915_RESET_ENGINE + id,
+				      &gt->reset.flags);
+
+		/* Check that we do not resubmit the held request */
+		if (!i915_request_wait(rq, 0, HZ / 5)) {
+			pr_err("%s: on hold request completed!\n",
+			       engine->name);
+			i915_request_put(rq);
+			err = -EIO;
+			goto out;
+		}
+		GEM_BUG_ON(!i915_request_on_hold(rq));
+
+		/* But is resubmitted on release */
+		execlists_unhold(engine, rq);
+		if (i915_request_wait(rq, 0, HZ / 5) < 0) {
+			pr_err("%s: held request did not complete!\n",
+			       engine->name);
+			intel_gt_set_wedged(gt);
+			err = -ETIME;
+		}
+		i915_request_put(rq);
+
+out:
+		engine_heartbeat_enable(engine, heartbeat);
+		intel_context_put(ce);
+		if (err)
+			break;
+	}
+
 	igt_spinner_fini(&spin);
-err_unlock:
-	igt_flush_test(i915, I915_WAIT_LOCKED);
-	intel_runtime_pm_put(&i915->runtime_pm, wakeref);
-	mutex_unlock(&i915->drm.struct_mutex);
+	return err;
+}
+
+static int
+emit_semaphore_chain(struct i915_request *rq, struct i915_vma *vma, int idx)
+{
+	u32 *cs;
+
+	cs = intel_ring_begin(rq, 10);
+	if (IS_ERR(cs))
+		return PTR_ERR(cs);
+
+	*cs++ = MI_ARB_ON_OFF | MI_ARB_ENABLE;
+
+	*cs++ = MI_SEMAPHORE_WAIT |
+		MI_SEMAPHORE_GLOBAL_GTT |
+		MI_SEMAPHORE_POLL |
+		MI_SEMAPHORE_SAD_NEQ_SDD;
+	*cs++ = 0;
+	*cs++ = i915_ggtt_offset(vma) + 4 * idx;
+	*cs++ = 0;
+
+	if (idx > 0) {
+		*cs++ = MI_STORE_DWORD_IMM_GEN4 | MI_USE_GGTT;
+		*cs++ = i915_ggtt_offset(vma) + 4 * (idx - 1);
+		*cs++ = 0;
+		*cs++ = 1;
+	} else {
+		*cs++ = MI_NOOP;
+		*cs++ = MI_NOOP;
+		*cs++ = MI_NOOP;
+		*cs++ = MI_NOOP;
+	}
+
+	*cs++ = MI_ARB_ON_OFF | MI_ARB_DISABLE;
+
+	intel_ring_advance(rq, cs);
+	return 0;
+}
+
+static struct i915_request *
+semaphore_queue(struct intel_engine_cs *engine, struct i915_vma *vma, int idx)
+{
+	struct intel_context *ce;
+	struct i915_request *rq;
+	int err;
+
+	ce = intel_context_create(engine);
+	if (IS_ERR(ce))
+		return ERR_CAST(ce);
+
+	rq = intel_context_create_request(ce);
+	if (IS_ERR(rq))
+		goto out_ce;
+
+	err = 0;
+	if (rq->engine->emit_init_breadcrumb)
+		err = rq->engine->emit_init_breadcrumb(rq);
+	if (err == 0)
+		err = emit_semaphore_chain(rq, vma, idx);
+	if (err == 0)
+		i915_request_get(rq);
+	i915_request_add(rq);
+	if (err)
+		rq = ERR_PTR(err);
+
+out_ce:
+	intel_context_put(ce);
+	return rq;
+}
+
+static int
+release_queue(struct intel_engine_cs *engine,
+	      struct i915_vma *vma,
+	      int idx, int prio)
+{
+	struct i915_sched_attr attr = {
+		.priority = prio,
+	};
+	struct i915_request *rq;
+	u32 *cs;
+
+	rq = intel_engine_create_kernel_request(engine);
+	if (IS_ERR(rq))
+		return PTR_ERR(rq);
+
+	cs = intel_ring_begin(rq, 4);
+	if (IS_ERR(cs)) {
+		i915_request_add(rq);
+		return PTR_ERR(cs);
+	}
+
+	*cs++ = MI_STORE_DWORD_IMM_GEN4 | MI_USE_GGTT;
+	*cs++ = i915_ggtt_offset(vma) + 4 * (idx - 1);
+	*cs++ = 0;
+	*cs++ = 1;
+
+	intel_ring_advance(rq, cs);
+
+	i915_request_get(rq);
+	i915_request_add(rq);
+
+	local_bh_disable();
+	engine->schedule(rq, &attr);
+	local_bh_enable(); /* kick tasklet */
+
+	i915_request_put(rq);
+
+	return 0;
+}
+
+static int
+slice_semaphore_queue(struct intel_engine_cs *outer,
+		      struct i915_vma *vma,
+		      int count)
+{
+	struct intel_engine_cs *engine;
+	struct i915_request *head;
+	enum intel_engine_id id;
+	int err, i, n = 0;
+
+	head = semaphore_queue(outer, vma, n++);
+	if (IS_ERR(head))
+		return PTR_ERR(head);
+
+	for_each_engine(engine, outer->gt, id) {
+		for (i = 0; i < count; i++) {
+			struct i915_request *rq;
+
+			rq = semaphore_queue(engine, vma, n++);
+			if (IS_ERR(rq)) {
+				err = PTR_ERR(rq);
+				goto out;
+			}
+
+			i915_request_put(rq);
+		}
+	}
+
+	err = release_queue(outer, vma, n, INT_MAX);
+	if (err)
+		goto out;
+
+	if (i915_request_wait(head, 0,
+			      2 * RUNTIME_INFO(outer->i915)->num_engines * (count + 2) * (count + 3)) < 0) {
+		pr_err("Failed to slice along semaphore chain of length (%d, %d)!\n",
+		       count, n);
+		GEM_TRACE_DUMP();
+		intel_gt_set_wedged(outer->gt);
+		err = -EIO;
+	}
+
+out:
+	i915_request_put(head);
+	return err;
+}
+
+static int live_timeslice_preempt(void *arg)
+{
+	struct intel_gt *gt = arg;
+	struct drm_i915_gem_object *obj;
+	struct i915_vma *vma;
+	void *vaddr;
+	int err = 0;
+	int count;
+
+	/*
+	 * If a request takes too long, we would like to give other users
+	 * a fair go on the GPU. In particular, users may create batches
+	 * that wait upon external input, where that input may even be
+	 * supplied by another GPU job. To avoid blocking forever, we
+	 * need to preempt the current task and replace it with another
+	 * ready task.
+	 */
+	if (!IS_ACTIVE(CONFIG_DRM_I915_TIMESLICE_DURATION))
+		return 0;
+
+	obj = i915_gem_object_create_internal(gt->i915, PAGE_SIZE);
+	if (IS_ERR(obj))
+		return PTR_ERR(obj);
+
+	vma = i915_vma_instance(obj, &gt->ggtt->vm, NULL);
+	if (IS_ERR(vma)) {
+		err = PTR_ERR(vma);
+		goto err_obj;
+	}
+
+	vaddr = i915_gem_object_pin_map(obj, I915_MAP_WC);
+	if (IS_ERR(vaddr)) {
+		err = PTR_ERR(vaddr);
+		goto err_obj;
+	}
+
+	err = i915_vma_pin(vma, 0, 0, PIN_GLOBAL);
+	if (err)
+		goto err_map;
+
+	for_each_prime_number_from(count, 1, 16) {
+		struct intel_engine_cs *engine;
+		enum intel_engine_id id;
+
+		for_each_engine(engine, gt, id) {
+			unsigned long saved;
+
+			if (!intel_engine_has_preemption(engine))
+				continue;
+
+			memset(vaddr, 0, PAGE_SIZE);
+
+			engine_heartbeat_disable(engine, &saved);
+			err = slice_semaphore_queue(engine, vma, count);
+			engine_heartbeat_enable(engine, saved);
+			if (err)
+				goto err_pin;
+
+			if (igt_flush_test(gt->i915)) {
+				err = -EIO;
+				goto err_pin;
+			}
+		}
+	}
+
+err_pin:
+	i915_vma_unpin(vma);
+err_map:
+	i915_gem_object_unpin_map(obj);
+err_obj:
+	i915_gem_object_put(obj);
+	return err;
+}
+
+static struct i915_request *nop_request(struct intel_engine_cs *engine)
+{
+	struct i915_request *rq;
+
+	rq = intel_engine_create_kernel_request(engine);
+	if (IS_ERR(rq))
+		return rq;
+
+	i915_request_get(rq);
+	i915_request_add(rq);
+
+	return rq;
+}
+
+static int wait_for_submit(struct intel_engine_cs *engine,
+			   struct i915_request *rq,
+			   unsigned long timeout)
+{
+	timeout += jiffies;
+	do {
+		cond_resched();
+		intel_engine_flush_submission(engine);
+		if (i915_request_is_active(rq))
+			return 0;
+	} while (time_before(jiffies, timeout));
+
+	return -ETIME;
+}
+
+static long timeslice_threshold(const struct intel_engine_cs *engine)
+{
+	return 2 * msecs_to_jiffies_timeout(timeslice(engine)) + 1;
+}
+
+static int live_timeslice_queue(void *arg)
+{
+	struct intel_gt *gt = arg;
+	struct drm_i915_gem_object *obj;
+	struct intel_engine_cs *engine;
+	enum intel_engine_id id;
+	struct i915_vma *vma;
+	void *vaddr;
+	int err = 0;
+
+	/*
+	 * Make sure that even if ELSP[0] and ELSP[1] are filled with
+	 * timeslicing between them disabled, we *do* enable timeslicing
+	 * if the queue demands it. (Normally, we do not submit if
+	 * ELSP[1] is already occupied, so must rely on timeslicing to
+	 * eject ELSP[0] in favour of the queue.)
+	 */
+	if (!IS_ACTIVE(CONFIG_DRM_I915_TIMESLICE_DURATION))
+		return 0;
+
+	obj = i915_gem_object_create_internal(gt->i915, PAGE_SIZE);
+	if (IS_ERR(obj))
+		return PTR_ERR(obj);
+
+	vma = i915_vma_instance(obj, &gt->ggtt->vm, NULL);
+	if (IS_ERR(vma)) {
+		err = PTR_ERR(vma);
+		goto err_obj;
+	}
+
+	vaddr = i915_gem_object_pin_map(obj, I915_MAP_WC);
+	if (IS_ERR(vaddr)) {
+		err = PTR_ERR(vaddr);
+		goto err_obj;
+	}
+
+	err = i915_vma_pin(vma, 0, 0, PIN_GLOBAL);
+	if (err)
+		goto err_map;
+
+	for_each_engine(engine, gt, id) {
+		struct i915_sched_attr attr = {
+			.priority = I915_USER_PRIORITY(I915_PRIORITY_MAX),
+		};
+		struct i915_request *rq, *nop;
+		unsigned long saved;
+
+		if (!intel_engine_has_preemption(engine))
+			continue;
+
+		engine_heartbeat_disable(engine, &saved);
+		memset(vaddr, 0, PAGE_SIZE);
+
+		/* ELSP[0]: semaphore wait */
+		rq = semaphore_queue(engine, vma, 0);
+		if (IS_ERR(rq)) {
+			err = PTR_ERR(rq);
+			goto err_heartbeat;
+		}
+		engine->schedule(rq, &attr);
+		err = wait_for_submit(engine, rq, HZ / 2);
+		if (err) {
+			pr_err("%s: Timed out trying to submit semaphores\n",
+			       engine->name);
+			goto err_rq;
+		}
+
+		/* ELSP[1]: nop request */
+		nop = nop_request(engine);
+		if (IS_ERR(nop)) {
+			err = PTR_ERR(nop);
+			goto err_rq;
+		}
+		err = wait_for_submit(engine, nop, HZ / 2);
+		i915_request_put(nop);
+		if (err) {
+			pr_err("%s: Timed out trying to submit nop\n",
+			       engine->name);
+			goto err_rq;
+		}
+
+		GEM_BUG_ON(i915_request_completed(rq));
+		GEM_BUG_ON(execlists_active(&engine->execlists) != rq);
+
+		/* Queue: semaphore signal, matching priority as semaphore */
+		err = release_queue(engine, vma, 1, effective_prio(rq));
+		if (err)
+			goto err_rq;
+
+		intel_engine_flush_submission(engine);
+		if (!READ_ONCE(engine->execlists.timer.expires) &&
+		    !i915_request_completed(rq)) {
+			struct drm_printer p =
+				drm_info_printer(gt->i915->drm.dev);
+
+			GEM_TRACE_ERR("%s: Failed to enable timeslicing!\n",
+				      engine->name);
+			intel_engine_dump(engine, &p,
+					  "%s\n", engine->name);
+			GEM_TRACE_DUMP();
+
+			memset(vaddr, 0xff, PAGE_SIZE);
+			err = -EINVAL;
+		}
+
+		/* Timeslice every jiffy, so within 2 we should signal */
+		if (i915_request_wait(rq, 0, timeslice_threshold(engine)) < 0) {
+			struct drm_printer p =
+				drm_info_printer(gt->i915->drm.dev);
+
+			pr_err("%s: Failed to timeslice into queue\n",
+			       engine->name);
+			intel_engine_dump(engine, &p,
+					  "%s\n", engine->name);
+
+			memset(vaddr, 0xff, PAGE_SIZE);
+			err = -EIO;
+		}
+err_rq:
+		i915_request_put(rq);
+err_heartbeat:
+		engine_heartbeat_enable(engine, saved);
+		if (err)
+			break;
+	}
+
+	i915_vma_unpin(vma);
+err_map:
+	i915_gem_object_unpin_map(obj);
+err_obj:
+	i915_gem_object_put(obj);
 	return err;
 }
 
 static int live_busywait_preempt(void *arg)
 {
-	struct drm_i915_private *i915 = arg;
+	struct intel_gt *gt = arg;
 	struct i915_gem_context *ctx_hi, *ctx_lo;
 	struct intel_engine_cs *engine;
 	struct drm_i915_gem_object *obj;
 	struct i915_vma *vma;
 	enum intel_engine_id id;
-	intel_wakeref_t wakeref;
 	int err = -ENOMEM;
 	u32 *map;
 
@@ -96,22 +798,19 @@ static int live_busywait_preempt(void *arg)
 	 * preempt the busywaits used to synchronise between rings.
 	 */
 
-	mutex_lock(&i915->drm.struct_mutex);
-	wakeref = intel_runtime_pm_get(&i915->runtime_pm);
-
-	ctx_hi = kernel_context(i915);
+	ctx_hi = kernel_context(gt->i915);
 	if (!ctx_hi)
-		goto err_unlock;
+		return -ENOMEM;
 	ctx_hi->sched.priority =
 		I915_USER_PRIORITY(I915_CONTEXT_MAX_USER_PRIORITY);
 
-	ctx_lo = kernel_context(i915);
+	ctx_lo = kernel_context(gt->i915);
 	if (!ctx_lo)
 		goto err_ctx_hi;
 	ctx_lo->sched.priority =
 		I915_USER_PRIORITY(I915_CONTEXT_MIN_USER_PRIORITY);
 
-	obj = i915_gem_object_create_internal(i915, PAGE_SIZE);
+	obj = i915_gem_object_create_internal(gt->i915, PAGE_SIZE);
 	if (IS_ERR(obj)) {
 		err = PTR_ERR(obj);
 		goto err_ctx_lo;
@@ -123,7 +822,7 @@ static int live_busywait_preempt(void *arg)
 		goto err_obj;
 	}
 
-	vma = i915_vma_instance(obj, &i915->ggtt.vm, NULL);
+	vma = i915_vma_instance(obj, &gt->ggtt->vm, NULL);
 	if (IS_ERR(vma)) {
 		err = PTR_ERR(vma);
 		goto err_map;
@@ -133,15 +832,18 @@ static int live_busywait_preempt(void *arg)
 	if (err)
 		goto err_map;
 
-	for_each_engine(engine, i915, id) {
+	for_each_engine(engine, gt, id) {
 		struct i915_request *lo, *hi;
 		struct igt_live_test t;
 		u32 *cs;
 
+		if (!intel_engine_has_preemption(engine))
+			continue;
+
 		if (!intel_engine_can_store_dword(engine))
 			continue;
 
-		if (igt_live_test_begin(&t, i915, __func__, engine->name)) {
+		if (igt_live_test_begin(&t, gt->i915, __func__, engine->name)) {
 			err = -EIO;
 			goto err_vma;
 		}
@@ -184,15 +886,19 @@ static int live_busywait_preempt(void *arg)
 		*cs++ = 0;
 
 		intel_ring_advance(lo, cs);
+
+		i915_request_get(lo);
 		i915_request_add(lo);
 
 		if (wait_for(READ_ONCE(*map), 10)) {
+			i915_request_put(lo);
 			err = -ETIMEDOUT;
 			goto err_vma;
 		}
 
 		/* Low priority request should be busywaiting now */
 		if (i915_request_wait(lo, 0, 1) != -ETIME) {
+			i915_request_put(lo);
 			pr_err("%s: Busywaiting request did not!\n",
 			       engine->name);
 			err = -EIO;
@@ -202,6 +908,7 @@ static int live_busywait_preempt(void *arg)
 		hi = igt_request_alloc(ctx_hi, engine);
 		if (IS_ERR(hi)) {
 			err = PTR_ERR(hi);
+			i915_request_put(lo);
 			goto err_vma;
 		}
 
@@ -209,6 +916,7 @@ static int live_busywait_preempt(void *arg)
 		if (IS_ERR(cs)) {
 			err = PTR_ERR(cs);
 			i915_request_add(hi);
+			i915_request_put(lo);
 			goto err_vma;
 		}
 
@@ -221,7 +929,7 @@ static int live_busywait_preempt(void *arg)
 		i915_request_add(hi);
 
 		if (i915_request_wait(lo, 0, HZ / 5) < 0) {
-			struct drm_printer p = drm_info_printer(i915->drm.dev);
+			struct drm_printer p = drm_info_printer(gt->i915->drm.dev);
 
 			pr_err("%s: Failed to preempt semaphore busywait!\n",
 			       engine->name);
@@ -229,11 +937,13 @@ static int live_busywait_preempt(void *arg)
 			intel_engine_dump(engine, &p, "%s\n", engine->name);
 			GEM_TRACE_DUMP();
 
-			i915_gem_set_wedged(i915);
+			i915_request_put(lo);
+			intel_gt_set_wedged(gt);
 			err = -EIO;
 			goto err_vma;
 		}
 		GEM_BUG_ON(READ_ONCE(*map));
+		i915_request_put(lo);
 
 		if (igt_live_test_end(&t)) {
 			err = -EIO;
@@ -252,65 +962,74 @@ err_ctx_lo:
 	kernel_context_close(ctx_lo);
 err_ctx_hi:
 	kernel_context_close(ctx_hi);
-err_unlock:
-	if (igt_flush_test(i915, I915_WAIT_LOCKED))
-		err = -EIO;
-	intel_runtime_pm_put(&i915->runtime_pm, wakeref);
-	mutex_unlock(&i915->drm.struct_mutex);
 	return err;
 }
 
+static struct i915_request *
+spinner_create_request(struct igt_spinner *spin,
+		       struct i915_gem_context *ctx,
+		       struct intel_engine_cs *engine,
+		       u32 arb)
+{
+	struct intel_context *ce;
+	struct i915_request *rq;
+
+	ce = i915_gem_context_get_engine(ctx, engine->legacy_idx);
+	if (IS_ERR(ce))
+		return ERR_CAST(ce);
+
+	rq = igt_spinner_create_request(spin, ce, arb);
+	intel_context_put(ce);
+	return rq;
+}
+
 static int live_preempt(void *arg)
 {
-	struct drm_i915_private *i915 = arg;
+	struct intel_gt *gt = arg;
 	struct i915_gem_context *ctx_hi, *ctx_lo;
 	struct igt_spinner spin_hi, spin_lo;
 	struct intel_engine_cs *engine;
 	enum intel_engine_id id;
-	intel_wakeref_t wakeref;
 	int err = -ENOMEM;
 
-	if (!HAS_LOGICAL_RING_PREEMPTION(i915))
+	if (!HAS_LOGICAL_RING_PREEMPTION(gt->i915))
 		return 0;
 
-	if (!(i915->caps.scheduler & I915_SCHEDULER_CAP_PREEMPTION))
+	if (!(gt->i915->caps.scheduler & I915_SCHEDULER_CAP_PREEMPTION))
 		pr_err("Logical preemption supported, but not exposed\n");
 
-	mutex_lock(&i915->drm.struct_mutex);
-	wakeref = intel_runtime_pm_get(&i915->runtime_pm);
-
-	if (igt_spinner_init(&spin_hi, i915))
-		goto err_unlock;
+	if (igt_spinner_init(&spin_hi, gt))
+		return -ENOMEM;
 
-	if (igt_spinner_init(&spin_lo, i915))
+	if (igt_spinner_init(&spin_lo, gt))
 		goto err_spin_hi;
 
-	ctx_hi = kernel_context(i915);
+	ctx_hi = kernel_context(gt->i915);
 	if (!ctx_hi)
 		goto err_spin_lo;
 	ctx_hi->sched.priority =
 		I915_USER_PRIORITY(I915_CONTEXT_MAX_USER_PRIORITY);
 
-	ctx_lo = kernel_context(i915);
+	ctx_lo = kernel_context(gt->i915);
 	if (!ctx_lo)
 		goto err_ctx_hi;
 	ctx_lo->sched.priority =
 		I915_USER_PRIORITY(I915_CONTEXT_MIN_USER_PRIORITY);
 
-	for_each_engine(engine, i915, id) {
+	for_each_engine(engine, gt, id) {
 		struct igt_live_test t;
 		struct i915_request *rq;
 
 		if (!intel_engine_has_preemption(engine))
 			continue;
 
-		if (igt_live_test_begin(&t, i915, __func__, engine->name)) {
+		if (igt_live_test_begin(&t, gt->i915, __func__, engine->name)) {
 			err = -EIO;
 			goto err_ctx_lo;
 		}
 
-		rq = igt_spinner_create_request(&spin_lo, ctx_lo, engine,
-						MI_ARB_CHECK);
+		rq = spinner_create_request(&spin_lo, ctx_lo, engine,
+					    MI_ARB_CHECK);
 		if (IS_ERR(rq)) {
 			err = PTR_ERR(rq);
 			goto err_ctx_lo;
@@ -320,13 +1039,13 @@ static int live_preempt(void *arg)
 		if (!igt_wait_for_spinner(&spin_lo, rq)) {
 			GEM_TRACE("lo spinner failed to start\n");
 			GEM_TRACE_DUMP();
-			i915_gem_set_wedged(i915);
+			intel_gt_set_wedged(gt);
 			err = -EIO;
 			goto err_ctx_lo;
 		}
 
-		rq = igt_spinner_create_request(&spin_hi, ctx_hi, engine,
-						MI_ARB_CHECK);
+		rq = spinner_create_request(&spin_hi, ctx_hi, engine,
+					    MI_ARB_CHECK);
 		if (IS_ERR(rq)) {
 			igt_spinner_end(&spin_lo);
 			err = PTR_ERR(rq);
@@ -337,7 +1056,7 @@ static int live_preempt(void *arg)
 		if (!igt_wait_for_spinner(&spin_hi, rq)) {
 			GEM_TRACE("hi spinner failed to start\n");
 			GEM_TRACE_DUMP();
-			i915_gem_set_wedged(i915);
+			intel_gt_set_wedged(gt);
 			err = -EIO;
 			goto err_ctx_lo;
 		}
@@ -360,58 +1079,53 @@ err_spin_lo:
 	igt_spinner_fini(&spin_lo);
 err_spin_hi:
 	igt_spinner_fini(&spin_hi);
-err_unlock:
-	igt_flush_test(i915, I915_WAIT_LOCKED);
-	intel_runtime_pm_put(&i915->runtime_pm, wakeref);
-	mutex_unlock(&i915->drm.struct_mutex);
 	return err;
 }
 
 static int live_late_preempt(void *arg)
 {
-	struct drm_i915_private *i915 = arg;
+	struct intel_gt *gt = arg;
 	struct i915_gem_context *ctx_hi, *ctx_lo;
 	struct igt_spinner spin_hi, spin_lo;
 	struct intel_engine_cs *engine;
 	struct i915_sched_attr attr = {};
 	enum intel_engine_id id;
-	intel_wakeref_t wakeref;
 	int err = -ENOMEM;
 
-	if (!HAS_LOGICAL_RING_PREEMPTION(i915))
+	if (!HAS_LOGICAL_RING_PREEMPTION(gt->i915))
 		return 0;
 
-	mutex_lock(&i915->drm.struct_mutex);
-	wakeref = intel_runtime_pm_get(&i915->runtime_pm);
-
-	if (igt_spinner_init(&spin_hi, i915))
-		goto err_unlock;
+	if (igt_spinner_init(&spin_hi, gt))
+		return -ENOMEM;
 
-	if (igt_spinner_init(&spin_lo, i915))
+	if (igt_spinner_init(&spin_lo, gt))
 		goto err_spin_hi;
 
-	ctx_hi = kernel_context(i915);
+	ctx_hi = kernel_context(gt->i915);
 	if (!ctx_hi)
 		goto err_spin_lo;
 
-	ctx_lo = kernel_context(i915);
+	ctx_lo = kernel_context(gt->i915);
 	if (!ctx_lo)
 		goto err_ctx_hi;
 
-	for_each_engine(engine, i915, id) {
+	/* Make sure ctx_lo stays before ctx_hi until we trigger preemption. */
+	ctx_lo->sched.priority = I915_USER_PRIORITY(1);
+
+	for_each_engine(engine, gt, id) {
 		struct igt_live_test t;
 		struct i915_request *rq;
 
 		if (!intel_engine_has_preemption(engine))
 			continue;
 
-		if (igt_live_test_begin(&t, i915, __func__, engine->name)) {
+		if (igt_live_test_begin(&t, gt->i915, __func__, engine->name)) {
 			err = -EIO;
 			goto err_ctx_lo;
 		}
 
-		rq = igt_spinner_create_request(&spin_lo, ctx_lo, engine,
-						MI_ARB_CHECK);
+		rq = spinner_create_request(&spin_lo, ctx_lo, engine,
+					    MI_ARB_CHECK);
 		if (IS_ERR(rq)) {
 			err = PTR_ERR(rq);
 			goto err_ctx_lo;
@@ -423,8 +1137,8 @@ static int live_late_preempt(void *arg)
 			goto err_wedged;
 		}
 
-		rq = igt_spinner_create_request(&spin_hi, ctx_hi, engine,
-						MI_NOOP);
+		rq = spinner_create_request(&spin_hi, ctx_hi, engine,
+					    MI_NOOP);
 		if (IS_ERR(rq)) {
 			igt_spinner_end(&spin_lo);
 			err = PTR_ERR(rq);
@@ -464,16 +1178,12 @@ err_spin_lo:
 	igt_spinner_fini(&spin_lo);
 err_spin_hi:
 	igt_spinner_fini(&spin_hi);
-err_unlock:
-	igt_flush_test(i915, I915_WAIT_LOCKED);
-	intel_runtime_pm_put(&i915->runtime_pm, wakeref);
-	mutex_unlock(&i915->drm.struct_mutex);
 	return err;
 
 err_wedged:
 	igt_spinner_end(&spin_hi);
 	igt_spinner_end(&spin_lo);
-	i915_gem_set_wedged(i915);
+	intel_gt_set_wedged(gt);
 	err = -EIO;
 	goto err_ctx_lo;
 }
@@ -483,14 +1193,13 @@ struct preempt_client {
 	struct i915_gem_context *ctx;
 };
 
-static int preempt_client_init(struct drm_i915_private *i915,
-			       struct preempt_client *c)
+static int preempt_client_init(struct intel_gt *gt, struct preempt_client *c)
 {
-	c->ctx = kernel_context(i915);
+	c->ctx = kernel_context(gt->i915);
 	if (!c->ctx)
 		return -ENOMEM;
 
-	if (igt_spinner_init(&c->spin, i915))
+	if (igt_spinner_init(&c->spin, gt))
 		goto err_ctx;
 
 	return 0;
@@ -506,16 +1215,435 @@ static void preempt_client_fini(struct preempt_client *c)
 	kernel_context_close(c->ctx);
 }
 
+static int live_nopreempt(void *arg)
+{
+	struct intel_gt *gt = arg;
+	struct intel_engine_cs *engine;
+	struct preempt_client a, b;
+	enum intel_engine_id id;
+	int err = -ENOMEM;
+
+	/*
+	 * Verify that we can disable preemption for an individual request
+	 * that may be being observed and not want to be interrupted.
+	 */
+
+	if (!HAS_LOGICAL_RING_PREEMPTION(gt->i915))
+		return 0;
+
+	if (preempt_client_init(gt, &a))
+		return -ENOMEM;
+	if (preempt_client_init(gt, &b))
+		goto err_client_a;
+	b.ctx->sched.priority = I915_USER_PRIORITY(I915_PRIORITY_MAX);
+
+	for_each_engine(engine, gt, id) {
+		struct i915_request *rq_a, *rq_b;
+
+		if (!intel_engine_has_preemption(engine))
+			continue;
+
+		engine->execlists.preempt_hang.count = 0;
+
+		rq_a = spinner_create_request(&a.spin,
+					      a.ctx, engine,
+					      MI_ARB_CHECK);
+		if (IS_ERR(rq_a)) {
+			err = PTR_ERR(rq_a);
+			goto err_client_b;
+		}
+
+		/* Low priority client, but unpreemptable! */
+		__set_bit(I915_FENCE_FLAG_NOPREEMPT, &rq_a->fence.flags);
+
+		i915_request_add(rq_a);
+		if (!igt_wait_for_spinner(&a.spin, rq_a)) {
+			pr_err("First client failed to start\n");
+			goto err_wedged;
+		}
+
+		rq_b = spinner_create_request(&b.spin,
+					      b.ctx, engine,
+					      MI_ARB_CHECK);
+		if (IS_ERR(rq_b)) {
+			err = PTR_ERR(rq_b);
+			goto err_client_b;
+		}
+
+		i915_request_add(rq_b);
+
+		/* B is much more important than A! (But A is unpreemptable.) */
+		GEM_BUG_ON(rq_prio(rq_b) <= rq_prio(rq_a));
+
+		/* Wait long enough for preemption and timeslicing */
+		if (igt_wait_for_spinner(&b.spin, rq_b)) {
+			pr_err("Second client started too early!\n");
+			goto err_wedged;
+		}
+
+		igt_spinner_end(&a.spin);
+
+		if (!igt_wait_for_spinner(&b.spin, rq_b)) {
+			pr_err("Second client failed to start\n");
+			goto err_wedged;
+		}
+
+		igt_spinner_end(&b.spin);
+
+		if (engine->execlists.preempt_hang.count) {
+			pr_err("Preemption recorded x%d; should have been suppressed!\n",
+			       engine->execlists.preempt_hang.count);
+			err = -EINVAL;
+			goto err_wedged;
+		}
+
+		if (igt_flush_test(gt->i915))
+			goto err_wedged;
+	}
+
+	err = 0;
+err_client_b:
+	preempt_client_fini(&b);
+err_client_a:
+	preempt_client_fini(&a);
+	return err;
+
+err_wedged:
+	igt_spinner_end(&b.spin);
+	igt_spinner_end(&a.spin);
+	intel_gt_set_wedged(gt);
+	err = -EIO;
+	goto err_client_b;
+}
+
+struct live_preempt_cancel {
+	struct intel_engine_cs *engine;
+	struct preempt_client a, b;
+};
+
+static int __cancel_active0(struct live_preempt_cancel *arg)
+{
+	struct i915_request *rq;
+	struct igt_live_test t;
+	int err;
+
+	/* Preempt cancel of ELSP0 */
+	GEM_TRACE("%s(%s)\n", __func__, arg->engine->name);
+	if (igt_live_test_begin(&t, arg->engine->i915,
+				__func__, arg->engine->name))
+		return -EIO;
+
+	rq = spinner_create_request(&arg->a.spin,
+				    arg->a.ctx, arg->engine,
+				    MI_ARB_CHECK);
+	if (IS_ERR(rq))
+		return PTR_ERR(rq);
+
+	clear_bit(CONTEXT_BANNED, &rq->context->flags);
+	i915_request_get(rq);
+	i915_request_add(rq);
+	if (!igt_wait_for_spinner(&arg->a.spin, rq)) {
+		err = -EIO;
+		goto out;
+	}
+
+	intel_context_set_banned(rq->context);
+	err = intel_engine_pulse(arg->engine);
+	if (err)
+		goto out;
+
+	if (i915_request_wait(rq, 0, HZ / 5) < 0) {
+		err = -EIO;
+		goto out;
+	}
+
+	if (rq->fence.error != -EIO) {
+		pr_err("Cancelled inflight0 request did not report -EIO\n");
+		err = -EINVAL;
+		goto out;
+	}
+
+out:
+	i915_request_put(rq);
+	if (igt_live_test_end(&t))
+		err = -EIO;
+	return err;
+}
+
+static int __cancel_active1(struct live_preempt_cancel *arg)
+{
+	struct i915_request *rq[2] = {};
+	struct igt_live_test t;
+	int err;
+
+	/* Preempt cancel of ELSP1 */
+	GEM_TRACE("%s(%s)\n", __func__, arg->engine->name);
+	if (igt_live_test_begin(&t, arg->engine->i915,
+				__func__, arg->engine->name))
+		return -EIO;
+
+	rq[0] = spinner_create_request(&arg->a.spin,
+				       arg->a.ctx, arg->engine,
+				       MI_NOOP); /* no preemption */
+	if (IS_ERR(rq[0]))
+		return PTR_ERR(rq[0]);
+
+	clear_bit(CONTEXT_BANNED, &rq[0]->context->flags);
+	i915_request_get(rq[0]);
+	i915_request_add(rq[0]);
+	if (!igt_wait_for_spinner(&arg->a.spin, rq[0])) {
+		err = -EIO;
+		goto out;
+	}
+
+	rq[1] = spinner_create_request(&arg->b.spin,
+				       arg->b.ctx, arg->engine,
+				       MI_ARB_CHECK);
+	if (IS_ERR(rq[1])) {
+		err = PTR_ERR(rq[1]);
+		goto out;
+	}
+
+	clear_bit(CONTEXT_BANNED, &rq[1]->context->flags);
+	i915_request_get(rq[1]);
+	err = i915_request_await_dma_fence(rq[1], &rq[0]->fence);
+	i915_request_add(rq[1]);
+	if (err)
+		goto out;
+
+	intel_context_set_banned(rq[1]->context);
+	err = intel_engine_pulse(arg->engine);
+	if (err)
+		goto out;
+
+	igt_spinner_end(&arg->a.spin);
+	if (i915_request_wait(rq[1], 0, HZ / 5) < 0) {
+		err = -EIO;
+		goto out;
+	}
+
+	if (rq[0]->fence.error != 0) {
+		pr_err("Normal inflight0 request did not complete\n");
+		err = -EINVAL;
+		goto out;
+	}
+
+	if (rq[1]->fence.error != -EIO) {
+		pr_err("Cancelled inflight1 request did not report -EIO\n");
+		err = -EINVAL;
+		goto out;
+	}
+
+out:
+	i915_request_put(rq[1]);
+	i915_request_put(rq[0]);
+	if (igt_live_test_end(&t))
+		err = -EIO;
+	return err;
+}
+
+static int __cancel_queued(struct live_preempt_cancel *arg)
+{
+	struct i915_request *rq[3] = {};
+	struct igt_live_test t;
+	int err;
+
+	/* Full ELSP and one in the wings */
+	GEM_TRACE("%s(%s)\n", __func__, arg->engine->name);
+	if (igt_live_test_begin(&t, arg->engine->i915,
+				__func__, arg->engine->name))
+		return -EIO;
+
+	rq[0] = spinner_create_request(&arg->a.spin,
+				       arg->a.ctx, arg->engine,
+				       MI_ARB_CHECK);
+	if (IS_ERR(rq[0]))
+		return PTR_ERR(rq[0]);
+
+	clear_bit(CONTEXT_BANNED, &rq[0]->context->flags);
+	i915_request_get(rq[0]);
+	i915_request_add(rq[0]);
+	if (!igt_wait_for_spinner(&arg->a.spin, rq[0])) {
+		err = -EIO;
+		goto out;
+	}
+
+	rq[1] = igt_request_alloc(arg->b.ctx, arg->engine);
+	if (IS_ERR(rq[1])) {
+		err = PTR_ERR(rq[1]);
+		goto out;
+	}
+
+	clear_bit(CONTEXT_BANNED, &rq[1]->context->flags);
+	i915_request_get(rq[1]);
+	err = i915_request_await_dma_fence(rq[1], &rq[0]->fence);
+	i915_request_add(rq[1]);
+	if (err)
+		goto out;
+
+	rq[2] = spinner_create_request(&arg->b.spin,
+				       arg->a.ctx, arg->engine,
+				       MI_ARB_CHECK);
+	if (IS_ERR(rq[2])) {
+		err = PTR_ERR(rq[2]);
+		goto out;
+	}
+
+	i915_request_get(rq[2]);
+	err = i915_request_await_dma_fence(rq[2], &rq[1]->fence);
+	i915_request_add(rq[2]);
+	if (err)
+		goto out;
+
+	intel_context_set_banned(rq[2]->context);
+	err = intel_engine_pulse(arg->engine);
+	if (err)
+		goto out;
+
+	if (i915_request_wait(rq[2], 0, HZ / 5) < 0) {
+		err = -EIO;
+		goto out;
+	}
+
+	if (rq[0]->fence.error != -EIO) {
+		pr_err("Cancelled inflight0 request did not report -EIO\n");
+		err = -EINVAL;
+		goto out;
+	}
+
+	if (rq[1]->fence.error != 0) {
+		pr_err("Normal inflight1 request did not complete\n");
+		err = -EINVAL;
+		goto out;
+	}
+
+	if (rq[2]->fence.error != -EIO) {
+		pr_err("Cancelled queued request did not report -EIO\n");
+		err = -EINVAL;
+		goto out;
+	}
+
+out:
+	i915_request_put(rq[2]);
+	i915_request_put(rq[1]);
+	i915_request_put(rq[0]);
+	if (igt_live_test_end(&t))
+		err = -EIO;
+	return err;
+}
+
+static int __cancel_hostile(struct live_preempt_cancel *arg)
+{
+	struct i915_request *rq;
+	int err;
+
+	/* Preempt cancel non-preemptible spinner in ELSP0 */
+	if (!IS_ACTIVE(CONFIG_DRM_I915_PREEMPT_TIMEOUT))
+		return 0;
+
+	GEM_TRACE("%s(%s)\n", __func__, arg->engine->name);
+	rq = spinner_create_request(&arg->a.spin,
+				    arg->a.ctx, arg->engine,
+				    MI_NOOP); /* preemption disabled */
+	if (IS_ERR(rq))
+		return PTR_ERR(rq);
+
+	clear_bit(CONTEXT_BANNED, &rq->context->flags);
+	i915_request_get(rq);
+	i915_request_add(rq);
+	if (!igt_wait_for_spinner(&arg->a.spin, rq)) {
+		err = -EIO;
+		goto out;
+	}
+
+	intel_context_set_banned(rq->context);
+	err = intel_engine_pulse(arg->engine); /* force reset */
+	if (err)
+		goto out;
+
+	if (i915_request_wait(rq, 0, HZ / 5) < 0) {
+		err = -EIO;
+		goto out;
+	}
+
+	if (rq->fence.error != -EIO) {
+		pr_err("Cancelled inflight0 request did not report -EIO\n");
+		err = -EINVAL;
+		goto out;
+	}
+
+out:
+	i915_request_put(rq);
+	if (igt_flush_test(arg->engine->i915))
+		err = -EIO;
+	return err;
+}
+
+static int live_preempt_cancel(void *arg)
+{
+	struct intel_gt *gt = arg;
+	struct live_preempt_cancel data;
+	enum intel_engine_id id;
+	int err = -ENOMEM;
+
+	/*
+	 * To cancel an inflight context, we need to first remove it from the
+	 * GPU. That sounds like preemption! Plus a little bit of bookkeeping.
+	 */
+
+	if (!HAS_LOGICAL_RING_PREEMPTION(gt->i915))
+		return 0;
+
+	if (preempt_client_init(gt, &data.a))
+		return -ENOMEM;
+	if (preempt_client_init(gt, &data.b))
+		goto err_client_a;
+
+	for_each_engine(data.engine, gt, id) {
+		if (!intel_engine_has_preemption(data.engine))
+			continue;
+
+		err = __cancel_active0(&data);
+		if (err)
+			goto err_wedged;
+
+		err = __cancel_active1(&data);
+		if (err)
+			goto err_wedged;
+
+		err = __cancel_queued(&data);
+		if (err)
+			goto err_wedged;
+
+		err = __cancel_hostile(&data);
+		if (err)
+			goto err_wedged;
+	}
+
+	err = 0;
+err_client_b:
+	preempt_client_fini(&data.b);
+err_client_a:
+	preempt_client_fini(&data.a);
+	return err;
+
+err_wedged:
+	GEM_TRACE_DUMP();
+	igt_spinner_end(&data.b.spin);
+	igt_spinner_end(&data.a.spin);
+	intel_gt_set_wedged(gt);
+	goto err_client_b;
+}
+
 static int live_suppress_self_preempt(void *arg)
 {
-	struct drm_i915_private *i915 = arg;
+	struct intel_gt *gt = arg;
 	struct intel_engine_cs *engine;
 	struct i915_sched_attr attr = {
 		.priority = I915_USER_PRIORITY(I915_PRIORITY_MAX)
 	};
 	struct preempt_client a, b;
 	enum intel_engine_id id;
-	intel_wakeref_t wakeref;
 	int err = -ENOMEM;
 
 	/*
@@ -525,49 +1653,58 @@ static int live_suppress_self_preempt(void *arg)
 	 * completion event.
 	 */
 
-	if (!HAS_LOGICAL_RING_PREEMPTION(i915))
+	if (!HAS_LOGICAL_RING_PREEMPTION(gt->i915))
 		return 0;
 
-	if (USES_GUC_SUBMISSION(i915))
+	if (USES_GUC_SUBMISSION(gt->i915))
 		return 0; /* presume black blox */
 
-	mutex_lock(&i915->drm.struct_mutex);
-	wakeref = intel_runtime_pm_get(&i915->runtime_pm);
+	if (intel_vgpu_active(gt->i915))
+		return 0; /* GVT forces single port & request submission */
 
-	if (preempt_client_init(i915, &a))
-		goto err_unlock;
-	if (preempt_client_init(i915, &b))
+	if (preempt_client_init(gt, &a))
+		return -ENOMEM;
+	if (preempt_client_init(gt, &b))
 		goto err_client_a;
 
-	for_each_engine(engine, i915, id) {
+	for_each_engine(engine, gt, id) {
 		struct i915_request *rq_a, *rq_b;
 		int depth;
 
 		if (!intel_engine_has_preemption(engine))
 			continue;
 
+		if (igt_flush_test(gt->i915))
+			goto err_wedged;
+
+		intel_engine_pm_get(engine);
 		engine->execlists.preempt_hang.count = 0;
 
-		rq_a = igt_spinner_create_request(&a.spin,
-						  a.ctx, engine,
-						  MI_NOOP);
+		rq_a = spinner_create_request(&a.spin,
+					      a.ctx, engine,
+					      MI_NOOP);
 		if (IS_ERR(rq_a)) {
 			err = PTR_ERR(rq_a);
+			intel_engine_pm_put(engine);
 			goto err_client_b;
 		}
 
 		i915_request_add(rq_a);
 		if (!igt_wait_for_spinner(&a.spin, rq_a)) {
 			pr_err("First client failed to start\n");
+			intel_engine_pm_put(engine);
 			goto err_wedged;
 		}
 
+		/* Keep postponing the timer to avoid premature slicing */
+		mod_timer(&engine->execlists.timer, jiffies + HZ);
 		for (depth = 0; depth < 8; depth++) {
-			rq_b = igt_spinner_create_request(&b.spin,
-							  b.ctx, engine,
-							  MI_NOOP);
+			rq_b = spinner_create_request(&b.spin,
+						      b.ctx, engine,
+						      MI_NOOP);
 			if (IS_ERR(rq_b)) {
 				err = PTR_ERR(rq_b);
+				intel_engine_pm_put(engine);
 				goto err_client_b;
 			}
 			i915_request_add(rq_b);
@@ -578,6 +1715,7 @@ static int live_suppress_self_preempt(void *arg)
 
 			if (!igt_wait_for_spinner(&b.spin, rq_b)) {
 				pr_err("Second client failed to start\n");
+				intel_engine_pm_put(engine);
 				goto err_wedged;
 			}
 
@@ -587,14 +1725,17 @@ static int live_suppress_self_preempt(void *arg)
 		igt_spinner_end(&a.spin);
 
 		if (engine->execlists.preempt_hang.count) {
-			pr_err("Preemption recorded x%d, depth %d; should have been suppressed!\n",
+			pr_err("Preemption on %s recorded x%d, depth %d; should have been suppressed!\n",
+			       engine->name,
 			       engine->execlists.preempt_hang.count,
 			       depth);
+			intel_engine_pm_put(engine);
 			err = -EINVAL;
 			goto err_client_b;
 		}
 
-		if (igt_flush_test(i915, I915_WAIT_LOCKED))
+		intel_engine_pm_put(engine);
+		if (igt_flush_test(gt->i915))
 			goto err_wedged;
 	}
 
@@ -603,17 +1744,12 @@ err_client_b:
 	preempt_client_fini(&b);
 err_client_a:
 	preempt_client_fini(&a);
-err_unlock:
-	if (igt_flush_test(i915, I915_WAIT_LOCKED))
-		err = -EIO;
-	intel_runtime_pm_put(&i915->runtime_pm, wakeref);
-	mutex_unlock(&i915->drm.struct_mutex);
 	return err;
 
 err_wedged:
 	igt_spinner_end(&b.spin);
 	igt_spinner_end(&a.spin);
-	i915_gem_set_wedged(i915);
+	intel_gt_set_wedged(gt);
 	err = -EIO;
 	goto err_client_b;
 }
@@ -632,9 +1768,13 @@ static struct i915_request *dummy_request(struct intel_engine_cs *engine)
 	if (!rq)
 		return NULL;
 
-	INIT_LIST_HEAD(&rq->active_list);
 	rq->engine = engine;
 
+	spin_lock_init(&rq->lock);
+	INIT_LIST_HEAD(&rq->fence.cb_list);
+	rq->fence.lock = &rq->lock;
+	rq->fence.ops = &i915_fence_ops;
+
 	i915_sched_node_init(&rq->sched);
 
 	/* mark this request as permanently incomplete */
@@ -646,6 +1786,10 @@ static struct i915_request *dummy_request(struct intel_engine_cs *engine)
 	i915_sw_fence_init(&rq->submit, dummy_notify);
 	set_bit(I915_FENCE_FLAG_ACTIVE, &rq->fence.flags);
 
+	spin_lock_init(&rq->lock);
+	rq->fence.lock = &rq->lock;
+	INIT_LIST_HEAD(&rq->fence.cb_list);
+
 	return rq;
 }
 
@@ -665,11 +1809,11 @@ static void dummy_request_free(struct i915_request *dummy)
 
 static int live_suppress_wait_preempt(void *arg)
 {
-	struct drm_i915_private *i915 = arg;
+	struct intel_gt *gt = arg;
 	struct preempt_client client[4];
+	struct i915_request *rq[ARRAY_SIZE(client)] = {};
 	struct intel_engine_cs *engine;
 	enum intel_engine_id id;
-	intel_wakeref_t wakeref;
 	int err = -ENOMEM;
 	int i;
 
@@ -679,22 +1823,19 @@ static int live_suppress_wait_preempt(void *arg)
 	 * not needlessly generate preempt-to-idle cycles.
 	 */
 
-	if (!HAS_LOGICAL_RING_PREEMPTION(i915))
+	if (!HAS_LOGICAL_RING_PREEMPTION(gt->i915))
 		return 0;
 
-	mutex_lock(&i915->drm.struct_mutex);
-	wakeref = intel_runtime_pm_get(&i915->runtime_pm);
-
-	if (preempt_client_init(i915, &client[0])) /* ELSP[0] */
-		goto err_unlock;
-	if (preempt_client_init(i915, &client[1])) /* ELSP[1] */
+	if (preempt_client_init(gt, &client[0])) /* ELSP[0] */
+		return -ENOMEM;
+	if (preempt_client_init(gt, &client[1])) /* ELSP[1] */
 		goto err_client_0;
-	if (preempt_client_init(i915, &client[2])) /* head of queue */
+	if (preempt_client_init(gt, &client[2])) /* head of queue */
 		goto err_client_1;
-	if (preempt_client_init(i915, &client[3])) /* bystander */
+	if (preempt_client_init(gt, &client[3])) /* bystander */
 		goto err_client_2;
 
-	for_each_engine(engine, i915, id) {
+	for_each_engine(engine, gt, id) {
 		int depth;
 
 		if (!intel_engine_has_preemption(engine))
@@ -704,7 +1845,6 @@ static int live_suppress_wait_preempt(void *arg)
 			continue;
 
 		for (depth = 0; depth < ARRAY_SIZE(client); depth++) {
-			struct i915_request *rq[ARRAY_SIZE(client)];
 			struct i915_request *dummy;
 
 			engine->execlists.preempt_hang.count = 0;
@@ -714,18 +1854,22 @@ static int live_suppress_wait_preempt(void *arg)
 				goto err_client_3;
 
 			for (i = 0; i < ARRAY_SIZE(client); i++) {
-				rq[i] = igt_spinner_create_request(&client[i].spin,
-								   client[i].ctx, engine,
-								   MI_NOOP);
-				if (IS_ERR(rq[i])) {
-					err = PTR_ERR(rq[i]);
+				struct i915_request *this;
+
+				this = spinner_create_request(&client[i].spin,
+							      client[i].ctx, engine,
+							      MI_NOOP);
+				if (IS_ERR(this)) {
+					err = PTR_ERR(this);
 					goto err_wedged;
 				}
 
 				/* Disable NEWCLIENT promotion */
-				__i915_active_request_set(&rq[i]->timeline->last_request,
-							  dummy);
-				i915_request_add(rq[i]);
+				__i915_active_fence_set(&i915_request_timeline(this)->last_request,
+							&dummy->fence);
+
+				rq[i] = i915_request_get(this);
+				i915_request_add(this);
 			}
 
 			dummy_request_free(dummy);
@@ -746,10 +1890,13 @@ static int live_suppress_wait_preempt(void *arg)
 				goto err_wedged;
 			}
 
-			for (i = 0; i < ARRAY_SIZE(client); i++)
+			for (i = 0; i < ARRAY_SIZE(client); i++) {
 				igt_spinner_end(&client[i].spin);
+				i915_request_put(rq[i]);
+				rq[i] = NULL;
+			}
 
-			if (igt_flush_test(i915, I915_WAIT_LOCKED))
+			if (igt_flush_test(gt->i915))
 				goto err_wedged;
 
 			if (engine->execlists.preempt_hang.count) {
@@ -772,28 +1919,24 @@ err_client_1:
 	preempt_client_fini(&client[1]);
 err_client_0:
 	preempt_client_fini(&client[0]);
-err_unlock:
-	if (igt_flush_test(i915, I915_WAIT_LOCKED))
-		err = -EIO;
-	intel_runtime_pm_put(&i915->runtime_pm, wakeref);
-	mutex_unlock(&i915->drm.struct_mutex);
 	return err;
 
 err_wedged:
-	for (i = 0; i < ARRAY_SIZE(client); i++)
+	for (i = 0; i < ARRAY_SIZE(client); i++) {
 		igt_spinner_end(&client[i].spin);
-	i915_gem_set_wedged(i915);
+		i915_request_put(rq[i]);
+	}
+	intel_gt_set_wedged(gt);
 	err = -EIO;
 	goto err_client_3;
 }
 
 static int live_chain_preempt(void *arg)
 {
-	struct drm_i915_private *i915 = arg;
+	struct intel_gt *gt = arg;
 	struct intel_engine_cs *engine;
 	struct preempt_client hi, lo;
 	enum intel_engine_id id;
-	intel_wakeref_t wakeref;
 	int err = -ENOMEM;
 
 	/*
@@ -802,19 +1945,16 @@ static int live_chain_preempt(void *arg)
 	 * the previously submitted spinner in B.
 	 */
 
-	if (!HAS_LOGICAL_RING_PREEMPTION(i915))
+	if (!HAS_LOGICAL_RING_PREEMPTION(gt->i915))
 		return 0;
 
-	mutex_lock(&i915->drm.struct_mutex);
-	wakeref = intel_runtime_pm_get(&i915->runtime_pm);
-
-	if (preempt_client_init(i915, &hi))
-		goto err_unlock;
+	if (preempt_client_init(gt, &hi))
+		return -ENOMEM;
 
-	if (preempt_client_init(i915, &lo))
+	if (preempt_client_init(gt, &lo))
 		goto err_client_hi;
 
-	for_each_engine(engine, i915, id) {
+	for_each_engine(engine, gt, id) {
 		struct i915_sched_attr attr = {
 			.priority = I915_USER_PRIORITY(I915_PRIORITY_MAX),
 		};
@@ -825,11 +1965,13 @@ static int live_chain_preempt(void *arg)
 		if (!intel_engine_has_preemption(engine))
 			continue;
 
-		rq = igt_spinner_create_request(&lo.spin,
-						lo.ctx, engine,
-						MI_ARB_CHECK);
+		rq = spinner_create_request(&lo.spin,
+					    lo.ctx, engine,
+					    MI_ARB_CHECK);
 		if (IS_ERR(rq))
 			goto err_wedged;
+
+		i915_request_get(rq);
 		i915_request_add(rq);
 
 		ring_size = rq->wa_tail - rq->head;
@@ -842,27 +1984,29 @@ static int live_chain_preempt(void *arg)
 		igt_spinner_end(&lo.spin);
 		if (i915_request_wait(rq, 0, HZ / 2) < 0) {
 			pr_err("Timed out waiting to flush %s\n", engine->name);
+			i915_request_put(rq);
 			goto err_wedged;
 		}
+		i915_request_put(rq);
 
-		if (igt_live_test_begin(&t, i915, __func__, engine->name)) {
+		if (igt_live_test_begin(&t, gt->i915, __func__, engine->name)) {
 			err = -EIO;
 			goto err_wedged;
 		}
 
 		for_each_prime_number_from(count, 1, ring_size) {
-			rq = igt_spinner_create_request(&hi.spin,
-							hi.ctx, engine,
-							MI_ARB_CHECK);
+			rq = spinner_create_request(&hi.spin,
+						    hi.ctx, engine,
+						    MI_ARB_CHECK);
 			if (IS_ERR(rq))
 				goto err_wedged;
 			i915_request_add(rq);
 			if (!igt_wait_for_spinner(&hi.spin, rq))
 				goto err_wedged;
 
-			rq = igt_spinner_create_request(&lo.spin,
-							lo.ctx, engine,
-							MI_ARB_CHECK);
+			rq = spinner_create_request(&lo.spin,
+						    lo.ctx, engine,
+						    MI_ARB_CHECK);
 			if (IS_ERR(rq))
 				goto err_wedged;
 			i915_request_add(rq);
@@ -877,36 +2021,46 @@ static int live_chain_preempt(void *arg)
 			rq = igt_request_alloc(hi.ctx, engine);
 			if (IS_ERR(rq))
 				goto err_wedged;
+
+			i915_request_get(rq);
 			i915_request_add(rq);
 			engine->schedule(rq, &attr);
 
 			igt_spinner_end(&hi.spin);
 			if (i915_request_wait(rq, 0, HZ / 5) < 0) {
 				struct drm_printer p =
-					drm_info_printer(i915->drm.dev);
+					drm_info_printer(gt->i915->drm.dev);
 
 				pr_err("Failed to preempt over chain of %d\n",
 				       count);
 				intel_engine_dump(engine, &p,
 						  "%s\n", engine->name);
+				i915_request_put(rq);
 				goto err_wedged;
 			}
 			igt_spinner_end(&lo.spin);
+			i915_request_put(rq);
 
 			rq = igt_request_alloc(lo.ctx, engine);
 			if (IS_ERR(rq))
 				goto err_wedged;
+
+			i915_request_get(rq);
 			i915_request_add(rq);
+
 			if (i915_request_wait(rq, 0, HZ / 5) < 0) {
 				struct drm_printer p =
-					drm_info_printer(i915->drm.dev);
+					drm_info_printer(gt->i915->drm.dev);
 
 				pr_err("Failed to flush low priority chain of %d requests\n",
 				       count);
 				intel_engine_dump(engine, &p,
 						  "%s\n", engine->name);
+
+				i915_request_put(rq);
 				goto err_wedged;
 			}
+			i915_request_put(rq);
 		}
 
 		if (igt_live_test_end(&t)) {
@@ -920,66 +2074,252 @@ err_client_lo:
 	preempt_client_fini(&lo);
 err_client_hi:
 	preempt_client_fini(&hi);
-err_unlock:
-	if (igt_flush_test(i915, I915_WAIT_LOCKED))
-		err = -EIO;
-	intel_runtime_pm_put(&i915->runtime_pm, wakeref);
-	mutex_unlock(&i915->drm.struct_mutex);
 	return err;
 
 err_wedged:
 	igt_spinner_end(&hi.spin);
 	igt_spinner_end(&lo.spin);
-	i915_gem_set_wedged(i915);
+	intel_gt_set_wedged(gt);
 	err = -EIO;
 	goto err_client_lo;
 }
 
+static int create_gang(struct intel_engine_cs *engine,
+		       struct i915_request **prev)
+{
+	struct drm_i915_gem_object *obj;
+	struct intel_context *ce;
+	struct i915_request *rq;
+	struct i915_vma *vma;
+	u32 *cs;
+	int err;
+
+	ce = intel_context_create(engine);
+	if (IS_ERR(ce))
+		return PTR_ERR(ce);
+
+	obj = i915_gem_object_create_internal(engine->i915, 4096);
+	if (IS_ERR(obj)) {
+		err = PTR_ERR(obj);
+		goto err_ce;
+	}
+
+	vma = i915_vma_instance(obj, ce->vm, NULL);
+	if (IS_ERR(vma)) {
+		err = PTR_ERR(vma);
+		goto err_obj;
+	}
+
+	err = i915_vma_pin(vma, 0, 0, PIN_USER);
+	if (err)
+		goto err_obj;
+
+	cs = i915_gem_object_pin_map(obj, I915_MAP_WC);
+	if (IS_ERR(cs))
+		goto err_obj;
+
+	/* Semaphore target: spin until zero */
+	*cs++ = MI_ARB_ON_OFF | MI_ARB_ENABLE;
+
+	*cs++ = MI_SEMAPHORE_WAIT |
+		MI_SEMAPHORE_POLL |
+		MI_SEMAPHORE_SAD_EQ_SDD;
+	*cs++ = 0;
+	*cs++ = lower_32_bits(vma->node.start);
+	*cs++ = upper_32_bits(vma->node.start);
+
+	if (*prev) {
+		u64 offset = (*prev)->batch->node.start;
+
+		/* Terminate the spinner in the next lower priority batch. */
+		*cs++ = MI_STORE_DWORD_IMM_GEN4;
+		*cs++ = lower_32_bits(offset);
+		*cs++ = upper_32_bits(offset);
+		*cs++ = 0;
+	}
+
+	*cs++ = MI_BATCH_BUFFER_END;
+	i915_gem_object_flush_map(obj);
+	i915_gem_object_unpin_map(obj);
+
+	rq = intel_context_create_request(ce);
+	if (IS_ERR(rq))
+		goto err_obj;
+
+	rq->batch = vma;
+	i915_request_get(rq);
+
+	i915_vma_lock(vma);
+	err = i915_request_await_object(rq, vma->obj, false);
+	if (!err)
+		err = i915_vma_move_to_active(vma, rq, 0);
+	if (!err)
+		err = rq->engine->emit_bb_start(rq,
+						vma->node.start,
+						PAGE_SIZE, 0);
+	i915_vma_unlock(vma);
+	i915_request_add(rq);
+	if (err)
+		goto err_rq;
+
+	i915_gem_object_put(obj);
+	intel_context_put(ce);
+
+	rq->client_link.next = &(*prev)->client_link;
+	*prev = rq;
+	return 0;
+
+err_rq:
+	i915_request_put(rq);
+err_obj:
+	i915_gem_object_put(obj);
+err_ce:
+	intel_context_put(ce);
+	return err;
+}
+
+static int live_preempt_gang(void *arg)
+{
+	struct intel_gt *gt = arg;
+	struct intel_engine_cs *engine;
+	enum intel_engine_id id;
+
+	if (!HAS_LOGICAL_RING_PREEMPTION(gt->i915))
+		return 0;
+
+	/*
+	 * Build as long a chain of preempters as we can, with each
+	 * request higher priority than the last. Once we are ready, we release
+	 * the last batch which then precolates down the chain, each releasing
+	 * the next oldest in turn. The intent is to simply push as hard as we
+	 * can with the number of preemptions, trying to exceed narrow HW
+	 * limits. At a minimum, we insist that we can sort all the user
+	 * high priority levels into execution order.
+	 */
+
+	for_each_engine(engine, gt, id) {
+		struct i915_request *rq = NULL;
+		struct igt_live_test t;
+		IGT_TIMEOUT(end_time);
+		int prio = 0;
+		int err = 0;
+		u32 *cs;
+
+		if (!intel_engine_has_preemption(engine))
+			continue;
+
+		if (igt_live_test_begin(&t, gt->i915, __func__, engine->name))
+			return -EIO;
+
+		do {
+			struct i915_sched_attr attr = {
+				.priority = I915_USER_PRIORITY(prio++),
+			};
+
+			err = create_gang(engine, &rq);
+			if (err)
+				break;
+
+			/* Submit each spinner at increasing priority */
+			engine->schedule(rq, &attr);
+
+			if (prio <= I915_PRIORITY_MAX)
+				continue;
+
+			if (prio > (INT_MAX >> I915_USER_PRIORITY_SHIFT))
+				break;
+
+			if (__igt_timeout(end_time, NULL))
+				break;
+		} while (1);
+		pr_debug("%s: Preempt chain of %d requests\n",
+			 engine->name, prio);
+
+		/*
+		 * Such that the last spinner is the highest priority and
+		 * should execute first. When that spinner completes,
+		 * it will terminate the next lowest spinner until there
+		 * are no more spinners and the gang is complete.
+		 */
+		cs = i915_gem_object_pin_map(rq->batch->obj, I915_MAP_WC);
+		if (!IS_ERR(cs)) {
+			*cs = 0;
+			i915_gem_object_unpin_map(rq->batch->obj);
+		} else {
+			err = PTR_ERR(cs);
+			intel_gt_set_wedged(gt);
+		}
+
+		while (rq) { /* wait for each rq from highest to lowest prio */
+			struct i915_request *n =
+				list_next_entry(rq, client_link);
+
+			if (err == 0 && i915_request_wait(rq, 0, HZ / 5) < 0) {
+				struct drm_printer p =
+					drm_info_printer(engine->i915->drm.dev);
+
+				pr_err("Failed to flush chain of %d requests, at %d\n",
+				       prio, rq_prio(rq) >> I915_USER_PRIORITY_SHIFT);
+				intel_engine_dump(engine, &p,
+						  "%s\n", engine->name);
+
+				err = -ETIME;
+			}
+
+			i915_request_put(rq);
+			rq = n;
+		}
+
+		if (igt_live_test_end(&t))
+			err = -EIO;
+		if (err)
+			return err;
+	}
+
+	return 0;
+}
+
 static int live_preempt_hang(void *arg)
 {
-	struct drm_i915_private *i915 = arg;
+	struct intel_gt *gt = arg;
 	struct i915_gem_context *ctx_hi, *ctx_lo;
 	struct igt_spinner spin_hi, spin_lo;
 	struct intel_engine_cs *engine;
 	enum intel_engine_id id;
-	intel_wakeref_t wakeref;
 	int err = -ENOMEM;
 
-	if (!HAS_LOGICAL_RING_PREEMPTION(i915))
+	if (!HAS_LOGICAL_RING_PREEMPTION(gt->i915))
 		return 0;
 
-	if (!intel_has_reset_engine(i915))
+	if (!intel_has_reset_engine(gt))
 		return 0;
 
-	mutex_lock(&i915->drm.struct_mutex);
-	wakeref = intel_runtime_pm_get(&i915->runtime_pm);
-
-	if (igt_spinner_init(&spin_hi, i915))
-		goto err_unlock;
+	if (igt_spinner_init(&spin_hi, gt))
+		return -ENOMEM;
 
-	if (igt_spinner_init(&spin_lo, i915))
+	if (igt_spinner_init(&spin_lo, gt))
 		goto err_spin_hi;
 
-	ctx_hi = kernel_context(i915);
+	ctx_hi = kernel_context(gt->i915);
 	if (!ctx_hi)
 		goto err_spin_lo;
 	ctx_hi->sched.priority =
 		I915_USER_PRIORITY(I915_CONTEXT_MAX_USER_PRIORITY);
 
-	ctx_lo = kernel_context(i915);
+	ctx_lo = kernel_context(gt->i915);
 	if (!ctx_lo)
 		goto err_ctx_hi;
 	ctx_lo->sched.priority =
 		I915_USER_PRIORITY(I915_CONTEXT_MIN_USER_PRIORITY);
 
-	for_each_engine(engine, i915, id) {
+	for_each_engine(engine, gt, id) {
 		struct i915_request *rq;
 
 		if (!intel_engine_has_preemption(engine))
 			continue;
 
-		rq = igt_spinner_create_request(&spin_lo, ctx_lo, engine,
-						MI_ARB_CHECK);
+		rq = spinner_create_request(&spin_lo, ctx_lo, engine,
+					    MI_ARB_CHECK);
 		if (IS_ERR(rq)) {
 			err = PTR_ERR(rq);
 			goto err_ctx_lo;
@@ -989,13 +2329,13 @@ static int live_preempt_hang(void *arg)
 		if (!igt_wait_for_spinner(&spin_lo, rq)) {
 			GEM_TRACE("lo spinner failed to start\n");
 			GEM_TRACE_DUMP();
-			i915_gem_set_wedged(i915);
+			intel_gt_set_wedged(gt);
 			err = -EIO;
 			goto err_ctx_lo;
 		}
 
-		rq = igt_spinner_create_request(&spin_hi, ctx_hi, engine,
-						MI_ARB_CHECK);
+		rq = spinner_create_request(&spin_hi, ctx_hi, engine,
+					    MI_ARB_CHECK);
 		if (IS_ERR(rq)) {
 			igt_spinner_end(&spin_lo);
 			err = PTR_ERR(rq);
@@ -1011,28 +2351,28 @@ static int live_preempt_hang(void *arg)
 						 HZ / 10)) {
 			pr_err("Preemption did not occur within timeout!");
 			GEM_TRACE_DUMP();
-			i915_gem_set_wedged(i915);
+			intel_gt_set_wedged(gt);
 			err = -EIO;
 			goto err_ctx_lo;
 		}
 
-		set_bit(I915_RESET_ENGINE + id, &i915->gpu_error.flags);
-		i915_reset_engine(engine, NULL);
-		clear_bit(I915_RESET_ENGINE + id, &i915->gpu_error.flags);
+		set_bit(I915_RESET_ENGINE + id, &gt->reset.flags);
+		intel_engine_reset(engine, NULL);
+		clear_bit(I915_RESET_ENGINE + id, &gt->reset.flags);
 
 		engine->execlists.preempt_hang.inject_hang = false;
 
 		if (!igt_wait_for_spinner(&spin_hi, rq)) {
 			GEM_TRACE("hi spinner failed to start\n");
 			GEM_TRACE_DUMP();
-			i915_gem_set_wedged(i915);
+			intel_gt_set_wedged(gt);
 			err = -EIO;
 			goto err_ctx_lo;
 		}
 
 		igt_spinner_end(&spin_hi);
 		igt_spinner_end(&spin_lo);
-		if (igt_flush_test(i915, I915_WAIT_LOCKED)) {
+		if (igt_flush_test(gt->i915)) {
 			err = -EIO;
 			goto err_ctx_lo;
 		}
@@ -1047,10 +2387,105 @@ err_spin_lo:
 	igt_spinner_fini(&spin_lo);
 err_spin_hi:
 	igt_spinner_fini(&spin_hi);
-err_unlock:
-	igt_flush_test(i915, I915_WAIT_LOCKED);
-	intel_runtime_pm_put(&i915->runtime_pm, wakeref);
-	mutex_unlock(&i915->drm.struct_mutex);
+	return err;
+}
+
+static int live_preempt_timeout(void *arg)
+{
+	struct intel_gt *gt = arg;
+	struct i915_gem_context *ctx_hi, *ctx_lo;
+	struct igt_spinner spin_lo;
+	struct intel_engine_cs *engine;
+	enum intel_engine_id id;
+	int err = -ENOMEM;
+
+	/*
+	 * Check that we force preemption to occur by cancelling the previous
+	 * context if it refuses to yield the GPU.
+	 */
+	if (!IS_ACTIVE(CONFIG_DRM_I915_PREEMPT_TIMEOUT))
+		return 0;
+
+	if (!HAS_LOGICAL_RING_PREEMPTION(gt->i915))
+		return 0;
+
+	if (!intel_has_reset_engine(gt))
+		return 0;
+
+	if (igt_spinner_init(&spin_lo, gt))
+		return -ENOMEM;
+
+	ctx_hi = kernel_context(gt->i915);
+	if (!ctx_hi)
+		goto err_spin_lo;
+	ctx_hi->sched.priority =
+		I915_USER_PRIORITY(I915_CONTEXT_MAX_USER_PRIORITY);
+
+	ctx_lo = kernel_context(gt->i915);
+	if (!ctx_lo)
+		goto err_ctx_hi;
+	ctx_lo->sched.priority =
+		I915_USER_PRIORITY(I915_CONTEXT_MIN_USER_PRIORITY);
+
+	for_each_engine(engine, gt, id) {
+		unsigned long saved_timeout;
+		struct i915_request *rq;
+
+		if (!intel_engine_has_preemption(engine))
+			continue;
+
+		rq = spinner_create_request(&spin_lo, ctx_lo, engine,
+					    MI_NOOP); /* preemption disabled */
+		if (IS_ERR(rq)) {
+			err = PTR_ERR(rq);
+			goto err_ctx_lo;
+		}
+
+		i915_request_add(rq);
+		if (!igt_wait_for_spinner(&spin_lo, rq)) {
+			intel_gt_set_wedged(gt);
+			err = -EIO;
+			goto err_ctx_lo;
+		}
+
+		rq = igt_request_alloc(ctx_hi, engine);
+		if (IS_ERR(rq)) {
+			igt_spinner_end(&spin_lo);
+			err = PTR_ERR(rq);
+			goto err_ctx_lo;
+		}
+
+		/* Flush the previous CS ack before changing timeouts */
+		while (READ_ONCE(engine->execlists.pending[0]))
+			cpu_relax();
+
+		saved_timeout = engine->props.preempt_timeout_ms;
+		engine->props.preempt_timeout_ms = 1; /* in ms, -> 1 jiffie */
+
+		i915_request_get(rq);
+		i915_request_add(rq);
+
+		intel_engine_flush_submission(engine);
+		engine->props.preempt_timeout_ms = saved_timeout;
+
+		if (i915_request_wait(rq, 0, HZ / 10) < 0) {
+			intel_gt_set_wedged(gt);
+			i915_request_put(rq);
+			err = -ETIME;
+			goto err_ctx_lo;
+		}
+
+		igt_spinner_end(&spin_lo);
+		i915_request_put(rq);
+	}
+
+	err = 0;
+err_ctx_lo:
+	kernel_context_close(ctx_lo);
+err_ctx_hi:
+	kernel_context_close(ctx_hi);
+err_spin_lo:
+	igt_spinner_fini(&spin_lo);
 	return err;
 }
 
@@ -1065,7 +2500,7 @@ static int random_priority(struct rnd_state *rnd)
 }
 
 struct preempt_smoke {
-	struct drm_i915_private *i915;
+	struct intel_gt *gt;
 	struct i915_gem_context **contexts;
 	struct intel_engine_cs *engine;
 	struct drm_i915_gem_object *batch;
@@ -1089,7 +2524,11 @@ static int smoke_submit(struct preempt_smoke *smoke,
 	int err = 0;
 
 	if (batch) {
-		vma = i915_vma_instance(batch, ctx->vm, NULL);
+		struct i915_address_space *vm;
+
+		vm = i915_gem_context_get_vm_rcu(ctx);
+		vma = i915_vma_instance(batch, vm, NULL);
+		i915_vm_put(vm);
 		if (IS_ERR(vma))
 			return PTR_ERR(vma);
 
@@ -1108,11 +2547,13 @@ static int smoke_submit(struct preempt_smoke *smoke,
 
 	if (vma) {
 		i915_vma_lock(vma);
-		err = rq->engine->emit_bb_start(rq,
-						vma->node.start,
-						PAGE_SIZE, 0);
+		err = i915_request_await_object(rq, vma->obj, false);
 		if (!err)
 			err = i915_vma_move_to_active(vma, rq, 0);
+		if (!err)
+			err = rq->engine->emit_bb_start(rq,
+							vma->node.start,
+							PAGE_SIZE, 0);
 		i915_vma_unlock(vma);
 	}
 
@@ -1136,11 +2577,9 @@ static int smoke_crescendo_thread(void *arg)
 		struct i915_gem_context *ctx = smoke_context(smoke);
 		int err;
 
-		mutex_lock(&smoke->i915->drm.struct_mutex);
 		err = smoke_submit(smoke,
 				   ctx, count % I915_PRIORITY_MAX,
 				   smoke->batch);
-		mutex_unlock(&smoke->i915->drm.struct_mutex);
 		if (err)
 			return err;
 
@@ -1161,9 +2600,7 @@ static int smoke_crescendo(struct preempt_smoke *smoke, unsigned int flags)
 	unsigned long count;
 	int err = 0;
 
-	mutex_unlock(&smoke->i915->drm.struct_mutex);
-
-	for_each_engine(engine, smoke->i915, id) {
+	for_each_engine(engine, smoke->gt, id) {
 		arg[id] = *smoke;
 		arg[id].engine = engine;
 		if (!(flags & BATCH))
@@ -1179,8 +2616,10 @@ static int smoke_crescendo(struct preempt_smoke *smoke, unsigned int flags)
 		get_task_struct(tsk[id]);
 	}
 
+	yield(); /* start all threads before we kthread_stop() */
+
 	count = 0;
-	for_each_engine(engine, smoke->i915, id) {
+	for_each_engine(engine, smoke->gt, id) {
 		int status;
 
 		if (IS_ERR_OR_NULL(tsk[id]))
@@ -1195,11 +2634,9 @@ static int smoke_crescendo(struct preempt_smoke *smoke, unsigned int flags)
 		put_task_struct(tsk[id]);
 	}
 
-	mutex_lock(&smoke->i915->drm.struct_mutex);
-
 	pr_info("Submitted %lu crescendo:%x requests across %d engines and %d contexts\n",
 		count, flags,
-		RUNTIME_INFO(smoke->i915)->num_engines, smoke->ncontext);
+		RUNTIME_INFO(smoke->gt->i915)->num_engines, smoke->ncontext);
 	return 0;
 }
 
@@ -1211,7 +2648,7 @@ static int smoke_random(struct preempt_smoke *smoke, unsigned int flags)
 
 	count = 0;
 	do {
-		for_each_engine(smoke->engine, smoke->i915, id) {
+		for_each_engine(smoke->engine, smoke->gt, id) {
 			struct i915_gem_context *ctx = smoke_context(smoke);
 			int err;
 
@@ -1227,25 +2664,24 @@ static int smoke_random(struct preempt_smoke *smoke, unsigned int flags)
 
 	pr_info("Submitted %lu random:%x requests across %d engines and %d contexts\n",
 		count, flags,
-		RUNTIME_INFO(smoke->i915)->num_engines, smoke->ncontext);
+		RUNTIME_INFO(smoke->gt->i915)->num_engines, smoke->ncontext);
 	return 0;
 }
 
 static int live_preempt_smoke(void *arg)
 {
 	struct preempt_smoke smoke = {
-		.i915 = arg,
+		.gt = arg,
 		.prng = I915_RND_STATE_INITIALIZER(i915_selftest.random_seed),
 		.ncontext = 1024,
 	};
 	const unsigned int phase[] = { 0, BATCH };
-	intel_wakeref_t wakeref;
 	struct igt_live_test t;
 	int err = -ENOMEM;
 	u32 *cs;
 	int n;
 
-	if (!HAS_LOGICAL_RING_PREEMPTION(smoke.i915))
+	if (!HAS_LOGICAL_RING_PREEMPTION(smoke.gt->i915))
 		return 0;
 
 	smoke.contexts = kmalloc_array(smoke.ncontext,
@@ -1254,13 +2690,11 @@ static int live_preempt_smoke(void *arg)
 	if (!smoke.contexts)
 		return -ENOMEM;
 
-	mutex_lock(&smoke.i915->drm.struct_mutex);
-	wakeref = intel_runtime_pm_get(&smoke.i915->runtime_pm);
-
-	smoke.batch = i915_gem_object_create_internal(smoke.i915, PAGE_SIZE);
+	smoke.batch =
+		i915_gem_object_create_internal(smoke.gt->i915, PAGE_SIZE);
 	if (IS_ERR(smoke.batch)) {
 		err = PTR_ERR(smoke.batch);
-		goto err_unlock;
+		goto err_free;
 	}
 
 	cs = i915_gem_object_pin_map(smoke.batch, I915_MAP_WB);
@@ -1274,13 +2708,13 @@ static int live_preempt_smoke(void *arg)
 	i915_gem_object_flush_map(smoke.batch);
 	i915_gem_object_unpin_map(smoke.batch);
 
-	if (igt_live_test_begin(&t, smoke.i915, __func__, "all")) {
+	if (igt_live_test_begin(&t, smoke.gt->i915, __func__, "all")) {
 		err = -EIO;
 		goto err_batch;
 	}
 
 	for (n = 0; n < smoke.ncontext; n++) {
-		smoke.contexts[n] = kernel_context(smoke.i915);
+		smoke.contexts[n] = kernel_context(smoke.gt->i915);
 		if (!smoke.contexts[n])
 			goto err_ctx;
 	}
@@ -1307,15 +2741,13 @@ err_ctx:
 
 err_batch:
 	i915_gem_object_put(smoke.batch);
-err_unlock:
-	intel_runtime_pm_put(&smoke.i915->runtime_pm, wakeref);
-	mutex_unlock(&smoke.i915->drm.struct_mutex);
+err_free:
 	kfree(smoke.contexts);
 
 	return err;
 }
 
-static int nop_virtual_engine(struct drm_i915_private *i915,
+static int nop_virtual_engine(struct intel_gt *gt,
 			      struct intel_engine_cs **siblings,
 			      unsigned int nsibling,
 			      unsigned int nctx,
@@ -1323,28 +2755,18 @@ static int nop_virtual_engine(struct drm_i915_private *i915,
 #define CHAIN BIT(0)
 {
 	IGT_TIMEOUT(end_time);
-	struct i915_request *request[16];
-	struct i915_gem_context *ctx[16];
+	struct i915_request *request[16] = {};
 	struct intel_context *ve[16];
 	unsigned long n, prime, nc;
 	struct igt_live_test t;
 	ktime_t times[2] = {};
 	int err;
 
-	GEM_BUG_ON(!nctx || nctx > ARRAY_SIZE(ctx));
+	GEM_BUG_ON(!nctx || nctx > ARRAY_SIZE(ve));
 
 	for (n = 0; n < nctx; n++) {
-		ctx[n] = kernel_context(i915);
-		if (!ctx[n]) {
-			err = -ENOMEM;
-			nctx = n;
-			goto out;
-		}
-
-		ve[n] = intel_execlists_create_virtual(ctx[n],
-						       siblings, nsibling);
+		ve[n] = intel_execlists_create_virtual(siblings, nsibling);
 		if (IS_ERR(ve[n])) {
-			kernel_context_close(ctx[n]);
 			err = PTR_ERR(ve[n]);
 			nctx = n;
 			goto out;
@@ -1353,13 +2775,12 @@ static int nop_virtual_engine(struct drm_i915_private *i915,
 		err = intel_context_pin(ve[n]);
 		if (err) {
 			intel_context_put(ve[n]);
-			kernel_context_close(ctx[n]);
 			nctx = n;
 			goto out;
 		}
 	}
 
-	err = igt_live_test_begin(&t, i915, __func__, ve[0]->engine->name);
+	err = igt_live_test_begin(&t, gt->i915, __func__, ve[0]->engine->name);
 	if (err)
 		goto out;
 
@@ -1369,27 +2790,35 @@ static int nop_virtual_engine(struct drm_i915_private *i915,
 		if (flags & CHAIN) {
 			for (nc = 0; nc < nctx; nc++) {
 				for (n = 0; n < prime; n++) {
-					request[nc] =
-						i915_request_create(ve[nc]);
-					if (IS_ERR(request[nc])) {
-						err = PTR_ERR(request[nc]);
+					struct i915_request *rq;
+
+					rq = i915_request_create(ve[nc]);
+					if (IS_ERR(rq)) {
+						err = PTR_ERR(rq);
 						goto out;
 					}
 
-					i915_request_add(request[nc]);
+					if (request[nc])
+						i915_request_put(request[nc]);
+					request[nc] = i915_request_get(rq);
+					i915_request_add(rq);
 				}
 			}
 		} else {
 			for (n = 0; n < prime; n++) {
 				for (nc = 0; nc < nctx; nc++) {
-					request[nc] =
-						i915_request_create(ve[nc]);
-					if (IS_ERR(request[nc])) {
-						err = PTR_ERR(request[nc]);
+					struct i915_request *rq;
+
+					rq = i915_request_create(ve[nc]);
+					if (IS_ERR(rq)) {
+						err = PTR_ERR(rq);
 						goto out;
 					}
 
-					i915_request_add(request[nc]);
+					if (request[nc])
+						i915_request_put(request[nc]);
+					request[nc] = i915_request_get(rq);
+					i915_request_add(rq);
 				}
 			}
 		}
@@ -1406,7 +2835,7 @@ static int nop_virtual_engine(struct drm_i915_private *i915,
 					  request[nc]->fence.context,
 					  request[nc]->fence.seqno);
 				GEM_TRACE_DUMP();
-				i915_gem_set_wedged(i915);
+				intel_gt_set_wedged(gt);
 				break;
 			}
 		}
@@ -1415,6 +2844,11 @@ static int nop_virtual_engine(struct drm_i915_private *i915,
 		if (prime == 1)
 			times[0] = times[1];
 
+		for (nc = 0; nc < nctx; nc++) {
+			i915_request_put(request[nc]);
+			request[nc] = NULL;
+		}
+
 		if (__igt_timeout(end_time, NULL))
 			break;
 	}
@@ -1428,37 +2862,35 @@ static int nop_virtual_engine(struct drm_i915_private *i915,
 		prime, div64_u64(ktime_to_ns(times[1]), prime));
 
 out:
-	if (igt_flush_test(i915, I915_WAIT_LOCKED))
+	if (igt_flush_test(gt->i915))
 		err = -EIO;
 
 	for (nc = 0; nc < nctx; nc++) {
+		i915_request_put(request[nc]);
 		intel_context_unpin(ve[nc]);
 		intel_context_put(ve[nc]);
-		kernel_context_close(ctx[nc]);
 	}
 	return err;
 }
 
 static int live_virtual_engine(void *arg)
 {
-	struct drm_i915_private *i915 = arg;
+	struct intel_gt *gt = arg;
 	struct intel_engine_cs *siblings[MAX_ENGINE_INSTANCE + 1];
 	struct intel_engine_cs *engine;
 	enum intel_engine_id id;
 	unsigned int class, inst;
-	int err = -ENODEV;
+	int err;
 
-	if (USES_GUC_SUBMISSION(i915))
+	if (USES_GUC_SUBMISSION(gt->i915))
 		return 0;
 
-	mutex_lock(&i915->drm.struct_mutex);
-
-	for_each_engine(engine, i915, id) {
-		err = nop_virtual_engine(i915, &engine, 1, 1, 0);
+	for_each_engine(engine, gt, id) {
+		err = nop_virtual_engine(gt, &engine, 1, 1, 0);
 		if (err) {
 			pr_err("Failed to wrap engine %s: err=%d\n",
 			       engine->name, err);
-			goto out_unlock;
+			return err;
 		}
 	}
 
@@ -1467,37 +2899,34 @@ static int live_virtual_engine(void *arg)
 
 		nsibling = 0;
 		for (inst = 0; inst <= MAX_ENGINE_INSTANCE; inst++) {
-			if (!i915->engine_class[class][inst])
+			if (!gt->engine_class[class][inst])
 				continue;
 
-			siblings[nsibling++] = i915->engine_class[class][inst];
+			siblings[nsibling++] = gt->engine_class[class][inst];
 		}
 		if (nsibling < 2)
 			continue;
 
 		for (n = 1; n <= nsibling + 1; n++) {
-			err = nop_virtual_engine(i915, siblings, nsibling,
+			err = nop_virtual_engine(gt, siblings, nsibling,
 						 n, 0);
 			if (err)
-				goto out_unlock;
+				return err;
 		}
 
-		err = nop_virtual_engine(i915, siblings, nsibling, n, CHAIN);
+		err = nop_virtual_engine(gt, siblings, nsibling, n, CHAIN);
 		if (err)
-			goto out_unlock;
+			return err;
 	}
 
-out_unlock:
-	mutex_unlock(&i915->drm.struct_mutex);
-	return err;
+	return 0;
 }
 
-static int mask_virtual_engine(struct drm_i915_private *i915,
+static int mask_virtual_engine(struct intel_gt *gt,
 			       struct intel_engine_cs **siblings,
 			       unsigned int nsibling)
 {
 	struct i915_request *request[MAX_ENGINE_INSTANCE + 1];
-	struct i915_gem_context *ctx;
 	struct intel_context *ve;
 	struct igt_live_test t;
 	unsigned int n;
@@ -1508,11 +2937,7 @@ static int mask_virtual_engine(struct drm_i915_private *i915,
 	 * restrict it to our desired engine within the virtual engine.
 	 */
 
-	ctx = kernel_context(i915);
-	if (!ctx)
-		return -ENOMEM;
-
-	ve = intel_execlists_create_virtual(ctx, siblings, nsibling);
+	ve = intel_execlists_create_virtual(siblings, nsibling);
 	if (IS_ERR(ve)) {
 		err = PTR_ERR(ve);
 		goto out_close;
@@ -1522,7 +2947,7 @@ static int mask_virtual_engine(struct drm_i915_private *i915,
 	if (err)
 		goto out_put;
 
-	err = igt_live_test_begin(&t, i915, __func__, ve->engine->name);
+	err = igt_live_test_begin(&t, gt->i915, __func__, ve->engine->name);
 	if (err)
 		goto out_unpin;
 
@@ -1553,7 +2978,7 @@ static int mask_virtual_engine(struct drm_i915_private *i915,
 				  request[n]->fence.context,
 				  request[n]->fence.seqno);
 			GEM_TRACE_DUMP();
-			i915_gem_set_wedged(i915);
+			intel_gt_set_wedged(gt);
 			err = -EIO;
 			goto out;
 		}
@@ -1568,11 +2993,8 @@ static int mask_virtual_engine(struct drm_i915_private *i915,
 	}
 
 	err = igt_live_test_end(&t);
-	if (err)
-		goto out;
-
 out:
-	if (igt_flush_test(i915, I915_WAIT_LOCKED))
+	if (igt_flush_test(gt->i915))
 		err = -EIO;
 
 	for (n = 0; n < nsibling; n++)
@@ -1583,46 +3005,183 @@ out_unpin:
 out_put:
 	intel_context_put(ve);
 out_close:
-	kernel_context_close(ctx);
 	return err;
 }
 
 static int live_virtual_mask(void *arg)
 {
-	struct drm_i915_private *i915 = arg;
+	struct intel_gt *gt = arg;
 	struct intel_engine_cs *siblings[MAX_ENGINE_INSTANCE + 1];
 	unsigned int class, inst;
-	int err = 0;
+	int err;
 
-	if (USES_GUC_SUBMISSION(i915))
+	if (USES_GUC_SUBMISSION(gt->i915))
 		return 0;
 
-	mutex_lock(&i915->drm.struct_mutex);
-
 	for (class = 0; class <= MAX_ENGINE_CLASS; class++) {
 		unsigned int nsibling;
 
 		nsibling = 0;
 		for (inst = 0; inst <= MAX_ENGINE_INSTANCE; inst++) {
-			if (!i915->engine_class[class][inst])
+			if (!gt->engine_class[class][inst])
 				break;
 
-			siblings[nsibling++] = i915->engine_class[class][inst];
+			siblings[nsibling++] = gt->engine_class[class][inst];
 		}
 		if (nsibling < 2)
 			continue;
 
-		err = mask_virtual_engine(i915, siblings, nsibling);
+		err = mask_virtual_engine(gt, siblings, nsibling);
 		if (err)
-			goto out_unlock;
+			return err;
 	}
 
-out_unlock:
-	mutex_unlock(&i915->drm.struct_mutex);
+	return 0;
+}
+
+static int preserved_virtual_engine(struct intel_gt *gt,
+				    struct intel_engine_cs **siblings,
+				    unsigned int nsibling)
+{
+	struct i915_request *last = NULL;
+	struct intel_context *ve;
+	struct i915_vma *scratch;
+	struct igt_live_test t;
+	unsigned int n;
+	int err = 0;
+	u32 *cs;
+
+	scratch = create_scratch(siblings[0]->gt);
+	if (IS_ERR(scratch))
+		return PTR_ERR(scratch);
+
+	ve = intel_execlists_create_virtual(siblings, nsibling);
+	if (IS_ERR(ve)) {
+		err = PTR_ERR(ve);
+		goto out_scratch;
+	}
+
+	err = intel_context_pin(ve);
+	if (err)
+		goto out_put;
+
+	err = igt_live_test_begin(&t, gt->i915, __func__, ve->engine->name);
+	if (err)
+		goto out_unpin;
+
+	for (n = 0; n < NUM_GPR_DW; n++) {
+		struct intel_engine_cs *engine = siblings[n % nsibling];
+		struct i915_request *rq;
+
+		rq = i915_request_create(ve);
+		if (IS_ERR(rq)) {
+			err = PTR_ERR(rq);
+			goto out_end;
+		}
+
+		i915_request_put(last);
+		last = i915_request_get(rq);
+
+		cs = intel_ring_begin(rq, 8);
+		if (IS_ERR(cs)) {
+			i915_request_add(rq);
+			err = PTR_ERR(cs);
+			goto out_end;
+		}
+
+		*cs++ = MI_STORE_REGISTER_MEM_GEN8 | MI_USE_GGTT;
+		*cs++ = CS_GPR(engine, n);
+		*cs++ = i915_ggtt_offset(scratch) + n * sizeof(u32);
+		*cs++ = 0;
+
+		*cs++ = MI_LOAD_REGISTER_IMM(1);
+		*cs++ = CS_GPR(engine, (n + 1) % NUM_GPR_DW);
+		*cs++ = n + 1;
+
+		*cs++ = MI_NOOP;
+		intel_ring_advance(rq, cs);
+
+		/* Restrict this request to run on a particular engine */
+		rq->execution_mask = engine->mask;
+		i915_request_add(rq);
+	}
+
+	if (i915_request_wait(last, 0, HZ / 5) < 0) {
+		err = -ETIME;
+		goto out_end;
+	}
+
+	cs = i915_gem_object_pin_map(scratch->obj, I915_MAP_WB);
+	if (IS_ERR(cs)) {
+		err = PTR_ERR(cs);
+		goto out_end;
+	}
+
+	for (n = 0; n < NUM_GPR_DW; n++) {
+		if (cs[n] != n) {
+			pr_err("Incorrect value[%d] found for GPR[%d]\n",
+			       cs[n], n);
+			err = -EINVAL;
+			break;
+		}
+	}
+
+	i915_gem_object_unpin_map(scratch->obj);
+
+out_end:
+	if (igt_live_test_end(&t))
+		err = -EIO;
+	i915_request_put(last);
+out_unpin:
+	intel_context_unpin(ve);
+out_put:
+	intel_context_put(ve);
+out_scratch:
+	i915_vma_unpin_and_release(&scratch, 0);
 	return err;
 }
 
-static int bond_virtual_engine(struct drm_i915_private *i915,
+static int live_virtual_preserved(void *arg)
+{
+	struct intel_gt *gt = arg;
+	struct intel_engine_cs *siblings[MAX_ENGINE_INSTANCE + 1];
+	unsigned int class, inst;
+
+	/*
+	 * Check that the context image retains non-privileged (user) registers
+	 * from one engine to the next. For this we check that the CS_GPR
+	 * are preserved.
+	 */
+
+	if (USES_GUC_SUBMISSION(gt->i915))
+		return 0;
+
+	/* As we use CS_GPR we cannot run before they existed on all engines. */
+	if (INTEL_GEN(gt->i915) < 9)
+		return 0;
+
+	for (class = 0; class <= MAX_ENGINE_CLASS; class++) {
+		int nsibling, err;
+
+		nsibling = 0;
+		for (inst = 0; inst <= MAX_ENGINE_INSTANCE; inst++) {
+			if (!gt->engine_class[class][inst])
+				continue;
+
+			siblings[nsibling++] = gt->engine_class[class][inst];
+		}
+		if (nsibling < 2)
+			continue;
+
+		err = preserved_virtual_engine(gt, siblings, nsibling);
+		if (err)
+			return err;
+	}
+
+	return 0;
+}
+
+static int bond_virtual_engine(struct intel_gt *gt,
 			       unsigned int class,
 			       struct intel_engine_cs **siblings,
 			       unsigned int nsibling,
@@ -1630,21 +3189,59 @@ static int bond_virtual_engine(struct drm_i915_private *i915,
 #define BOND_SCHEDULE BIT(0)
 {
 	struct intel_engine_cs *master;
-	struct i915_gem_context *ctx;
 	struct i915_request *rq[16];
 	enum intel_engine_id id;
+	struct igt_spinner spin;
 	unsigned long n;
 	int err;
 
+	/*
+	 * A set of bonded requests is intended to be run concurrently
+	 * across a number of engines. We use one request per-engine
+	 * and a magic fence to schedule each of the bonded requests
+	 * at the same time. A consequence of our current scheduler is that
+	 * we only move requests to the HW ready queue when the request
+	 * becomes ready, that is when all of its prerequisite fences have
+	 * been signaled. As one of those fences is the master submit fence,
+	 * there is a delay on all secondary fences as the HW may be
+	 * currently busy. Equally, as all the requests are independent,
+	 * they may have other fences that delay individual request
+	 * submission to HW. Ergo, we do not guarantee that all requests are
+	 * immediately submitted to HW at the same time, just that if the
+	 * rules are abided by, they are ready at the same time as the
+	 * first is submitted. Userspace can embed semaphores in its batch
+	 * to ensure parallel execution of its phases as it requires.
+	 * Though naturally it gets requested that perhaps the scheduler should
+	 * take care of parallel execution, even across preemption events on
+	 * different HW. (The proper answer is of course "lalalala".)
+	 *
+	 * With the submit-fence, we have identified three possible phases
+	 * of synchronisation depending on the master fence: queued (not
+	 * ready), executing, and signaled. The first two are quite simple
+	 * and checked below. However, the signaled master fence handling is
+	 * contentious. Currently we do not distinguish between a signaled
+	 * fence and an expired fence, as once signaled it does not convey
+	 * any information about the previous execution. It may even be freed
+	 * and hence checking later it may not exist at all. Ergo we currently
+	 * do not apply the bonding constraint for an already signaled fence,
+	 * as our expectation is that it should not constrain the secondaries
+	 * and is outside of the scope of the bonded request API (i.e. all
+	 * userspace requests are meant to be running in parallel). As
+	 * it imposes no constraint, and is effectively a no-op, we do not
+	 * check below as normal execution flows are checked extensively above.
+	 *
+	 * XXX Is the degenerate handling of signaled submit fences the
+	 * expected behaviour for userpace?
+	 */
+
 	GEM_BUG_ON(nsibling >= ARRAY_SIZE(rq) - 1);
 
-	ctx = kernel_context(i915);
-	if (!ctx)
+	if (igt_spinner_init(&spin, gt))
 		return -ENOMEM;
 
 	err = 0;
 	rq[0] = ERR_PTR(-ENOMEM);
-	for_each_engine(master, i915, id) {
+	for_each_engine(master, gt, id) {
 		struct i915_sw_fence fence = {};
 
 		if (master->class == class)
@@ -1652,7 +3249,9 @@ static int bond_virtual_engine(struct drm_i915_private *i915,
 
 		memset_p((void *)rq, ERR_PTR(-EINVAL), ARRAY_SIZE(rq));
 
-		rq[0] = igt_request_alloc(ctx, master);
+		rq[0] = igt_spinner_create_request(&spin,
+						   master->kernel_context,
+						   MI_NOOP);
 		if (IS_ERR(rq[0])) {
 			err = PTR_ERR(rq[0]);
 			goto out;
@@ -1665,16 +3264,21 @@ static int bond_virtual_engine(struct drm_i915_private *i915,
 							       &fence,
 							       GFP_KERNEL);
 		}
+
 		i915_request_add(rq[0]);
 		if (err < 0)
 			goto out;
 
+		if (!(flags & BOND_SCHEDULE) &&
+		    !igt_wait_for_spinner(&spin, rq[0])) {
+			err = -EIO;
+			goto out;
+		}
+
 		for (n = 0; n < nsibling; n++) {
 			struct intel_context *ve;
 
-			ve = intel_execlists_create_virtual(ctx,
-							    siblings,
-							    nsibling);
+			ve = intel_execlists_create_virtual(siblings, nsibling);
 			if (IS_ERR(ve)) {
 				err = PTR_ERR(ve);
 				onstack_fence_fini(&fence);
@@ -1716,6 +3320,8 @@ static int bond_virtual_engine(struct drm_i915_private *i915,
 			}
 		}
 		onstack_fence_fini(&fence);
+		intel_engine_flush_submission(master);
+		igt_spinner_end(&spin);
 
 		if (i915_request_wait(rq[0], 0, HZ / 10) < 0) {
 			pr_err("Master request did not execute (on %s)!\n",
@@ -1749,10 +3355,10 @@ static int bond_virtual_engine(struct drm_i915_private *i915,
 out:
 	for (n = 0; !IS_ERR(rq[n]); n++)
 		i915_request_put(rq[n]);
-	if (igt_flush_test(i915, I915_WAIT_LOCKED))
+	if (igt_flush_test(gt->i915))
 		err = -EIO;
 
-	kernel_context_close(ctx);
+	igt_spinner_fini(&spin);
 	return err;
 }
 
@@ -1766,70 +3372,725 @@ static int live_virtual_bond(void *arg)
 		{ "schedule", BOND_SCHEDULE },
 		{ },
 	};
-	struct drm_i915_private *i915 = arg;
+	struct intel_gt *gt = arg;
 	struct intel_engine_cs *siblings[MAX_ENGINE_INSTANCE + 1];
 	unsigned int class, inst;
-	int err = 0;
+	int err;
 
-	if (USES_GUC_SUBMISSION(i915))
+	if (USES_GUC_SUBMISSION(gt->i915))
 		return 0;
 
-	mutex_lock(&i915->drm.struct_mutex);
-
 	for (class = 0; class <= MAX_ENGINE_CLASS; class++) {
 		const struct phase *p;
 		int nsibling;
 
 		nsibling = 0;
 		for (inst = 0; inst <= MAX_ENGINE_INSTANCE; inst++) {
-			if (!i915->engine_class[class][inst])
+			if (!gt->engine_class[class][inst])
 				break;
 
 			GEM_BUG_ON(nsibling == ARRAY_SIZE(siblings));
-			siblings[nsibling++] = i915->engine_class[class][inst];
+			siblings[nsibling++] = gt->engine_class[class][inst];
 		}
 		if (nsibling < 2)
 			continue;
 
 		for (p = phases; p->name; p++) {
-			err = bond_virtual_engine(i915,
+			err = bond_virtual_engine(gt,
 						  class, siblings, nsibling,
 						  p->flags);
 			if (err) {
 				pr_err("%s(%s): failed class=%d, nsibling=%d, err=%d\n",
 				       __func__, p->name, class, nsibling, err);
-				goto out_unlock;
+				return err;
 			}
 		}
 	}
 
-out_unlock:
-	mutex_unlock(&i915->drm.struct_mutex);
+	return 0;
+}
+
+static int reset_virtual_engine(struct intel_gt *gt,
+				struct intel_engine_cs **siblings,
+				unsigned int nsibling)
+{
+	struct intel_engine_cs *engine;
+	struct intel_context *ve;
+	unsigned long *heartbeat;
+	struct igt_spinner spin;
+	struct i915_request *rq;
+	unsigned int n;
+	int err = 0;
+
+	/*
+	 * In order to support offline error capture for fast preempt reset,
+	 * we need to decouple the guilty request and ensure that it and its
+	 * descendents are not executed while the capture is in progress.
+	 */
+
+	heartbeat = kmalloc_array(nsibling, sizeof(*heartbeat), GFP_KERNEL);
+	if (!heartbeat)
+		return -ENOMEM;
+
+	if (igt_spinner_init(&spin, gt)) {
+		err = -ENOMEM;
+		goto out_free;
+	}
+
+	ve = intel_execlists_create_virtual(siblings, nsibling);
+	if (IS_ERR(ve)) {
+		err = PTR_ERR(ve);
+		goto out_spin;
+	}
+
+	for (n = 0; n < nsibling; n++)
+		engine_heartbeat_disable(siblings[n], &heartbeat[n]);
+
+	rq = igt_spinner_create_request(&spin, ve, MI_ARB_CHECK);
+	if (IS_ERR(rq)) {
+		err = PTR_ERR(rq);
+		goto out_heartbeat;
+	}
+	i915_request_add(rq);
+
+	if (!igt_wait_for_spinner(&spin, rq)) {
+		intel_gt_set_wedged(gt);
+		err = -ETIME;
+		goto out_heartbeat;
+	}
+
+	engine = rq->engine;
+	GEM_BUG_ON(engine == ve->engine);
+
+	/* Take ownership of the reset and tasklet */
+	if (test_and_set_bit(I915_RESET_ENGINE + engine->id,
+			     &gt->reset.flags)) {
+		intel_gt_set_wedged(gt);
+		err = -EBUSY;
+		goto out_heartbeat;
+	}
+	tasklet_disable(&engine->execlists.tasklet);
+
+	engine->execlists.tasklet.func(engine->execlists.tasklet.data);
+	GEM_BUG_ON(execlists_active(&engine->execlists) != rq);
+
+	/* Fake a preemption event; failed of course */
+	spin_lock_irq(&engine->active.lock);
+	__unwind_incomplete_requests(engine);
+	spin_unlock_irq(&engine->active.lock);
+	GEM_BUG_ON(rq->engine != ve->engine);
+
+	/* Reset the engine while keeping our active request on hold */
+	execlists_hold(engine, rq);
+	GEM_BUG_ON(!i915_request_on_hold(rq));
+
+	intel_engine_reset(engine, NULL);
+	GEM_BUG_ON(rq->fence.error != -EIO);
+
+	/* Release our grasp on the engine, letting CS flow again */
+	tasklet_enable(&engine->execlists.tasklet);
+	clear_and_wake_up_bit(I915_RESET_ENGINE + engine->id, &gt->reset.flags);
+
+	/* Check that we do not resubmit the held request */
+	i915_request_get(rq);
+	if (!i915_request_wait(rq, 0, HZ / 5)) {
+		pr_err("%s: on hold request completed!\n",
+		       engine->name);
+		intel_gt_set_wedged(gt);
+		err = -EIO;
+		goto out_rq;
+	}
+	GEM_BUG_ON(!i915_request_on_hold(rq));
+
+	/* But is resubmitted on release */
+	execlists_unhold(engine, rq);
+	if (i915_request_wait(rq, 0, HZ / 5) < 0) {
+		pr_err("%s: held request did not complete!\n",
+		       engine->name);
+		intel_gt_set_wedged(gt);
+		err = -ETIME;
+	}
+
+out_rq:
+	i915_request_put(rq);
+out_heartbeat:
+	for (n = 0; n < nsibling; n++)
+		engine_heartbeat_enable(siblings[n], heartbeat[n]);
+
+	intel_context_put(ve);
+out_spin:
+	igt_spinner_fini(&spin);
+out_free:
+	kfree(heartbeat);
 	return err;
 }
 
+static int live_virtual_reset(void *arg)
+{
+	struct intel_gt *gt = arg;
+	struct intel_engine_cs *siblings[MAX_ENGINE_INSTANCE + 1];
+	unsigned int class, inst;
+
+	/*
+	 * Check that we handle a reset event within a virtual engine.
+	 * Only the physical engine is reset, but we have to check the flow
+	 * of the virtual requests around the reset, and make sure it is not
+	 * forgotten.
+	 */
+
+	if (USES_GUC_SUBMISSION(gt->i915))
+		return 0;
+
+	if (!intel_has_reset_engine(gt))
+		return 0;
+
+	for (class = 0; class <= MAX_ENGINE_CLASS; class++) {
+		int nsibling, err;
+
+		nsibling = 0;
+		for (inst = 0; inst <= MAX_ENGINE_INSTANCE; inst++) {
+			if (!gt->engine_class[class][inst])
+				continue;
+
+			siblings[nsibling++] = gt->engine_class[class][inst];
+		}
+		if (nsibling < 2)
+			continue;
+
+		err = reset_virtual_engine(gt, siblings, nsibling);
+		if (err)
+			return err;
+	}
+
+	return 0;
+}
+
 int intel_execlists_live_selftests(struct drm_i915_private *i915)
 {
 	static const struct i915_subtest tests[] = {
 		SUBTEST(live_sanitycheck),
+		SUBTEST(live_unlite_switch),
+		SUBTEST(live_unlite_preempt),
+		SUBTEST(live_hold_reset),
+		SUBTEST(live_timeslice_preempt),
+		SUBTEST(live_timeslice_queue),
 		SUBTEST(live_busywait_preempt),
 		SUBTEST(live_preempt),
 		SUBTEST(live_late_preempt),
+		SUBTEST(live_nopreempt),
+		SUBTEST(live_preempt_cancel),
 		SUBTEST(live_suppress_self_preempt),
 		SUBTEST(live_suppress_wait_preempt),
 		SUBTEST(live_chain_preempt),
+		SUBTEST(live_preempt_gang),
 		SUBTEST(live_preempt_hang),
+		SUBTEST(live_preempt_timeout),
 		SUBTEST(live_preempt_smoke),
 		SUBTEST(live_virtual_engine),
 		SUBTEST(live_virtual_mask),
+		SUBTEST(live_virtual_preserved),
 		SUBTEST(live_virtual_bond),
+		SUBTEST(live_virtual_reset),
 	};
 
 	if (!HAS_EXECLISTS(i915))
 		return 0;
 
-	if (i915_terminally_wedged(i915))
+	if (intel_gt_is_wedged(&i915->gt))
+		return 0;
+
+	return intel_gt_live_subtests(tests, &i915->gt);
+}
+
+static void hexdump(const void *buf, size_t len)
+{
+	const size_t rowsize = 8 * sizeof(u32);
+	const void *prev = NULL;
+	bool skip = false;
+	size_t pos;
+
+	for (pos = 0; pos < len; pos += rowsize) {
+		char line[128];
+
+		if (prev && !memcmp(prev, buf + pos, rowsize)) {
+			if (!skip) {
+				pr_info("*\n");
+				skip = true;
+			}
+			continue;
+		}
+
+		WARN_ON_ONCE(hex_dump_to_buffer(buf + pos, len - pos,
+						rowsize, sizeof(u32),
+						line, sizeof(line),
+						false) >= sizeof(line));
+		pr_info("[%04zx] %s\n", pos, line);
+
+		prev = buf + pos;
+		skip = false;
+	}
+}
+
+static int live_lrc_layout(void *arg)
+{
+	struct intel_gt *gt = arg;
+	struct intel_engine_cs *engine;
+	enum intel_engine_id id;
+	u32 *lrc;
+	int err;
+
+	/*
+	 * Check the registers offsets we use to create the initial reg state
+	 * match the layout saved by HW.
+	 */
+
+	lrc = kmalloc(PAGE_SIZE, GFP_KERNEL);
+	if (!lrc)
+		return -ENOMEM;
+
+	err = 0;
+	for_each_engine(engine, gt, id) {
+		u32 *hw;
+		int dw;
+
+		if (!engine->default_state)
+			continue;
+
+		hw = i915_gem_object_pin_map(engine->default_state,
+					     I915_MAP_WB);
+		if (IS_ERR(hw)) {
+			err = PTR_ERR(hw);
+			break;
+		}
+		hw += LRC_STATE_PN * PAGE_SIZE / sizeof(*hw);
+
+		execlists_init_reg_state(memset(lrc, POISON_INUSE, PAGE_SIZE),
+					 engine->kernel_context,
+					 engine,
+					 engine->kernel_context->ring,
+					 true);
+
+		dw = 0;
+		do {
+			u32 lri = hw[dw];
+
+			if (lri == 0) {
+				dw++;
+				continue;
+			}
+
+			if (lrc[dw] == 0) {
+				pr_debug("%s: skipped instruction %x at dword %d\n",
+					 engine->name, lri, dw);
+				dw++;
+				continue;
+			}
+
+			if ((lri & GENMASK(31, 23)) != MI_INSTR(0x22, 0)) {
+				pr_err("%s: Expected LRI command at dword %d, found %08x\n",
+				       engine->name, dw, lri);
+				err = -EINVAL;
+				break;
+			}
+
+			if (lrc[dw] != lri) {
+				pr_err("%s: LRI command mismatch at dword %d, expected %08x found %08x\n",
+				       engine->name, dw, lri, lrc[dw]);
+				err = -EINVAL;
+				break;
+			}
+
+			lri &= 0x7f;
+			lri++;
+			dw++;
+
+			while (lri) {
+				if (hw[dw] != lrc[dw]) {
+					pr_err("%s: Different registers found at dword %d, expected %x, found %x\n",
+					       engine->name, dw, hw[dw], lrc[dw]);
+					err = -EINVAL;
+					break;
+				}
+
+				/*
+				 * Skip over the actual register value as we
+				 * expect that to differ.
+				 */
+				dw += 2;
+				lri -= 2;
+			}
+		} while ((lrc[dw] & ~BIT(0)) != MI_BATCH_BUFFER_END);
+
+		if (err) {
+			pr_info("%s: HW register image:\n", engine->name);
+			hexdump(hw, PAGE_SIZE);
+
+			pr_info("%s: SW register image:\n", engine->name);
+			hexdump(lrc, PAGE_SIZE);
+		}
+
+		i915_gem_object_unpin_map(engine->default_state);
+		if (err)
+			break;
+	}
+
+	kfree(lrc);
+	return err;
+}
+
+static int find_offset(const u32 *lri, u32 offset)
+{
+	int i;
+
+	for (i = 0; i < PAGE_SIZE / sizeof(u32); i++)
+		if (lri[i] == offset)
+			return i;
+
+	return -1;
+}
+
+static int live_lrc_fixed(void *arg)
+{
+	struct intel_gt *gt = arg;
+	struct intel_engine_cs *engine;
+	enum intel_engine_id id;
+	int err = 0;
+
+	/*
+	 * Check the assumed register offsets match the actual locations in
+	 * the context image.
+	 */
+
+	for_each_engine(engine, gt, id) {
+		const struct {
+			u32 reg;
+			u32 offset;
+			const char *name;
+		} tbl[] = {
+			{
+				i915_mmio_reg_offset(RING_START(engine->mmio_base)),
+				CTX_RING_START - 1,
+				"RING_START"
+			},
+			{
+				i915_mmio_reg_offset(RING_CTL(engine->mmio_base)),
+				CTX_RING_CTL - 1,
+				"RING_CTL"
+			},
+			{
+				i915_mmio_reg_offset(RING_HEAD(engine->mmio_base)),
+				CTX_RING_HEAD - 1,
+				"RING_HEAD"
+			},
+			{
+				i915_mmio_reg_offset(RING_TAIL(engine->mmio_base)),
+				CTX_RING_TAIL - 1,
+				"RING_TAIL"
+			},
+			{
+				i915_mmio_reg_offset(RING_MI_MODE(engine->mmio_base)),
+				lrc_ring_mi_mode(engine),
+				"RING_MI_MODE"
+			},
+			{
+				i915_mmio_reg_offset(RING_BBSTATE(engine->mmio_base)),
+				CTX_BB_STATE - 1,
+				"BB_STATE"
+			},
+			{ },
+		}, *t;
+		u32 *hw;
+
+		if (!engine->default_state)
+			continue;
+
+		hw = i915_gem_object_pin_map(engine->default_state,
+					     I915_MAP_WB);
+		if (IS_ERR(hw)) {
+			err = PTR_ERR(hw);
+			break;
+		}
+		hw += LRC_STATE_PN * PAGE_SIZE / sizeof(*hw);
+
+		for (t = tbl; t->name; t++) {
+			int dw = find_offset(hw, t->reg);
+
+			if (dw != t->offset) {
+				pr_err("%s: Offset for %s [0x%x] mismatch, found %x, expected %x\n",
+				       engine->name,
+				       t->name,
+				       t->reg,
+				       dw,
+				       t->offset);
+				err = -EINVAL;
+			}
+		}
+
+		i915_gem_object_unpin_map(engine->default_state);
+	}
+
+	return err;
+}
+
+static int __live_lrc_state(struct intel_engine_cs *engine,
+			    struct i915_vma *scratch)
+{
+	struct intel_context *ce;
+	struct i915_request *rq;
+	enum {
+		RING_START_IDX = 0,
+		RING_TAIL_IDX,
+		MAX_IDX
+	};
+	u32 expected[MAX_IDX];
+	u32 *cs;
+	int err;
+	int n;
+
+	ce = intel_context_create(engine);
+	if (IS_ERR(ce))
+		return PTR_ERR(ce);
+
+	err = intel_context_pin(ce);
+	if (err)
+		goto err_put;
+
+	rq = i915_request_create(ce);
+	if (IS_ERR(rq)) {
+		err = PTR_ERR(rq);
+		goto err_unpin;
+	}
+
+	cs = intel_ring_begin(rq, 4 * MAX_IDX);
+	if (IS_ERR(cs)) {
+		err = PTR_ERR(cs);
+		i915_request_add(rq);
+		goto err_unpin;
+	}
+
+	*cs++ = MI_STORE_REGISTER_MEM_GEN8 | MI_USE_GGTT;
+	*cs++ = i915_mmio_reg_offset(RING_START(engine->mmio_base));
+	*cs++ = i915_ggtt_offset(scratch) + RING_START_IDX * sizeof(u32);
+	*cs++ = 0;
+
+	expected[RING_START_IDX] = i915_ggtt_offset(ce->ring->vma);
+
+	*cs++ = MI_STORE_REGISTER_MEM_GEN8 | MI_USE_GGTT;
+	*cs++ = i915_mmio_reg_offset(RING_TAIL(engine->mmio_base));
+	*cs++ = i915_ggtt_offset(scratch) + RING_TAIL_IDX * sizeof(u32);
+	*cs++ = 0;
+
+	i915_request_get(rq);
+	i915_request_add(rq);
+
+	intel_engine_flush_submission(engine);
+	expected[RING_TAIL_IDX] = ce->ring->tail;
+
+	if (i915_request_wait(rq, 0, HZ / 5) < 0) {
+		err = -ETIME;
+		goto err_rq;
+	}
+
+	cs = i915_gem_object_pin_map(scratch->obj, I915_MAP_WB);
+	if (IS_ERR(cs)) {
+		err = PTR_ERR(cs);
+		goto err_rq;
+	}
+
+	for (n = 0; n < MAX_IDX; n++) {
+		if (cs[n] != expected[n]) {
+			pr_err("%s: Stored register[%d] value[0x%x] did not match expected[0x%x]\n",
+			       engine->name, n, cs[n], expected[n]);
+			err = -EINVAL;
+			break;
+		}
+	}
+
+	i915_gem_object_unpin_map(scratch->obj);
+
+err_rq:
+	i915_request_put(rq);
+err_unpin:
+	intel_context_unpin(ce);
+err_put:
+	intel_context_put(ce);
+	return err;
+}
+
+static int live_lrc_state(void *arg)
+{
+	struct intel_gt *gt = arg;
+	struct intel_engine_cs *engine;
+	struct i915_vma *scratch;
+	enum intel_engine_id id;
+	int err = 0;
+
+	/*
+	 * Check the live register state matches what we expect for this
+	 * intel_context.
+	 */
+
+	scratch = create_scratch(gt);
+	if (IS_ERR(scratch))
+		return PTR_ERR(scratch);
+
+	for_each_engine(engine, gt, id) {
+		err = __live_lrc_state(engine, scratch);
+		if (err)
+			break;
+	}
+
+	if (igt_flush_test(gt->i915))
+		err = -EIO;
+
+	i915_vma_unpin_and_release(&scratch, 0);
+	return err;
+}
+
+static int gpr_make_dirty(struct intel_engine_cs *engine)
+{
+	struct i915_request *rq;
+	u32 *cs;
+	int n;
+
+	rq = intel_engine_create_kernel_request(engine);
+	if (IS_ERR(rq))
+		return PTR_ERR(rq);
+
+	cs = intel_ring_begin(rq, 2 * NUM_GPR_DW + 2);
+	if (IS_ERR(cs)) {
+		i915_request_add(rq);
+		return PTR_ERR(cs);
+	}
+
+	*cs++ = MI_LOAD_REGISTER_IMM(NUM_GPR_DW);
+	for (n = 0; n < NUM_GPR_DW; n++) {
+		*cs++ = CS_GPR(engine, n);
+		*cs++ = STACK_MAGIC;
+	}
+	*cs++ = MI_NOOP;
+
+	intel_ring_advance(rq, cs);
+	i915_request_add(rq);
+
+	return 0;
+}
+
+static int __live_gpr_clear(struct intel_engine_cs *engine,
+			    struct i915_vma *scratch)
+{
+	struct intel_context *ce;
+	struct i915_request *rq;
+	u32 *cs;
+	int err;
+	int n;
+
+	if (INTEL_GEN(engine->i915) < 9 && engine->class != RENDER_CLASS)
+		return 0; /* GPR only on rcs0 for gen8 */
+
+	err = gpr_make_dirty(engine);
+	if (err)
+		return err;
+
+	ce = intel_context_create(engine);
+	if (IS_ERR(ce))
+		return PTR_ERR(ce);
+
+	rq = intel_context_create_request(ce);
+	if (IS_ERR(rq)) {
+		err = PTR_ERR(rq);
+		goto err_put;
+	}
+
+	cs = intel_ring_begin(rq, 4 * NUM_GPR_DW);
+	if (IS_ERR(cs)) {
+		err = PTR_ERR(cs);
+		i915_request_add(rq);
+		goto err_put;
+	}
+
+	for (n = 0; n < NUM_GPR_DW; n++) {
+		*cs++ = MI_STORE_REGISTER_MEM_GEN8 | MI_USE_GGTT;
+		*cs++ = CS_GPR(engine, n);
+		*cs++ = i915_ggtt_offset(scratch) + n * sizeof(u32);
+		*cs++ = 0;
+	}
+
+	i915_request_get(rq);
+	i915_request_add(rq);
+
+	if (i915_request_wait(rq, 0, HZ / 5) < 0) {
+		err = -ETIME;
+		goto err_rq;
+	}
+
+	cs = i915_gem_object_pin_map(scratch->obj, I915_MAP_WB);
+	if (IS_ERR(cs)) {
+		err = PTR_ERR(cs);
+		goto err_rq;
+	}
+
+	for (n = 0; n < NUM_GPR_DW; n++) {
+		if (cs[n]) {
+			pr_err("%s: GPR[%d].%s was not zero, found 0x%08x!\n",
+			       engine->name,
+			       n / 2, n & 1 ? "udw" : "ldw",
+			       cs[n]);
+			err = -EINVAL;
+			break;
+		}
+	}
+
+	i915_gem_object_unpin_map(scratch->obj);
+
+err_rq:
+	i915_request_put(rq);
+err_put:
+	intel_context_put(ce);
+	return err;
+}
+
+static int live_gpr_clear(void *arg)
+{
+	struct intel_gt *gt = arg;
+	struct intel_engine_cs *engine;
+	struct i915_vma *scratch;
+	enum intel_engine_id id;
+	int err = 0;
+
+	/*
+	 * Check that GPR registers are cleared in new contexts as we need
+	 * to avoid leaking any information from previous contexts.
+	 */
+
+	scratch = create_scratch(gt);
+	if (IS_ERR(scratch))
+		return PTR_ERR(scratch);
+
+	for_each_engine(engine, gt, id) {
+		err = __live_gpr_clear(engine, scratch);
+		if (err)
+			break;
+	}
+
+	if (igt_flush_test(gt->i915))
+		err = -EIO;
+
+	i915_vma_unpin_and_release(&scratch, 0);
+	return err;
+}
+
+int intel_lrc_live_selftests(struct drm_i915_private *i915)
+{
+	static const struct i915_subtest tests[] = {
+		SUBTEST(live_lrc_layout),
+		SUBTEST(live_lrc_fixed),
+		SUBTEST(live_lrc_state),
+		SUBTEST(live_gpr_clear),
+	};
+
+	if (!HAS_LOGICAL_RING_CONTEXTS(i915))
 		return 0;
 
-	return i915_subtests(tests, i915);
+	return intel_gt_live_subtests(tests, &i915->gt);
 }