summaryrefslogtreecommitdiffstats
path: root/drivers/gpu/drm/i915/gt
diff options
context:
space:
mode:
Diffstat (limited to 'drivers/gpu/drm/i915/gt')
-rw-r--r--drivers/gpu/drm/i915/gt/intel_breadcrumbs.c19
-rw-r--r--drivers/gpu/drm/i915/gt/intel_context.c22
-rw-r--r--drivers/gpu/drm/i915/gt/intel_context_types.h1
-rw-r--r--drivers/gpu/drm/i915/gt/intel_engine.h1
-rw-r--r--drivers/gpu/drm/i915/gt/intel_engine_cs.c73
-rw-r--r--drivers/gpu/drm/i915/gt/intel_engine_pm.c18
-rw-r--r--drivers/gpu/drm/i915/gt/intel_engine_pool.c15
-rw-r--r--drivers/gpu/drm/i915/gt/intel_engine_pool.h4
-rw-r--r--drivers/gpu/drm/i915/gt/intel_engine_types.h35
-rw-r--r--drivers/gpu/drm/i915/gt/intel_gpu_commands.h30
-rw-r--r--drivers/gpu/drm/i915/gt/intel_gt.c149
-rw-r--r--drivers/gpu/drm/i915/gt/intel_gt.h12
-rw-r--r--drivers/gpu/drm/i915/gt/intel_gt_pm.c85
-rw-r--r--drivers/gpu/drm/i915/gt/intel_gt_pm.h11
-rw-r--r--drivers/gpu/drm/i915/gt/intel_gt_requests.c123
-rw-r--r--drivers/gpu/drm/i915/gt/intel_gt_requests.h24
-rw-r--r--drivers/gpu/drm/i915/gt/intel_gt_types.h18
-rw-r--r--drivers/gpu/drm/i915/gt/intel_hangcheck.c5
-rw-r--r--drivers/gpu/drm/i915/gt/intel_lrc.c1240
-rw-r--r--drivers/gpu/drm/i915/gt/intel_lrc.h14
-rw-r--r--drivers/gpu/drm/i915/gt/intel_lrc_reg.h66
-rw-r--r--drivers/gpu/drm/i915/gt/intel_rc6.c712
-rw-r--r--drivers/gpu/drm/i915/gt/intel_rc6.h25
-rw-r--r--drivers/gpu/drm/i915/gt/intel_rc6_types.h28
-rw-r--r--drivers/gpu/drm/i915/gt/intel_reset.c131
-rw-r--r--drivers/gpu/drm/i915/gt/intel_reset.h16
-rw-r--r--drivers/gpu/drm/i915/gt/intel_reset_types.h6
-rw-r--r--drivers/gpu/drm/i915/gt/intel_ringbuffer.c99
-rw-r--r--drivers/gpu/drm/i915/gt/intel_sseu.c37
-rw-r--r--drivers/gpu/drm/i915/gt/intel_sseu.h37
-rw-r--r--drivers/gpu/drm/i915/gt/intel_timeline.c46
-rw-r--r--drivers/gpu/drm/i915/gt/intel_timeline_types.h10
-rw-r--r--drivers/gpu/drm/i915/gt/intel_workarounds.c13
-rw-r--r--drivers/gpu/drm/i915/gt/selftest_context.c62
-rw-r--r--drivers/gpu/drm/i915/gt/selftest_gt_pm.c50
-rw-r--r--drivers/gpu/drm/i915/gt/selftest_hangcheck.c155
-rw-r--r--drivers/gpu/drm/i915/gt/selftest_lrc.c513
-rw-r--r--drivers/gpu/drm/i915/gt/selftest_reset.c4
-rw-r--r--drivers/gpu/drm/i915/gt/selftest_timeline.c97
-rw-r--r--drivers/gpu/drm/i915/gt/selftest_workarounds.c63
-rw-r--r--drivers/gpu/drm/i915/gt/selftests/mock_timeline.c2
-rw-r--r--drivers/gpu/drm/i915/gt/uc/intel_guc_reg.h3
-rw-r--r--drivers/gpu/drm/i915/gt/uc/intel_uc.c30
-rw-r--r--drivers/gpu/drm/i915/gt/uc/intel_uc_fw.c51
-rw-r--r--drivers/gpu/drm/i915/gt/uc/intel_uc_fw_abi.h8
-rw-r--r--drivers/gpu/drm/i915/gt/uc/selftest_guc.c4
46 files changed, 3138 insertions, 1029 deletions
diff --git a/drivers/gpu/drm/i915/gt/intel_breadcrumbs.c b/drivers/gpu/drm/i915/gt/intel_breadcrumbs.c
index 09c68dda2098..55317081d48b 100644
--- a/drivers/gpu/drm/i915/gt/intel_breadcrumbs.c
+++ b/drivers/gpu/drm/i915/gt/intel_breadcrumbs.c
@@ -120,7 +120,6 @@ __dma_fence_signal__notify(struct dma_fence *fence,
struct dma_fence_cb *cur, *tmp;
lockdep_assert_held(fence->lock);
- lockdep_assert_irqs_disabled();
list_for_each_entry_safe(cur, tmp, list, node) {
INIT_LIST_HEAD(&cur->node);
@@ -134,9 +133,10 @@ void intel_engine_breadcrumbs_irq(struct intel_engine_cs *engine)
const ktime_t timestamp = ktime_get();
struct intel_context *ce, *cn;
struct list_head *pos, *next;
+ unsigned long flags;
LIST_HEAD(signal);
- spin_lock(&b->irq_lock);
+ spin_lock_irqsave(&b->irq_lock, flags);
if (b->irq_armed && list_empty(&b->signalers))
__intel_breadcrumbs_disarm_irq(b);
@@ -182,30 +182,23 @@ void intel_engine_breadcrumbs_irq(struct intel_engine_cs *engine)
}
}
- spin_unlock(&b->irq_lock);
+ spin_unlock_irqrestore(&b->irq_lock, flags);
list_for_each_safe(pos, next, &signal) {
struct i915_request *rq =
list_entry(pos, typeof(*rq), signal_link);
struct list_head cb_list;
- spin_lock(&rq->lock);
+ spin_lock_irqsave(&rq->lock, flags);
list_replace(&rq->fence.cb_list, &cb_list);
__dma_fence_signal__timestamp(&rq->fence, timestamp);
__dma_fence_signal__notify(&rq->fence, &cb_list);
- spin_unlock(&rq->lock);
+ spin_unlock_irqrestore(&rq->lock, flags);
i915_request_put(rq);
}
}
-void intel_engine_signal_breadcrumbs(struct intel_engine_cs *engine)
-{
- local_irq_disable();
- intel_engine_breadcrumbs_irq(engine);
- local_irq_enable();
-}
-
static void signal_irq_work(struct irq_work *work)
{
struct intel_engine_cs *engine =
@@ -275,7 +268,6 @@ void intel_engine_fini_breadcrumbs(struct intel_engine_cs *engine)
bool i915_request_enable_breadcrumb(struct i915_request *rq)
{
lockdep_assert_held(&rq->lock);
- lockdep_assert_irqs_disabled();
if (test_bit(I915_FENCE_FLAG_ACTIVE, &rq->fence.flags)) {
struct intel_breadcrumbs *b = &rq->engine->breadcrumbs;
@@ -325,7 +317,6 @@ void i915_request_cancel_breadcrumb(struct i915_request *rq)
struct intel_breadcrumbs *b = &rq->engine->breadcrumbs;
lockdep_assert_held(&rq->lock);
- lockdep_assert_irqs_disabled();
/*
* We must wait for b->irq_lock so that we know the interrupt handler
diff --git a/drivers/gpu/drm/i915/gt/intel_context.c b/drivers/gpu/drm/i915/gt/intel_context.c
index f55691d151ae..be34d97ac18f 100644
--- a/drivers/gpu/drm/i915/gt/intel_context.c
+++ b/drivers/gpu/drm/i915/gt/intel_context.c
@@ -134,10 +134,11 @@ static int __context_pin_state(struct i915_vma *vma)
static void __context_unpin_state(struct i915_vma *vma)
{
- __i915_vma_unpin(vma);
i915_vma_make_shrinkable(vma);
+ __i915_vma_unpin(vma);
}
+__i915_active_call
static void __intel_context_retire(struct i915_active *active)
{
struct intel_context *ce = container_of(active, typeof(*ce), active);
@@ -150,6 +151,7 @@ static void __intel_context_retire(struct i915_active *active)
intel_timeline_unpin(ce->timeline);
intel_ring_unpin(ce->ring);
+
intel_context_put(ce);
}
@@ -219,12 +221,20 @@ intel_context_init(struct intel_context *ce,
struct i915_gem_context *ctx,
struct intel_engine_cs *engine)
{
+ struct i915_address_space *vm;
+
GEM_BUG_ON(!engine->cops);
kref_init(&ce->ref);
ce->gem_context = ctx;
- ce->vm = i915_vm_get(ctx->vm ?: &engine->gt->ggtt->vm);
+ rcu_read_lock();
+ vm = rcu_dereference(ctx->vm);
+ if (vm)
+ ce->vm = i915_vm_get(vm);
+ else
+ ce->vm = i915_vm_get(&engine->gt->ggtt->vm);
+ rcu_read_unlock();
if (ctx->timeline)
ce->timeline = intel_timeline_get(ctx->timeline);
@@ -238,7 +248,7 @@ intel_context_init(struct intel_context *ce,
mutex_init(&ce->pin_mutex);
- i915_active_init(ctx->i915, &ce->active,
+ i915_active_init(&ce->active,
__intel_context_active, __intel_context_retire);
}
@@ -298,14 +308,14 @@ int intel_context_prepare_remote_request(struct intel_context *ce,
/* Only suitable for use in remotely modifying this context */
GEM_BUG_ON(rq->hw_context == ce);
- if (rq->timeline != tl) { /* beware timeline sharing */
+ if (rcu_access_pointer(rq->timeline) != tl) { /* timeline sharing! */
err = mutex_lock_interruptible_nested(&tl->mutex,
SINGLE_DEPTH_NESTING);
if (err)
return err;
/* Queue this switch after current activity by this context. */
- err = i915_active_request_set(&tl->last_request, rq);
+ err = i915_active_fence_set(&tl->last_request, rq);
mutex_unlock(&tl->mutex);
if (err)
return err;
@@ -319,7 +329,7 @@ int intel_context_prepare_remote_request(struct intel_context *ce,
* words transfer the pinned ce object to tracked active request.
*/
GEM_BUG_ON(i915_active_is_idle(&ce->active));
- return i915_active_ref(&ce->active, rq->timeline, rq);
+ return i915_active_add_request(&ce->active, rq);
}
struct i915_request *intel_context_create_request(struct intel_context *ce)
diff --git a/drivers/gpu/drm/i915/gt/intel_context_types.h b/drivers/gpu/drm/i915/gt/intel_context_types.h
index bf9cedfccbf0..6959b05ae5f8 100644
--- a/drivers/gpu/drm/i915/gt/intel_context_types.h
+++ b/drivers/gpu/drm/i915/gt/intel_context_types.h
@@ -58,6 +58,7 @@ struct intel_context {
u32 *lrc_reg_state;
u64 lrc_desc;
+ u32 tag; /* cookie passed to HW to track this context on submission */
unsigned int active_count; /* protected by timeline->mutex */
diff --git a/drivers/gpu/drm/i915/gt/intel_engine.h b/drivers/gpu/drm/i915/gt/intel_engine.h
index d3c6993f4f46..c9e8c8ccbd47 100644
--- a/drivers/gpu/drm/i915/gt/intel_engine.h
+++ b/drivers/gpu/drm/i915/gt/intel_engine.h
@@ -335,7 +335,6 @@ void intel_engine_init_execlists(struct intel_engine_cs *engine);
void intel_engine_init_breadcrumbs(struct intel_engine_cs *engine);
void intel_engine_fini_breadcrumbs(struct intel_engine_cs *engine);
-void intel_engine_signal_breadcrumbs(struct intel_engine_cs *engine);
void intel_engine_disarm_breadcrumbs(struct intel_engine_cs *engine);
static inline void
diff --git a/drivers/gpu/drm/i915/gt/intel_engine_cs.c b/drivers/gpu/drm/i915/gt/intel_engine_cs.c
index 82630db0394b..80fd072ac719 100644
--- a/drivers/gpu/drm/i915/gt/intel_engine_cs.c
+++ b/drivers/gpu/drm/i915/gt/intel_engine_cs.c
@@ -680,6 +680,8 @@ static int measure_breadcrumb_dw(struct intel_engine_cs *engine)
engine->status_page.vma))
goto out_frame;
+ mutex_lock(&frame->timeline.mutex);
+
frame->ring.vaddr = frame->cs;
frame->ring.size = sizeof(frame->cs);
frame->ring.effective_size = frame->ring.size;
@@ -688,18 +690,22 @@ static int measure_breadcrumb_dw(struct intel_engine_cs *engine)
frame->rq.i915 = engine->i915;
frame->rq.engine = engine;
frame->rq.ring = &frame->ring;
- frame->rq.timeline = &frame->timeline;
+ rcu_assign_pointer(frame->rq.timeline, &frame->timeline);
dw = intel_timeline_pin(&frame->timeline);
if (dw < 0)
goto out_timeline;
+ spin_lock_irq(&engine->active.lock);
dw = engine->emit_fini_breadcrumb(&frame->rq, frame->cs) - frame->cs;
+ spin_unlock_irq(&engine->active.lock);
+
GEM_BUG_ON(dw & 1); /* RING_TAIL must be qword aligned */
intel_timeline_unpin(&frame->timeline);
out_timeline:
+ mutex_unlock(&frame->timeline.mutex);
intel_timeline_fini(&frame->timeline);
out_frame:
kfree(frame);
@@ -814,8 +820,10 @@ void intel_engine_cleanup_common(struct intel_engine_cs *engine)
if (engine->default_state)
i915_gem_object_put(engine->default_state);
- intel_context_unpin(engine->kernel_context);
- intel_context_put(engine->kernel_context);
+ if (engine->kernel_context) {
+ intel_context_unpin(engine->kernel_context);
+ intel_context_put(engine->kernel_context);
+ }
GEM_BUG_ON(!llist_empty(&engine->barrier_tasks));
intel_wa_list_free(&engine->ctx_wa_list);
@@ -948,6 +956,7 @@ void intel_engine_get_instdone(struct intel_engine_cs *engine,
struct intel_instdone *instdone)
{
struct drm_i915_private *i915 = engine->i915;
+ const struct sseu_dev_info *sseu = &RUNTIME_INFO(i915)->sseu;
struct intel_uncore *uncore = engine->uncore;
u32 mmio_base = engine->mmio_base;
int slice;
@@ -965,7 +974,7 @@ void intel_engine_get_instdone(struct intel_engine_cs *engine,
instdone->slice_common =
intel_uncore_read(uncore, GEN7_SC_INSTDONE);
- for_each_instdone_slice_subslice(i915, slice, subslice) {
+ for_each_instdone_slice_subslice(i915, sseu, slice, subslice) {
instdone->sampler[slice][subslice] =
read_subslice_reg(engine, slice, subslice,
GEN7_SAMPLER_INSTDONE);
@@ -1118,6 +1127,8 @@ bool intel_engine_can_store_dword(struct intel_engine_cs *engine)
case 3:
/* maybe only uses physical not virtual addresses */
return !(IS_I915G(engine->i915) || IS_I915GM(engine->i915));
+ case 4:
+ return !IS_I965G(engine->i915); /* who knows! */
case 6:
return engine->class != VIDEO_DECODE_CLASS; /* b0rked */
default:
@@ -1193,6 +1204,27 @@ static void hexdump(struct drm_printer *m, const void *buf, size_t len)
}
}
+static struct intel_timeline *get_timeline(struct i915_request *rq)
+{
+ struct intel_timeline *tl;
+
+ /*
+ * Even though we are holding the engine->active.lock here, there
+ * is no control over the submission queue per-se and we are
+ * inspecting the active state at a random point in time, with an
+ * unknown queue. Play safe and make sure the timeline remains valid.
+ * (Only being used for pretty printing, one extra kref shouldn't
+ * cause a camel stampede!)
+ */
+ rcu_read_lock();
+ tl = rcu_dereference(rq->timeline);
+ if (!kref_get_unless_zero(&tl->kref))
+ tl = NULL;
+ rcu_read_unlock();
+
+ return tl;
+}
+
static void intel_engine_print_registers(struct intel_engine_cs *engine,
struct drm_printer *m)
{
@@ -1287,27 +1319,37 @@ static void intel_engine_print_registers(struct intel_engine_cs *engine,
int len;
len = snprintf(hdr, sizeof(hdr),
- "\t\tActive[%d: ",
+ "\t\tActive[%d]: ",
(int)(port - execlists->active));
- if (!i915_request_signaled(rq))
+ if (!i915_request_signaled(rq)) {
+ struct intel_timeline *tl = get_timeline(rq);
+
len += snprintf(hdr + len, sizeof(hdr) - len,
"ring:{start:%08x, hwsp:%08x, seqno:%08x}, ",
i915_ggtt_offset(rq->ring->vma),
- rq->timeline->hwsp_offset,
+ tl ? tl->hwsp_offset : 0,
hwsp_seqno(rq));
+
+ if (tl)
+ intel_timeline_put(tl);
+ }
snprintf(hdr + len, sizeof(hdr) - len, "rq: ");
print_request(m, rq, hdr);
}
for (port = execlists->pending; (rq = *port); port++) {
+ struct intel_timeline *tl = get_timeline(rq);
char hdr[80];
snprintf(hdr, sizeof(hdr),
"\t\tPending[%d] ring:{start:%08x, hwsp:%08x, seqno:%08x}, rq: ",
(int)(port - execlists->pending),
i915_ggtt_offset(rq->ring->vma),
- rq->timeline->hwsp_offset,
+ tl ? tl->hwsp_offset : 0,
hwsp_seqno(rq));
print_request(m, rq, hdr);
+
+ if (tl)
+ intel_timeline_put(tl);
}
spin_unlock_irqrestore(&engine->active.lock, flags);
} else if (INTEL_GEN(dev_priv) > 6) {
@@ -1385,6 +1427,8 @@ void intel_engine_dump(struct intel_engine_cs *engine,
spin_lock_irqsave(&engine->active.lock, flags);
rq = intel_engine_find_active_request(engine);
if (rq) {
+ struct intel_timeline *tl = get_timeline(rq);
+
print_request(m, rq, "\t\tactive ");
drm_printf(m, "\t\tring->start: 0x%08x\n",
@@ -1397,10 +1441,19 @@ void intel_engine_dump(struct intel_engine_cs *engine,
rq->ring->emit);
drm_printf(m, "\t\tring->space: 0x%08x\n",
rq->ring->space);
- drm_printf(m, "\t\tring->hwsp: 0x%08x\n",
- rq->timeline->hwsp_offset);
+
+ if (tl) {
+ drm_printf(m, "\t\tring->hwsp: 0x%08x\n",
+ tl->hwsp_offset);
+ intel_timeline_put(tl);
+ }
print_request_ring(m, rq);
+
+ if (rq->hw_context->lrc_reg_state) {
+ drm_printf(m, "Logical Ring Context:\n");
+ hexdump(m, rq->hw_context->lrc_reg_state, PAGE_SIZE);
+ }
}
spin_unlock_irqrestore(&engine->active.lock, flags);
diff --git a/drivers/gpu/drm/i915/gt/intel_engine_pm.c b/drivers/gpu/drm/i915/gt/intel_engine_pm.c
index 65b5ca74b394..8e5e513eddc9 100644
--- a/drivers/gpu/drm/i915/gt/intel_engine_pm.c
+++ b/drivers/gpu/drm/i915/gt/intel_engine_pm.c
@@ -11,6 +11,7 @@
#include "intel_engine_pool.h"
#include "intel_gt.h"
#include "intel_gt_pm.h"
+#include "intel_rc6.h"
static int __engine_unpark(struct intel_wakeref *wf)
{
@@ -103,7 +104,7 @@ static bool switch_to_kernel_context(struct intel_engine_cs *engine)
/* Context switch failed, hope for the best! Maybe reset? */
goto out_unlock;
- intel_timeline_enter(rq->timeline);
+ intel_timeline_enter(i915_request_timeline(rq));
/* Check again on the next retirement. */
engine->wakeref_serial = engine->serial + 1;
@@ -123,6 +124,19 @@ out_unlock:
return result;
}
+static void call_idle_barriers(struct intel_engine_cs *engine)
+{
+ struct llist_node *node, *next;
+
+ llist_for_each_safe(node, next, llist_del_all(&engine->barrier_tasks)) {
+ struct dma_fence_cb *cb =
+ container_of((struct list_head *)node,
+ typeof(*cb), node);
+
+ cb->func(NULL, cb);
+ }
+}
+
static int __engine_park(struct intel_wakeref *wf)
{
struct intel_engine_cs *engine =
@@ -142,6 +156,8 @@ static int __engine_park(struct intel_wakeref *wf)
GEM_TRACE("%s\n", engine->name);
+ call_idle_barriers(engine); /* cleanup after wedging */
+
intel_engine_disarm_breadcrumbs(engine);
intel_engine_pool_park(&engine->pool);
diff --git a/drivers/gpu/drm/i915/gt/intel_engine_pool.c b/drivers/gpu/drm/i915/gt/intel_engine_pool.c
index 4cd54c569911..3cdbd5f8b5be 100644
--- a/drivers/gpu/drm/i915/gt/intel_engine_pool.c
+++ b/drivers/gpu/drm/i915/gt/intel_engine_pool.c
@@ -61,6 +61,7 @@ static int pool_active(struct i915_active *ref)
return 0;
}
+__i915_active_call
static void pool_retire(struct i915_active *ref)
{
struct intel_engine_pool_node *node =
@@ -94,7 +95,7 @@ node_create(struct intel_engine_pool *pool, size_t sz)
return ERR_PTR(-ENOMEM);
node->pool = pool;
- i915_active_init(engine->i915, &node->active, pool_active, pool_retire);
+ i915_active_init(&node->active, pool_active, pool_retire);
obj = i915_gem_object_create_internal(engine->i915, sz);
if (IS_ERR(obj)) {
@@ -107,9 +108,19 @@ node_create(struct intel_engine_pool *pool, size_t sz)
return node;
}
+static struct intel_engine_pool *lookup_pool(struct intel_engine_cs *engine)
+{
+ if (intel_engine_is_virtual(engine))
+ engine = intel_virtual_engine_get_sibling(engine, 0);
+
+ GEM_BUG_ON(!engine);
+ return &engine->pool;
+}
+
struct intel_engine_pool_node *
-intel_engine_pool_get(struct intel_engine_pool *pool, size_t size)
+intel_engine_get_pool(struct intel_engine_cs *engine, size_t size)
{
+ struct intel_engine_pool *pool = lookup_pool(engine);
struct intel_engine_pool_node *node;
struct list_head *list;
unsigned long flags;
diff --git a/drivers/gpu/drm/i915/gt/intel_engine_pool.h b/drivers/gpu/drm/i915/gt/intel_engine_pool.h
index 8d069efd9457..1bd89cadc3b7 100644
--- a/drivers/gpu/drm/i915/gt/intel_engine_pool.h
+++ b/drivers/gpu/drm/i915/gt/intel_engine_pool.h
@@ -12,13 +12,13 @@
#include "i915_request.h"
struct intel_engine_pool_node *
-intel_engine_pool_get(struct intel_engine_pool *pool, size_t size);
+intel_engine_get_pool(struct intel_engine_cs *engine, size_t size);
static inline int
intel_engine_pool_mark_active(struct intel_engine_pool_node *node,
struct i915_request *rq)
{
- return i915_active_ref(&node->active, rq->timeline, rq);
+ return i915_active_add_request(&node->active, rq);
}
static inline void
diff --git a/drivers/gpu/drm/i915/gt/intel_engine_types.h b/drivers/gpu/drm/i915/gt/intel_engine_types.h
index a82cea95c2f2..6199064f332b 100644
--- a/drivers/gpu/drm/i915/gt/intel_engine_types.h
+++ b/drivers/gpu/drm/i915/gt/intel_engine_types.h
@@ -303,10 +303,12 @@ struct intel_engine_cs {
u8 uabi_class;
u8 uabi_instance;
+ u32 uabi_capabilities;
u32 context_size;
u32 mmio_base;
- u32 uabi_capabilities;
+ unsigned int context_tag;
+#define NUM_CONTEXT_TAG roundup_pow_of_two(2 * EXECLIST_MAX_PORTS)
struct rb_node uabi_node;
@@ -481,6 +483,7 @@ struct intel_engine_cs {
#define I915_ENGINE_HAS_SEMAPHORES BIT(3)
#define I915_ENGINE_NEEDS_BREADCRUMB_TASKLET BIT(4)
#define I915_ENGINE_IS_VIRTUAL BIT(5)
+#define I915_ENGINE_HAS_RELATIVE_MMIO BIT(6)
unsigned int flags;
/*
@@ -576,20 +579,24 @@ intel_engine_is_virtual(const struct intel_engine_cs *engine)
return engine->flags & I915_ENGINE_IS_VIRTUAL;
}
-#define instdone_slice_mask(dev_priv__) \
- (IS_GEN(dev_priv__, 7) ? \
- 1 : RUNTIME_INFO(dev_priv__)->sseu.slice_mask)
+static inline bool
+intel_engine_has_relative_mmio(const struct intel_engine_cs * const engine)
+{
+ return engine->flags & I915_ENGINE_HAS_RELATIVE_MMIO;
+}
-#define instdone_subslice_mask(dev_priv__) \
- (IS_GEN(dev_priv__, 7) ? \
- 1 : RUNTIME_INFO(dev_priv__)->sseu.subslice_mask[0])
+#define instdone_has_slice(dev_priv___, sseu___, slice___) \
+ ((IS_GEN(dev_priv___, 7) ? 1 : ((sseu___)->slice_mask)) & BIT(slice___))
-#define for_each_instdone_slice_subslice(dev_priv__, slice__, subslice__) \
- for ((slice__) = 0, (subslice__) = 0; \
- (slice__) < I915_MAX_SLICES; \
- (subslice__) = ((subslice__) + 1) < I915_MAX_SUBSLICES ? (subslice__) + 1 : 0, \
- (slice__) += ((subslice__) == 0)) \
- for_each_if((BIT(slice__) & instdone_slice_mask(dev_priv__)) && \
- (BIT(subslice__) & instdone_subslice_mask(dev_priv__)))
+#define instdone_has_subslice(dev_priv__, sseu__, slice__, subslice__) \
+ (IS_GEN(dev_priv__, 7) ? (1 & BIT(subslice__)) : \
+ intel_sseu_has_subslice(sseu__, 0, subslice__))
+#define for_each_instdone_slice_subslice(dev_priv_, sseu_, slice_, subslice_) \
+ for ((slice_) = 0, (subslice_) = 0; (slice_) < I915_MAX_SLICES; \
+ (subslice_) = ((subslice_) + 1) % I915_MAX_SUBSLICES, \
+ (slice_) += ((subslice_) == 0)) \
+ for_each_if((instdone_has_slice(dev_priv_, sseu_, slice_)) && \
+ (instdone_has_subslice(dev_priv_, sseu_, slice_, \
+ subslice_)))
#endif /* __INTEL_ENGINE_TYPES_H__ */
diff --git a/drivers/gpu/drm/i915/gt/intel_gpu_commands.h b/drivers/gpu/drm/i915/gt/intel_gpu_commands.h
index 86e00a2db8a4..b0227ab2fe1b 100644
--- a/drivers/gpu/drm/i915/gt/intel_gpu_commands.h
+++ b/drivers/gpu/drm/i915/gt/intel_gpu_commands.h
@@ -112,6 +112,7 @@
#define MI_SEMAPHORE_SIGNAL MI_INSTR(0x1b, 0) /* GEN8+ */
#define MI_SEMAPHORE_TARGET(engine) ((engine)<<15)
#define MI_SEMAPHORE_WAIT MI_INSTR(0x1c, 2) /* GEN8+ */
+#define MI_SEMAPHORE_WAIT_TOKEN MI_INSTR(0x1c, 3) /* GEN12+ */
#define MI_SEMAPHORE_POLL (1 << 15)
#define MI_SEMAPHORE_SAD_GT_SDD (0 << 12)
#define MI_SEMAPHORE_SAD_GTE_SDD (1 << 12)
@@ -119,6 +120,8 @@
#define MI_SEMAPHORE_SAD_LTE_SDD (3 << 12)
#define MI_SEMAPHORE_SAD_EQ_SDD (4 << 12)
#define MI_SEMAPHORE_SAD_NEQ_SDD (5 << 12)
+#define MI_SEMAPHORE_TOKEN_MASK REG_GENMASK(9, 5)
+#define MI_SEMAPHORE_TOKEN_SHIFT 5
#define MI_STORE_DWORD_IMM MI_INSTR(0x20, 1)
#define MI_STORE_DWORD_IMM_GEN4 MI_INSTR(0x20, 2)
#define MI_MEM_VIRTUAL (1 << 22) /* 945,g33,965 */
@@ -132,6 +135,8 @@
* address/value pairs. Don't overdue it, though, x <= 2^4 must hold!
*/
#define MI_LOAD_REGISTER_IMM(x) MI_INSTR(0x22, 2*(x)-1)
+/* Gen11+. addr = base + (ctx_restore ? offset & GENMASK(12,2) : offset) */
+#define MI_LRI_CS_MMIO (1<<19)
#define MI_LRI_FORCE_POSTED (1<<12)
#define MI_STORE_REGISTER_MEM MI_INSTR(0x24, 1)
#define MI_STORE_REGISTER_MEM_GEN8 MI_INSTR(0x24, 2)
@@ -147,6 +152,7 @@
#define MI_FLUSH_DW_USE_PPGTT (0<<2)
#define MI_LOAD_REGISTER_MEM MI_INSTR(0x29, 1)
#define MI_LOAD_REGISTER_MEM_GEN8 MI_INSTR(0x29, 2)
+#define MI_LOAD_REGISTER_REG MI_INSTR(0x2A, 1)
#define MI_BATCH_BUFFER MI_INSTR(0x30, 1)
#define MI_BATCH_NON_SECURE (1)
/* for snb/ivb/vlv this also means "batch in ppgtt" when ppgtt is enabled. */
@@ -235,6 +241,29 @@
#define PIPE_CONTROL_DEPTH_CACHE_FLUSH (1<<0)
#define PIPE_CONTROL_GLOBAL_GTT (1<<2) /* in addr dword */
+#define MI_MATH(x) MI_INSTR(0x1a, (x) - 1)
+#define MI_MATH_INSTR(opcode, op1, op2) ((opcode) << 20 | (op1) << 10 | (op2))
+/* Opcodes for MI_MATH_INSTR */
+#define MI_MATH_NOOP MI_MATH_INSTR(0x000, 0x0, 0x0)
+#define MI_MATH_LOAD(op1, op2) MI_MATH_INSTR(0x080, op1, op2)
+#define MI_MATH_LOADINV(op1, op2) MI_MATH_INSTR(0x480, op1, op2)
+#define MI_MATH_LOAD0(op1) MI_MATH_INSTR(0x081, op1)
+#define MI_MATH_LOAD1(op1) MI_MATH_INSTR(0x481, op1)
+#define MI_MATH_ADD MI_MATH_INSTR(0x100, 0x0, 0x0)
+#define MI_MATH_SUB MI_MATH_INSTR(0x101, 0x0, 0x0)
+#define MI_MATH_AND MI_MATH_INSTR(0x102, 0x0, 0x0)
+#define MI_MATH_OR MI_MATH_INSTR(0x103, 0x0, 0x0)
+#define MI_MATH_XOR MI_MATH_INSTR(0x104, 0x0, 0x0)
+#define MI_MATH_STORE(op1, op2) MI_MATH_INSTR(0x180, op1, op2)
+#define MI_MATH_STOREINV(op1, op2) MI_MATH_INSTR(0x580, op1, op2)
+/* Registers used as operands in MI_MATH_INSTR */
+#define MI_MATH_REG(x) (x)
+#define MI_MATH_REG_SRCA 0x20
+#define MI_MATH_REG_SRCB 0x21
+#define MI_MATH_REG_ACCU 0x31
+#define MI_MATH_REG_ZF 0x32
+#define MI_MATH_REG_CF 0x33
+
/*
* Commands used only by the command parser
*/
@@ -251,7 +280,6 @@
#define MI_CLFLUSH MI_INSTR(0x27, 0)
#define MI_REPORT_PERF_COUNT MI_INSTR(0x28, 0)
#define MI_REPORT_PERF_COUNT_GGTT (1<<0)
-#define MI_LOAD_REGISTER_REG MI_INSTR(0x2A, 0)
#define MI_RS_STORE_DATA_IMM MI_INSTR(0x2B, 0)
#define MI_LOAD_URB_MEM MI_INSTR(0x2C, 0)
#define MI_STORE_URB_MEM MI_INSTR(0x2D, 0)
diff --git a/drivers/gpu/drm/i915/gt/intel_gt.c b/drivers/gpu/drm/i915/gt/intel_gt.c
index d48ec9a76ed1..8f44cf8c79b2 100644
--- a/drivers/gpu/drm/i915/gt/intel_gt.c
+++ b/drivers/gpu/drm/i915/gt/intel_gt.c
@@ -6,7 +6,11 @@
#include "i915_drv.h"
#include "intel_gt.h"
#include "intel_gt_pm.h"
+#include "intel_gt_requests.h"
+#include "intel_mocs.h"
+#include "intel_rc6.h"
#include "intel_uncore.h"
+#include "intel_pm.h"
void intel_gt_init_early(struct intel_gt *gt, struct drm_i915_private *i915)
{
@@ -20,13 +24,106 @@ void intel_gt_init_early(struct intel_gt *gt, struct drm_i915_private *i915)
intel_gt_init_hangcheck(gt);
intel_gt_init_reset(gt);
+ intel_gt_init_requests(gt);
intel_gt_pm_init_early(gt);
intel_uc_init_early(&gt->uc);
}
-void intel_gt_init_hw(struct drm_i915_private *i915)
+void intel_gt_init_hw_early(struct drm_i915_private *i915)
{
i915->gt.ggtt = &i915->ggtt;
+
+ /* BIOS often leaves RC6 enabled, but disable it for hw init */
+ intel_gt_pm_disable(&i915->gt);
+}
+
+static void init_unused_ring(struct intel_gt *gt, u32 base)
+{
+ struct intel_uncore *uncore = gt->uncore;
+
+ intel_uncore_write(uncore, RING_CTL(base), 0);
+ intel_uncore_write(uncore, RING_HEAD(base), 0);
+ intel_uncore_write(uncore, RING_TAIL(base), 0);
+ intel_uncore_write(uncore, RING_START(base), 0);
+}
+
+static void init_unused_rings(struct intel_gt *gt)
+{
+ struct drm_i915_private *i915 = gt->i915;
+
+ if (IS_I830(i915)) {
+ init_unused_ring(gt, PRB1_BASE);
+ init_unused_ring(gt, SRB0_BASE);
+ init_unused_ring(gt, SRB1_BASE);
+ init_unused_ring(gt, SRB2_BASE);
+ init_unused_ring(gt, SRB3_BASE);
+ } else if (IS_GEN(i915, 2)) {
+ init_unused_ring(gt, SRB0_BASE);
+ init_unused_ring(gt, SRB1_BASE);
+ } else if (IS_GEN(i915, 3)) {
+ init_unused_ring(gt, PRB1_BASE);
+ init_unused_ring(gt, PRB2_BASE);
+ }
+}
+
+int intel_gt_init_hw(struct intel_gt *gt)
+{
+ struct drm_i915_private *i915 = gt->i915;
+ struct intel_uncore *uncore = gt->uncore;
+ int ret;
+
+ BUG_ON(!i915->kernel_context);
+ ret = intel_gt_terminally_wedged(gt);
+ if (ret)
+ return ret;
+
+ gt->last_init_time = ktime_get();
+
+ /* Double layer security blanket, see i915_gem_init() */
+ intel_uncore_forcewake_get(uncore, FORCEWAKE_ALL);
+
+ if (HAS_EDRAM(i915) && INTEL_GEN(i915) < 9)
+ intel_uncore_rmw(uncore, HSW_IDICR, 0, IDIHASHMSK(0xf));
+
+ if (IS_HASWELL(i915))
+ intel_uncore_write(uncore,
+ MI_PREDICATE_RESULT_2,
+ IS_HSW_GT3(i915) ?
+ LOWER_SLICE_ENABLED : LOWER_SLICE_DISABLED);
+
+ /* Apply the GT workarounds... */
+ intel_gt_apply_workarounds(gt);
+ /* ...and determine whether they are sticking. */
+ intel_gt_verify_workarounds(gt, "init");
+
+ intel_gt_init_swizzling(gt);
+
+ /*
+ * At least 830 can leave some of the unused rings
+ * "active" (ie. head != tail) after resume which
+ * will prevent c3 entry. Makes sure all unused rings
+ * are totally idle.
+ */
+ init_unused_rings(gt);
+
+ ret = i915_ppgtt_init_hw(gt);
+ if (ret) {
+ DRM_ERROR("Enabling PPGTT failed (%d)\n", ret);
+ goto out;
+ }
+
+ /* We can't enable contexts until all firmware is loaded */
+ ret = intel_uc_init_hw(&gt->uc);
+ if (ret) {
+ i915_probe_error(i915, "Enabling uc failed (%d)\n", ret);
+ goto out;
+ }
+
+ intel_mocs_init(gt);
+
+out:
+ intel_uncore_forcewake_put(uncore, FORCEWAKE_ALL);
+ return ret;
}
static void rmw_set(struct intel_uncore *uncore, i915_reg_t reg, u32 set)
@@ -207,11 +304,12 @@ void intel_gt_flush_ggtt_writes(struct intel_gt *gt)
with_intel_runtime_pm(&i915->runtime_pm, wakeref) {
struct intel_uncore *uncore = gt->uncore;
+ unsigned long flags;
- spin_lock_irq(&uncore->lock);
+ spin_lock_irqsave(&uncore->lock, flags);
intel_uncore_posting_read_fw(uncore,
RING_HEAD(RENDER_RING_BASE));
- spin_unlock_irq(&uncore->lock);
+ spin_unlock_irqrestore(&uncore->lock, flags);
}
}
@@ -222,7 +320,13 @@ void intel_gt_chipset_flush(struct intel_gt *gt)
intel_gtt_chipset_flush();
}
-int intel_gt_init_scratch(struct intel_gt *gt, unsigned int size)
+void intel_gt_driver_register(struct intel_gt *gt)
+{
+ if (IS_GEN(gt->i915, 5))
+ intel_gpu_ips_init(gt->i915);
+}
+
+static int intel_gt_init_scratch(struct intel_gt *gt, unsigned int size)
{
struct drm_i915_private *i915 = gt->i915;
struct drm_i915_gem_object *obj;
@@ -230,7 +334,7 @@ int intel_gt_init_scratch(struct intel_gt *gt, unsigned int size)
int ret;
obj = i915_gem_object_create_stolen(i915, size);
- if (!obj)
+ if (IS_ERR(obj))
obj = i915_gem_object_create_internal(i915, size);
if (IS_ERR(obj)) {
DRM_ERROR("Failed to allocate scratch page\n");
@@ -256,11 +360,44 @@ err_unref:
return ret;
}
-void intel_gt_fini_scratch(struct intel_gt *gt)
+static void intel_gt_fini_scratch(struct intel_gt *gt)
{
i915_vma_unpin_and_release(&gt->scratch, 0);
}
+int intel_gt_init(struct intel_gt *gt)
+{
+ int err;
+
+ err = intel_gt_init_scratch(gt, IS_GEN(gt->i915, 2) ? SZ_256K : SZ_4K);
+ if (err)
+ return err;
+
+ intel_gt_pm_init(gt);
+
+ return 0;
+}
+
+void intel_gt_driver_remove(struct intel_gt *gt)
+{
+ GEM_BUG_ON(gt->awake);
+ intel_gt_pm_disable(gt);
+}
+
+void intel_gt_driver_unregister(struct intel_gt *gt)
+{
+ intel_gpu_ips_teardown();
+}
+
+void intel_gt_driver_release(struct intel_gt *gt)
+{
+ /* Paranoia: make sure we have disabled everything before we exit. */
+ intel_gt_pm_disable(gt);
+ intel_gt_pm_fini(gt);
+
+ intel_gt_fini_scratch(gt);
+}
+
void intel_gt_driver_late_release(struct intel_gt *gt)
{
intel_uc_driver_late_release(&gt->uc);
diff --git a/drivers/gpu/drm/i915/gt/intel_gt.h b/drivers/gpu/drm/i915/gt/intel_gt.h
index 4920cb351f10..e6ab0bff0efb 100644
--- a/drivers/gpu/drm/i915/gt/intel_gt.h
+++ b/drivers/gpu/drm/i915/gt/intel_gt.h
@@ -28,7 +28,14 @@ static inline struct intel_gt *huc_to_gt(struct intel_huc *huc)
}
void intel_gt_init_early(struct intel_gt *gt, struct drm_i915_private *i915);
-void intel_gt_init_hw(struct drm_i915_private *i915);
+void intel_gt_init_hw_early(struct drm_i915_private *i915);
+int __must_check intel_gt_init_hw(struct intel_gt *gt);
+int intel_gt_init(struct intel_gt *gt);
+void intel_gt_driver_register(struct intel_gt *gt);
+
+void intel_gt_driver_unregister(struct intel_gt *gt);
+void intel_gt_driver_remove(struct intel_gt *gt);
+void intel_gt_driver_release(struct intel_gt *gt);
void intel_gt_driver_late_release(struct intel_gt *gt);
@@ -41,9 +48,6 @@ void intel_gt_chipset_flush(struct intel_gt *gt);
void intel_gt_init_hangcheck(struct intel_gt *gt);
-int intel_gt_init_scratch(struct intel_gt *gt, unsigned int size);
-void intel_gt_fini_scratch(struct intel_gt *gt);
-
static inline u32 intel_gt_scratch_offset(const struct intel_gt *gt,
enum intel_gt_scratch_field field)
{
diff --git a/drivers/gpu/drm/i915/gt/intel_gt_pm.c b/drivers/gpu/drm/i915/gt/intel_gt_pm.c
index 1363e069ec83..b52e2ba3d092 100644
--- a/drivers/gpu/drm/i915/gt/intel_gt_pm.c
+++ b/drivers/gpu/drm/i915/gt/intel_gt_pm.c
@@ -5,16 +5,20 @@
*/
#include "i915_drv.h"
+#include "i915_globals.h"
#include "i915_params.h"
+#include "intel_context.h"
#include "intel_engine_pm.h"
#include "intel_gt.h"
#include "intel_gt_pm.h"
+#include "intel_gt_requests.h"
#include "intel_pm.h"
+#include "intel_rc6.h"
#include "intel_wakeref.h"
-static void pm_notify(struct drm_i915_private *i915, int state)
+static void pm_notify(struct intel_gt *gt, int state)
{
- blocking_notifier_call_chain(&i915->gt.pm_notifications, state, i915);
+ blocking_notifier_call_chain(&gt->pm_notifications, state, gt->i915);
}
static int __gt_unpark(struct intel_wakeref *wf)
@@ -24,6 +28,8 @@ static int __gt_unpark(struct intel_wakeref *wf)
GEM_TRACE("\n");
+ i915_globals_unpark();
+
/*
* It seems that the DMC likes to transition between the DC states a lot
* when there are no connected displays (no active power domains) during
@@ -47,21 +53,23 @@ static int __gt_unpark(struct intel_wakeref *wf)
i915_pmu_gt_unparked(i915);
intel_gt_queue_hangcheck(gt);
+ intel_gt_unpark_requests(gt);
- pm_notify(i915, INTEL_GT_UNPARK);
+ pm_notify(gt, INTEL_GT_UNPARK);
return 0;
}
static int __gt_park(struct intel_wakeref *wf)
{
- struct drm_i915_private *i915 =
- container_of(wf, typeof(*i915), gt.wakeref);
- intel_wakeref_t wakeref = fetch_and_zero(&i915->gt.awake);
+ struct intel_gt *gt = container_of(wf, typeof(*gt), wakeref);
+ intel_wakeref_t wakeref = fetch_and_zero(&gt->awake);
+ struct drm_i915_private *i915 = gt->i915;
GEM_TRACE("\n");
- pm_notify(i915, INTEL_GT_PARK);
+ pm_notify(gt, INTEL_GT_PARK);
+ intel_gt_park_requests(gt);
i915_pmu_gt_parked(i915);
if (INTEL_GEN(i915) >= 6)
@@ -73,6 +81,8 @@ static int __gt_park(struct intel_wakeref *wf)
GEM_BUG_ON(!wakeref);
intel_display_power_put(i915, POWER_DOMAIN_GT_IRQ, wakeref);
+ i915_globals_park();
+
return 0;
}
@@ -89,6 +99,16 @@ void intel_gt_pm_init_early(struct intel_gt *gt)
BLOCKING_INIT_NOTIFIER_HEAD(&gt->pm_notifications);
}
+void intel_gt_pm_init(struct intel_gt *gt)
+{
+ /*
+ * Enabling power-management should be "self-healing". If we cannot
+ * enable a feature, simply leave it disabled with a notice to the
+ * user.
+ */
+ intel_rc6_init(&gt->rc6);
+}
+
static bool reset_engines(struct intel_gt *gt)
{
if (INTEL_INFO(gt->i915)->gpu_reset_clobbers_display)
@@ -123,6 +143,17 @@ void intel_gt_sanitize(struct intel_gt *gt, bool force)
__intel_engine_reset(engine, false);
}
+void intel_gt_pm_disable(struct intel_gt *gt)
+{
+ if (!is_mock_gt(gt))
+ intel_sanitize_gt_powersave(gt->i915);
+}
+
+void intel_gt_pm_fini(struct intel_gt *gt)
+{
+ intel_rc6_fini(&gt->rc6);
+}
+
int intel_gt_resume(struct intel_gt *gt)
{
struct intel_engine_cs *engine;
@@ -136,14 +167,21 @@ int intel_gt_resume(struct intel_gt *gt)
* allowing us to fixup the user contexts on their first pin.
*/
intel_gt_pm_get(gt);
+ intel_uncore_forcewake_get(gt->uncore, FORCEWAKE_ALL);
+ intel_rc6_sanitize(&gt->rc6);
+
for_each_engine(engine, gt->i915, id) {
struct intel_context *ce;
intel_engine_pm_get(engine);
ce = engine->kernel_context;
- if (ce)
+ if (ce) {
+ GEM_BUG_ON(!intel_context_is_pinned(ce));
+ mutex_acquire(&ce->pin_mutex.dep_map, 0, 0, _THIS_IP_);
ce->ops->reset(ce);
+ mutex_release(&ce->pin_mutex.dep_map, 0, _THIS_IP_);
+ }
engine->serial++; /* kernel context lost */
err = engine->resume(engine);
@@ -156,11 +194,38 @@ int intel_gt_resume(struct intel_gt *gt)
break;
}
}
+
+ intel_rc6_enable(&gt->rc6);
+ intel_uncore_forcewake_put(gt->uncore, FORCEWAKE_ALL);
intel_gt_pm_put(gt);
return err;
}
+static void wait_for_idle(struct intel_gt *gt)
+{
+ if (intel_gt_wait_for_idle(gt, I915_GEM_IDLE_TIMEOUT) == -ETIME) {
+ /*
+ * Forcibly cancel outstanding work and leave
+ * the gpu quiet.
+ */
+ intel_gt_set_wedged(gt);
+ }
+
+ intel_gt_pm_wait_for_idle(gt);
+}
+
+void intel_gt_suspend(struct intel_gt *gt)
+{
+ intel_wakeref_t wakeref;
+
+ /* We expect to be idle already; but also want to be independent */
+ wait_for_idle(gt);
+
+ with_intel_runtime_pm(&gt->i915->runtime_pm, wakeref)
+ intel_rc6_disable(&gt->rc6);
+}
+
void intel_gt_runtime_suspend(struct intel_gt *gt)
{
intel_uc_runtime_suspend(&gt->uc);
@@ -172,3 +237,7 @@ int intel_gt_runtime_resume(struct intel_gt *gt)
return intel_uc_runtime_resume(&gt->uc);
}
+
+#if IS_ENABLED(CONFIG_DRM_I915_SELFTEST)
+#include "selftest_gt_pm.c"
+#endif
diff --git a/drivers/gpu/drm/i915/gt/intel_gt_pm.h b/drivers/gpu/drm/i915/gt/intel_gt_pm.h
index fb39d99cd6ee..997770d3a968 100644
--- a/drivers/gpu/drm/i915/gt/intel_gt_pm.h
+++ b/drivers/gpu/drm/i915/gt/intel_gt_pm.h
@@ -43,10 +43,21 @@ static inline int intel_gt_pm_wait_for_idle(struct intel_gt *gt)
}
void intel_gt_pm_init_early(struct intel_gt *gt);
+void intel_gt_pm_init(struct intel_gt *gt);
+void intel_gt_pm_disable(struct intel_gt *gt);
+void intel_gt_pm_fini(struct intel_gt *gt);
void intel_gt_sanitize(struct intel_gt *gt, bool force);
+
int intel_gt_resume(struct intel_gt *gt);
+void intel_gt_suspend(struct intel_gt *gt);
+
void intel_gt_runtime_suspend(struct intel_gt *gt);
int intel_gt_runtime_resume(struct intel_gt *gt);
+static inline bool is_mock_gt(const struct intel_gt *gt)
+{
+ return I915_SELFTEST_ONLY(gt->awake == -ENODEV);
+}
+
#endif /* INTEL_GT_PM_H */
diff --git a/drivers/gpu/drm/i915/gt/intel_gt_requests.c b/drivers/gpu/drm/i915/gt/intel_gt_requests.c
new file mode 100644
index 000000000000..8aed89fd2cdc
--- /dev/null
+++ b/drivers/gpu/drm/i915/gt/intel_gt_requests.c
@@ -0,0 +1,123 @@
+/*
+ * SPDX-License-Identifier: MIT
+ *
+ * Copyright © 2019 Intel Corporation
+ */
+
+#include "i915_request.h"
+#include "intel_gt.h"
+#include "intel_gt_pm.h"
+#include "intel_gt_requests.h"
+#include "intel_timeline.h"
+
+static void retire_requests(struct intel_timeline *tl)
+{
+ struct i915_request *rq, *rn;
+
+ list_for_each_entry_safe(rq, rn, &tl->requests, link)
+ if (!i915_request_retire(rq))
+ break;
+}
+
+long intel_gt_retire_requests_timeout(struct intel_gt *gt, long timeout)
+{
+ struct intel_gt_timelines *timelines = &gt->timelines;
+ struct intel_timeline *tl, *tn;
+ unsigned long active_count = 0;
+ unsigned long flags;
+ bool interruptible;
+ LIST_HEAD(free);
+
+ interruptible = true;
+ if (unlikely(timeout < 0))
+ timeout = -timeout, interruptible = false;
+
+ spin_lock_irqsave(&timelines->lock, flags);
+ list_for_each_entry_safe(tl, tn, &timelines->active_list, link) {
+ if (!mutex_trylock(&tl->mutex))
+ continue;
+
+ intel_timeline_get(tl);
+ GEM_BUG_ON(!tl->active_count);
+ tl->active_count++; /* pin the list element */
+ spin_unlock_irqrestore(&timelines->lock, flags);
+
+ if (timeout > 0) {
+ struct dma_fence *fence;
+
+ fence = i915_active_fence_get(&tl->last_request);
+ if (fence) {
+ timeout = dma_fence_wait_timeout(fence,
+ true,
+ timeout);
+ dma_fence_put(fence);
+ }
+ }
+
+ retire_requests(tl);
+
+ spin_lock_irqsave(&timelines->lock, flags);
+
+ /* Resume iteration after dropping lock */
+ list_safe_reset_next(tl, tn, link);
+ if (--tl->active_count)
+ active_count += !!rcu_access_pointer(tl->last_request.fence);
+ else
+ list_del(&tl->link);
+
+ mutex_unlock(&tl->mutex);
+
+ /* Defer the final release to after the spinlock */
+ if (refcount_dec_and_test(&tl->kref.refcount)) {
+ GEM_BUG_ON(tl->active_count);
+ list_add(&tl->link, &free);
+ }
+ }
+ spin_unlock_irqrestore(&timelines->lock, flags);
+
+ list_for_each_entry_safe(tl, tn, &free, link)
+ __intel_timeline_free(&tl->kref);
+
+ return active_count ? timeout : 0;
+}
+
+int intel_gt_wait_for_idle(struct intel_gt *gt, long timeout)
+{
+ /* If the device is asleep, we have no requests outstanding */
+ if (!intel_gt_pm_is_awake(gt))
+ return 0;
+
+ while ((timeout = intel_gt_retire_requests_timeout(gt, timeout)) > 0) {
+ cond_resched();
+ if (signal_pending(current))
+ return -EINTR;
+ }
+
+ return timeout;
+}
+
+static void retire_work_handler(struct work_struct *work)
+{
+ struct intel_gt *gt =
+ container_of(work, typeof(*gt), requests.retire_work.work);
+
+ intel_gt_retire_requests(gt);
+ schedule_delayed_work(&gt->requests.retire_work,
+ round_jiffies_up_relative(HZ));
+}
+
+void intel_gt_init_requests(struct intel_gt *gt)
+{
+ INIT_DELAYED_WORK(&gt->requests.retire_work, retire_work_handler);
+}
+
+void intel_gt_park_requests(struct intel_gt *gt)
+{
+ cancel_delayed_work(&gt->requests.retire_work);
+}
+
+void intel_gt_unpark_requests(struct intel_gt *gt)
+{
+ schedule_delayed_work(&gt->requests.retire_work,
+ round_jiffies_up_relative(HZ));
+}
diff --git a/drivers/gpu/drm/i915/gt/intel_gt_requests.h b/drivers/gpu/drm/i915/gt/intel_gt_requests.h
new file mode 100644
index 000000000000..bd31cbce47e0
--- /dev/null
+++ b/drivers/gpu/drm/i915/gt/intel_gt_requests.h
@@ -0,0 +1,24 @@
+/*
+ * SPDX-License-Identifier: MIT
+ *
+ * Copyright © 2019 Intel Corporation
+ */
+
+#ifndef INTEL_GT_REQUESTS_H
+#define INTEL_GT_REQUESTS_H
+
+struct intel_gt;
+
+long intel_gt_retire_requests_timeout(struct intel_gt *gt, long timeout);
+static inline void intel_gt_retire_requests(struct intel_gt *gt)
+{
+ intel_gt_retire_requests_timeout(gt, 0);
+}
+
+int intel_gt_wait_for_idle(struct intel_gt *gt, long timeout);
+
+void intel_gt_init_requests(struct intel_gt *gt);
+void intel_gt_park_requests(struct intel_gt *gt);
+void intel_gt_unpark_requests(struct intel_gt *gt);
+
+#endif /* INTEL_GT_REQUESTS_H */
diff --git a/drivers/gpu/drm/i915/gt/intel_gt_types.h b/drivers/gpu/drm/i915/gt/intel_gt_types.h
index dc295c196d11..802f516a3430 100644
--- a/drivers/gpu/drm/i915/gt/intel_gt_types.h
+++ b/drivers/gpu/drm/i915/gt/intel_gt_types.h
@@ -18,6 +18,7 @@
#include "i915_vma.h"
#include "intel_engine_types.h"
#include "intel_reset_types.h"
+#include "intel_rc6_types.h"
#include "intel_wakeref.h"
struct drm_i915_private;
@@ -49,7 +50,19 @@ struct intel_gt {
struct list_head hwsp_free_list;
} timelines;
+ struct intel_gt_requests {
+ /**
+ * We leave the user IRQ off as much as possible,
+ * but this means that requests will finish and never
+ * be retired once the system goes idle. Set a timer to
+ * fire periodically while the ring is running. When it
+ * fires, go retire requests.
+ */
+ struct delayed_work retire_work;
+ } requests;
+
struct intel_wakeref wakeref;
+ atomic_t user_wakeref;
struct list_head closed_vma;
spinlock_t closed_lock; /* guards the list of closed_vma */
@@ -66,6 +79,8 @@ struct intel_gt {
*/
intel_wakeref_t awake;
+ struct intel_rc6 rc6;
+
struct blocking_notifier_head pm_notifications;
ktime_t last_init_time;
@@ -89,9 +104,6 @@ enum intel_gt_scratch_field {
INTEL_GT_SCRATCH_FIELD_DEFAULT = 0,
/* 8 bytes */
- INTEL_GT_SCRATCH_FIELD_CLEAR_SLM_WA = 128,
-
- /* 8 bytes */
INTEL_GT_SCRATCH_FIELD_RENDER_FLUSH = 128,
/* 8 bytes */
diff --git a/drivers/gpu/drm/i915/gt/intel_hangcheck.c b/drivers/gpu/drm/i915/gt/intel_hangcheck.c
index 05d042cdefe2..9814b18b32ad 100644
--- a/drivers/gpu/drm/i915/gt/intel_hangcheck.c
+++ b/drivers/gpu/drm/i915/gt/intel_hangcheck.c
@@ -53,6 +53,7 @@ static bool instdone_unchanged(u32 current_instdone, u32 *old_instdone)
static bool subunits_stuck(struct intel_engine_cs *engine)
{
struct drm_i915_private *dev_priv = engine->i915;
+ const struct sseu_dev_info *sseu = &RUNTIME_INFO(dev_priv)->sseu;
struct intel_instdone instdone;
struct intel_instdone *accu_instdone = &engine->hangcheck.instdone;
bool stuck;
@@ -71,7 +72,7 @@ static bool subunits_stuck(struct intel_engine_cs *engine)
stuck &= instdone_unchanged(instdone.slice_common,
&accu_instdone->slice_common);
- for_each_instdone_slice_subslice(dev_priv, slice, subslice) {
+ for_each_instdone_slice_subslice(dev_priv, sseu, slice, subslice) {
stuck &= instdone_unchanged(instdone.sampler[slice][subslice],
&accu_instdone->sampler[slice][subslice]);
stuck &= instdone_unchanged(instdone.row[slice][subslice],
@@ -283,7 +284,7 @@ static void hangcheck_elapsed(struct work_struct *work)
for_each_engine(engine, gt->i915, id) {
struct hangcheck hc;
- intel_engine_signal_breadcrumbs(engine);
+ intel_engine_breadcrumbs_irq(engine);
hangcheck_load_sample(engine, &hc);
hangcheck_accumulate_sample(engine, &hc);
diff --git a/drivers/gpu/drm/i915/gt/intel_lrc.c b/drivers/gpu/drm/i915/gt/intel_lrc.c
index d42584439f51..468438fb47af 100644
--- a/drivers/gpu/drm/i915/gt/intel_lrc.c
+++ b/drivers/gpu/drm/i915/gt/intel_lrc.c
@@ -230,9 +230,27 @@ static int __execlists_context_alloc(struct intel_context *ce,
struct intel_engine_cs *engine);
static void execlists_init_reg_state(u32 *reg_state,
- struct intel_context *ce,
- struct intel_engine_cs *engine,
- struct intel_ring *ring);
+ const struct intel_context *ce,
+ const struct intel_engine_cs *engine,
+ const struct intel_ring *ring,
+ bool close);
+
+static void __context_pin_acquire(struct intel_context *ce)
+{
+ mutex_acquire(&ce->pin_mutex.dep_map, 2, 0, _RET_IP_);
+}
+
+static void __context_pin_release(struct intel_context *ce)
+{
+ mutex_release(&ce->pin_mutex.dep_map, 0, _RET_IP_);
+}
+
+static void mark_eio(struct i915_request *rq)
+{
+ if (!i915_request_signaled(rq))
+ dma_fence_set_error(&rq->fence, -EIO);
+ i915_request_mark_complete(rq);
+}
static inline u32 intel_hws_preempt_address(struct intel_engine_cs *engine)
{
@@ -422,12 +440,8 @@ assert_priority_queue(const struct i915_request *prev,
static u64
lrc_descriptor(struct intel_context *ce, struct intel_engine_cs *engine)
{
- struct i915_gem_context *ctx = ce->gem_context;
u64 desc;
- BUILD_BUG_ON(MAX_CONTEXT_HW_ID > (BIT(GEN8_CTX_ID_WIDTH)));
- BUILD_BUG_ON(GEN11_MAX_CONTEXT_HW_ID > (BIT(GEN11_SW_CTX_ID_WIDTH)));
-
desc = INTEL_LEGACY_32B_CONTEXT;
if (i915_vm_is_4lvl(ce->vm))
desc = INTEL_LEGACY_64B_CONTEXT;
@@ -445,25 +459,421 @@ lrc_descriptor(struct intel_context *ce, struct intel_engine_cs *engine)
* anything below.
*/
if (INTEL_GEN(engine->i915) >= 11) {
- GEM_BUG_ON(ctx->hw_id >= BIT(GEN11_SW_CTX_ID_WIDTH));
- desc |= (u64)ctx->hw_id << GEN11_SW_CTX_ID_SHIFT;
- /* bits 37-47 */
-
desc |= (u64)engine->instance << GEN11_ENGINE_INSTANCE_SHIFT;
/* bits 48-53 */
- /* TODO: decide what to do with SW counter (bits 55-60) */
-
desc |= (u64)engine->class << GEN11_ENGINE_CLASS_SHIFT;
/* bits 61-63 */
- } else {
- GEM_BUG_ON(ctx->hw_id >= BIT(GEN8_CTX_ID_WIDTH));
- desc |= (u64)ctx->hw_id << GEN8_CTX_ID_SHIFT; /* bits 32-52 */
}
return desc;
}
+static u32 *set_offsets(u32 *regs,
+ const u8 *data,
+ const struct intel_engine_cs *engine)
+#define NOP(x) (BIT(7) | (x))
+#define LRI(count, flags) ((flags) << 6 | (count))
+#define POSTED BIT(0)
+#define REG(x) (((x) >> 2) | BUILD_BUG_ON_ZERO(x >= 0x200))
+#define REG16(x) \
+ (((x) >> 9) | BIT(7) | BUILD_BUG_ON_ZERO(x >= 0x10000)), \
+ (((x) >> 2) & 0x7f)
+#define END() 0
+{
+ const u32 base = engine->mmio_base;
+
+ while (*data) {
+ u8 count, flags;
+
+ if (*data & BIT(7)) { /* skip */
+ regs += *data++ & ~BIT(7);
+ continue;
+ }
+
+ count = *data & 0x3f;
+ flags = *data >> 6;
+ data++;
+
+ *regs = MI_LOAD_REGISTER_IMM(count);
+ if (flags & POSTED)
+ *regs |= MI_LRI_FORCE_POSTED;
+ if (INTEL_GEN(engine->i915) >= 11)
+ *regs |= MI_LRI_CS_MMIO;
+ regs++;
+
+ GEM_BUG_ON(!count);
+ do {
+ u32 offset = 0;
+ u8 v;
+
+ do {
+ v = *data++;
+ offset <<= 7;
+ offset |= v & ~BIT(7);
+ } while (v & BIT(7));
+
+ *regs = base + (offset << 2);
+ regs += 2;
+ } while (--count);
+ }
+
+ return regs;
+}
+
+static const u8 gen8_xcs_offsets[] = {
+ NOP(1),
+ LRI(11, 0),
+ REG16(0x244),
+ REG(0x034),
+ REG(0x030),
+ REG(0x038),
+ REG(0x03c),
+ REG(0x168),
+ REG(0x140),
+ REG(0x110),
+ REG(0x11c),
+ REG(0x114),
+ REG(0x118),
+
+ NOP(9),
+ LRI(9, 0),
+ REG16(0x3a8),
+ REG16(0x28c),
+ REG16(0x288),
+ REG16(0x284),
+ REG16(0x280),
+ REG16(0x27c),
+ REG16(0x278),
+ REG16(0x274),
+ REG16(0x270),
+
+ NOP(13),
+ LRI(2, 0),
+ REG16(0x200),
+ REG(0x028),
+
+ END(),
+};
+
+static const u8 gen9_xcs_offsets[] = {
+ NOP(1),
+ LRI(14, POSTED),
+ REG16(0x244),
+ REG(0x034),
+ REG(0x030),
+ REG(0x038),
+ REG(0x03c),
+ REG(0x168),
+ REG(0x140),
+ REG(0x110),
+ REG(0x11c),
+ REG(0x114),
+ REG(0x118),
+ REG(0x1c0),
+ REG(0x1c4),
+ REG(0x1c8),
+
+ NOP(3),
+ LRI(9, POSTED),
+ REG16(0x3a8),
+ REG16(0x28c),
+ REG16(0x288),
+ REG16(0x284),
+ REG16(0x280),
+ REG16(0x27c),
+ REG16(0x278),
+ REG16(0x274),
+ REG16(0x270),
+
+ NOP(13),
+ LRI(1, POSTED),
+ REG16(0x200),
+
+ NOP(13),
+ LRI(44, POSTED),
+ REG(0x028),
+ REG(0x09c),
+ REG(0x0c0),
+ REG(0x178),
+ REG(0x17c),
+ REG16(0x358),
+ REG(0x170),
+ REG(0x150),
+ REG(0x154),
+ REG(0x158),
+ REG16(0x41c),
+ REG16(0x600),
+ REG16(0x604),
+ REG16(0x608),
+ REG16(0x60c),
+ REG16(0x610),
+ REG16(0x614),
+ REG16(0x618),
+ REG16(0x61c),
+ REG16(0x620),
+ REG16(0x624),
+ REG16(0x628),
+ REG16(0x62c),
+ REG16(0x630),
+ REG16(0x634),
+ REG16(0x638),
+ REG16(0x63c),
+ REG16(0x640),
+ REG16(0x644),
+ REG16(0x648),
+ REG16(0x64c),
+ REG16(0x650),
+ REG16(0x654),
+ REG16(0x658),
+ REG16(0x65c),
+ REG16(0x660),
+ REG16(0x664),
+ REG16(0x668),
+ REG16(0x66c),
+ REG16(0x670),
+ REG16(0x674),
+ REG16(0x678),
+ REG16(0x67c),
+ REG(0x068),
+
+ END(),
+};
+
+static const u8 gen12_xcs_offsets[] = {
+ NOP(1),
+ LRI(13, POSTED),
+ REG16(0x244),
+ REG(0x034),
+ REG(0x030),
+ REG(0x038),
+ REG(0x03c),
+ REG(0x168),
+ REG(0x140),
+ REG(0x110),
+ REG(0x1c0),
+ REG(0x1c4),
+ REG(0x1c8),
+ REG(0x180),
+ REG16(0x2b4),
+
+ NOP(5),
+ LRI(9, POSTED),
+ REG16(0x3a8),
+ REG16(0x28c),
+ REG16(0x288),
+ REG16(0x284),
+ REG16(0x280),
+ REG16(0x27c),
+ REG16(0x278),
+ REG16(0x274),
+ REG16(0x270),
+
+ NOP(13),
+ LRI(2, POSTED),
+ REG16(0x200),
+ REG16(0x204),
+
+ NOP(11),
+ LRI(50, POSTED),
+ REG16(0x588),
+ REG16(0x588),
+ REG16(0x588),
+ REG16(0x588),
+ REG16(0x588),
+ REG16(0x588),
+ REG(0x028),
+ REG(0x09c),
+ REG(0x0c0),
+ REG(0x178),
+ REG(0x17c),
+ REG16(0x358),
+ REG(0x170),
+ REG(0x150),
+ REG(0x154),
+ REG(0x158),
+ REG16(0x41c),
+ REG16(0x600),
+ REG16(0x604),
+ REG16(0x608),
+ REG16(0x60c),
+ REG16(0x610),
+ REG16(0x614),
+ REG16(0x618),
+ REG16(0x61c),
+ REG16(0x620),
+ REG16(0x624),
+ REG16(0x628),
+ REG16(0x62c),
+ REG16(0x630),
+ REG16(0x634),
+ REG16(0x638),
+ REG16(0x63c),
+ REG16(0x640),
+ REG16(0x644),
+ REG16(0x648),
+ REG16(0x64c),
+ REG16(0x650),
+ REG16(0x654),
+ REG16(0x658),
+ REG16(0x65c),
+ REG16(0x660),
+ REG16(0x664),
+ REG16(0x668),
+ REG16(0x66c),
+ REG16(0x670),
+ REG16(0x674),
+ REG16(0x678),
+ REG16(0x67c),
+ REG(0x068),
+
+ END(),
+};
+
+static const u8 gen8_rcs_offsets[] = {
+ NOP(1),
+ LRI(14, POSTED),
+ REG16(0x244),
+ REG(0x034),
+ REG(0x030),
+ REG(0x038),
+ REG(0x03c),
+ REG(0x168),
+ REG(0x140),
+ REG(0x110),
+ REG(0x11c),
+ REG(0x114),
+ REG(0x118),
+ REG(0x1c0),
+ REG(0x1c4),
+ REG(0x1c8),
+
+ NOP(3),
+ LRI(9, POSTED),
+ REG16(0x3a8),
+ REG16(0x28c),
+ REG16(0x288),
+ REG16(0x284),
+ REG16(0x280),
+ REG16(0x27c),
+ REG16(0x278),
+ REG16(0x274),
+ REG16(0x270),
+
+ NOP(13),
+ LRI(1, 0),
+ REG(0x0c8),
+
+ END(),
+};
+
+static const u8 gen11_rcs_offsets[] = {
+ NOP(1),
+ LRI(15, POSTED),
+ REG16(0x244),
+ REG(0x034),
+ REG(0x030),
+ REG(0x038),
+ REG(0x03c),
+ REG(0x168),
+ REG(0x140),
+ REG(0x110),
+ REG(0x11c),
+ REG(0x114),
+ REG(0x118),
+ REG(0x1c0),
+ REG(0x1c4),
+ REG(0x1c8),
+ REG(0x180),
+
+ NOP(1),
+ LRI(9, POSTED),
+ REG16(0x3a8),
+ REG16(0x28c),
+ REG16(0x288),
+ REG16(0x284),
+ REG16(0x280),
+ REG16(0x27c),
+ REG16(0x278),
+ REG16(0x274),
+ REG16(0x270),
+
+ LRI(1, POSTED),
+ REG(0x1b0),
+
+ NOP(10),
+ LRI(1, 0),
+ REG(0x0c8),
+
+ END(),
+};
+
+static const u8 gen12_rcs_offsets[] = {
+ NOP(1),
+ LRI(13, POSTED),
+ REG16(0x244),
+ REG(0x034),
+ REG(0x030),
+ REG(0x038),
+ REG(0x03c),
+ REG(0x168),
+ REG(0x140),
+ REG(0x110),
+ REG(0x1c0),
+ REG(0x1c4),
+ REG(0x1c8),
+ REG(0x180),
+ REG16(0x2b4),
+
+ NOP(5),
+ LRI(9, POSTED),
+ REG16(0x3a8),
+ REG16(0x28c),
+ REG16(0x288),
+ REG16(0x284),
+ REG16(0x280),
+ REG16(0x27c),
+ REG16(0x278),
+ REG16(0x274),
+ REG16(0x270),
+
+ LRI(3, POSTED),
+ REG(0x1b0),
+ REG16(0x5a8),
+ REG16(0x5ac),
+
+ NOP(6),
+ LRI(1, 0),
+ REG(0x0c8),
+
+ END(),
+};
+
+#undef END
+#undef REG16
+#undef REG
+#undef LRI
+#undef NOP
+
+static const u8 *reg_offsets(const struct intel_engine_cs *engine)
+{
+ if (engine->class == RENDER_CLASS) {
+ if (INTEL_GEN(engine->i915) >= 12)
+ return gen12_rcs_offsets;
+ else if (INTEL_GEN(engine->i915) >= 11)
+ return gen11_rcs_offsets;
+ else
+ return gen8_rcs_offsets;
+ } else {
+ if (INTEL_GEN(engine->i915) >= 12)
+ return gen12_xcs_offsets;
+ else if (INTEL_GEN(engine->i915) >= 9)
+ return gen9_xcs_offsets;
+ else
+ return gen8_xcs_offsets;
+ }
+}
+
static void unwind_wa_tail(struct i915_request *rq)
{
rq->tail = intel_ring_wrap(rq->ring, rq->wa_tail - WA_TAIL_BYTES);
@@ -562,6 +972,18 @@ __execlists_schedule_in(struct i915_request *rq)
intel_context_get(ce);
+ if (ce->tag) {
+ /* Use a fixed tag for OA and friends */
+ ce->lrc_desc |= (u64)ce->tag << 32;
+ } else {
+ /* We don't need a strict matching tag, just different values */
+ ce->lrc_desc &= ~GENMASK_ULL(47, 37);
+ ce->lrc_desc |=
+ (u64)(engine->context_tag++ % NUM_CONTEXT_TAG) <<
+ GEN11_SW_CTX_ID_SHIFT;
+ BUILD_BUG_ON(NUM_CONTEXT_TAG > GEN12_MAX_CONTEXT_HW_ID);
+ }
+
intel_gt_pm_get(engine->gt);
execlists_context_status_change(rq, INTEL_CONTEXT_SCHEDULE_IN);
intel_engine_context_in(engine);
@@ -631,7 +1053,6 @@ execlists_schedule_out(struct i915_request *rq)
struct intel_engine_cs *cur, *old;
trace_i915_request_out(rq);
- GEM_BUG_ON(intel_context_inflight(ce) != rq->engine);
old = READ_ONCE(ce->inflight);
do
@@ -648,7 +1069,7 @@ static u64 execlists_update_context(const struct i915_request *rq)
struct intel_context *ce = rq->hw_context;
u64 desc;
- ce->lrc_reg_state[CTX_RING_TAIL + 1] =
+ ce->lrc_reg_state[CTX_RING_TAIL] =
intel_ring_set_tail(rq->ring, rq->tail);
/*
@@ -693,6 +1114,9 @@ trace_ports(const struct intel_engine_execlists *execlists,
const struct intel_engine_cs *engine =
container_of(execlists, typeof(*engine), execlists);
+ if (!ports[0])
+ return;
+
GEM_TRACE("%s: %s { %llx:%lld%s, %llx:%lld }\n",
engine->name, msg,
ports[0]->fence.context,
@@ -797,6 +1221,17 @@ static bool can_merge_rq(const struct i915_request *prev,
GEM_BUG_ON(prev == next);
GEM_BUG_ON(!assert_priority_queue(prev, next));
+ /*
+ * We do not submit known completed requests. Therefore if the next
+ * request is already completed, we can pretend to merge it in
+ * with the previous context (and we will skip updating the ELSP
+ * and tracking). Thus hopefully keeping the ELSP full with active
+ * contexts, despite the best efforts of preempt-to-busy to confuse
+ * us.
+ */
+ if (i915_request_completed(next))
+ return true;
+
if (!can_merge_ctx(prev->hw_context, next->hw_context))
return false;
@@ -806,47 +1241,7 @@ static bool can_merge_rq(const struct i915_request *prev,
static void virtual_update_register_offsets(u32 *regs,
struct intel_engine_cs *engine)
{
- u32 base = engine->mmio_base;
-
- /* Must match execlists_init_reg_state()! */
-
- regs[CTX_CONTEXT_CONTROL] =
- i915_mmio_reg_offset(RING_CONTEXT_CONTROL(base));
- regs[CTX_RING_HEAD] = i915_mmio_reg_offset(RING_HEAD(base));
- regs[CTX_RING_TAIL] = i915_mmio_reg_offset(RING_TAIL(base));
- regs[CTX_RING_BUFFER_START] = i915_mmio_reg_offset(RING_START(base));
- regs[CTX_RING_BUFFER_CONTROL] = i915_mmio_reg_offset(RING_CTL(base));
-
- regs[CTX_BB_HEAD_U] = i915_mmio_reg_offset(RING_BBADDR_UDW(base));
- regs[CTX_BB_HEAD_L] = i915_mmio_reg_offset(RING_BBADDR(base));
- regs[CTX_BB_STATE] = i915_mmio_reg_offset(RING_BBSTATE(base));
- regs[CTX_SECOND_BB_HEAD_U] =
- i915_mmio_reg_offset(RING_SBBADDR_UDW(base));
- regs[CTX_SECOND_BB_HEAD_L] = i915_mmio_reg_offset(RING_SBBADDR(base));
- regs[CTX_SECOND_BB_STATE] = i915_mmio_reg_offset(RING_SBBSTATE(base));
-
- regs[CTX_CTX_TIMESTAMP] =
- i915_mmio_reg_offset(RING_CTX_TIMESTAMP(base));
- regs[CTX_PDP3_UDW] = i915_mmio_reg_offset(GEN8_RING_PDP_UDW(base, 3));
- regs[CTX_PDP3_LDW] = i915_mmio_reg_offset(GEN8_RING_PDP_LDW(base, 3));
- regs[CTX_PDP2_UDW] = i915_mmio_reg_offset(GEN8_RING_PDP_UDW(base, 2));
- regs[CTX_PDP2_LDW] = i915_mmio_reg_offset(GEN8_RING_PDP_LDW(base, 2));
- regs[CTX_PDP1_UDW] = i915_mmio_reg_offset(GEN8_RING_PDP_UDW(base, 1));
- regs[CTX_PDP1_LDW] = i915_mmio_reg_offset(GEN8_RING_PDP_LDW(base, 1));
- regs[CTX_PDP0_UDW] = i915_mmio_reg_offset(GEN8_RING_PDP_UDW(base, 0));
- regs[CTX_PDP0_LDW] = i915_mmio_reg_offset(GEN8_RING_PDP_LDW(base, 0));
-
- if (engine->class == RENDER_CLASS) {
- regs[CTX_RCS_INDIRECT_CTX] =
- i915_mmio_reg_offset(RING_INDIRECT_CTX(base));
- regs[CTX_RCS_INDIRECT_CTX_OFFSET] =
- i915_mmio_reg_offset(RING_INDIRECT_CTX_OFFSET(base));
- regs[CTX_BB_PER_CTX_PTR] =
- i915_mmio_reg_offset(RING_BB_PER_CTX_PTR(base));
-
- regs[CTX_R_PWR_CLK_STATE] =
- i915_mmio_reg_offset(GEN8_R_PWR_CLK_STATE);
- }
+ set_offsets(regs, reg_offsets(engine), engine);
}
static bool virtual_matches(const struct virtual_engine *ve,
@@ -1172,21 +1567,6 @@ static void execlists_dequeue(struct intel_engine_cs *engine)
continue;
}
- if (i915_request_completed(rq)) {
- ve->request = NULL;
- ve->base.execlists.queue_priority_hint = INT_MIN;
- rb_erase_cached(rb, &execlists->virtual);
- RB_CLEAR_NODE(rb);
-
- rq->engine = engine;
- __i915_request_submit(rq);
-
- spin_unlock(&ve->base.active.lock);
-
- rb = rb_first_cached(&execlists->virtual);
- continue;
- }
-
if (last && !can_merge_rq(last, rq)) {
spin_unlock(&ve->base.active.lock);
return; /* leave this for another */
@@ -1214,7 +1594,10 @@ static void execlists_dequeue(struct intel_engine_cs *engine)
unsigned int n;
GEM_BUG_ON(READ_ONCE(ve->context.inflight));
- virtual_update_register_offsets(regs, engine);
+
+ if (!intel_engine_has_relative_mmio(engine))
+ virtual_update_register_offsets(regs,
+ engine);
if (!list_empty(&ve->context.signals))
virtual_xfer_breadcrumbs(ve, engine);
@@ -1237,11 +1620,24 @@ static void execlists_dequeue(struct intel_engine_cs *engine)
GEM_BUG_ON(ve->siblings[0] != engine);
}
- __i915_request_submit(rq);
- if (!i915_request_completed(rq)) {
+ if (__i915_request_submit(rq)) {
submit = true;
last = rq;
}
+ i915_request_put(rq);
+
+ /*
+ * Hmm, we have a bunch of virtual engine requests,
+ * but the first one was already completed (thanks
+ * preempt-to-busy!). Keep looking at the veng queue
+ * until we have no more relevant requests (i.e.
+ * the normal submit queue has higher priority).
+ */
+ if (!submit) {
+ spin_unlock(&ve->base.active.lock);
+ rb = rb_first_cached(&execlists->virtual);
+ continue;
+ }
}
spin_unlock(&ve->base.active.lock);
@@ -1254,8 +1650,7 @@ static void execlists_dequeue(struct intel_engine_cs *engine)
int i;
priolist_for_each_request_consume(rq, rn, p, i) {
- if (i915_request_completed(rq))
- goto skip;
+ bool merge = true;
/*
* Can we combine this request with the current port?
@@ -1296,14 +1691,23 @@ static void execlists_dequeue(struct intel_engine_cs *engine)
ctx_single_port_submission(rq->hw_context))
goto done;
- *port = execlists_schedule_in(last, port - execlists->pending);
- port++;
+ merge = false;
}
- last = rq;
- submit = true;
-skip:
- __i915_request_submit(rq);
+ if (__i915_request_submit(rq)) {
+ if (!merge) {
+ *port = execlists_schedule_in(last, port - execlists->pending);
+ port++;
+ last = NULL;
+ }
+
+ GEM_BUG_ON(last &&
+ !can_merge_ctx(last->hw_context,
+ rq->hw_context));
+
+ submit = true;
+ last = rq;
+ }
}
rb_erase_cached(&p->node, &execlists->queue);
@@ -1334,11 +1738,26 @@ done:
if (submit) {
*port = execlists_schedule_in(last, port - execlists->pending);
- memset(port + 1, 0, (last_port - port) * sizeof(*port));
execlists->switch_priority_hint =
switch_prio(engine, *execlists->pending);
+
+ /*
+ * Skip if we ended up with exactly the same set of requests,
+ * e.g. trying to timeslice a pair of ordered contexts
+ */
+ if (!memcmp(execlists->active, execlists->pending,
+ (port - execlists->pending + 1) * sizeof(*port))) {
+ do
+ execlists_schedule_out(fetch_and_zero(port));
+ while (port-- != execlists->pending);
+
+ goto skip_submit;
+ }
+
+ memset(port + 1, 0, (last_port - port) * sizeof(*port));
execlists_submit_ports(engine);
} else {
+skip_submit:
ring_set_paused(engine, 0);
}
}
@@ -1371,13 +1790,6 @@ reset_in_progress(const struct intel_engine_execlists *execlists)
return unlikely(!__tasklet_is_enabled(&execlists->tasklet));
}
-enum csb_step {
- CSB_NOP,
- CSB_PROMOTE,
- CSB_PREEMPT,
- CSB_COMPLETE,
-};
-
/*
* Starting with Gen12, the status has a new format:
*
@@ -1404,7 +1816,7 @@ enum csb_step {
* bits 47-57: sw context id of the lrc the GT switched away from
* bits 58-63: sw counter of the lrc the GT switched away from
*/
-static inline enum csb_step
+static inline bool
gen12_csb_parse(const struct intel_engine_execlists *execlists, const u32 *csb)
{
u32 lower_dw = csb[0];
@@ -1413,9 +1825,6 @@ gen12_csb_parse(const struct intel_engine_execlists *execlists, const u32 *csb)
bool ctx_away_valid = GEN12_CSB_CTX_VALID(upper_dw);
bool new_queue = lower_dw & GEN12_CTX_STATUS_SWITCHED_TO_NEW_QUEUE;
- if (!ctx_away_valid && ctx_to_valid)
- return CSB_PROMOTE;
-
/*
* The context switch detail is not guaranteed to be 5 when a preemption
* occurs, so we can't just check for that. The check below works for
@@ -1423,8 +1832,10 @@ gen12_csb_parse(const struct intel_engine_execlists *execlists, const u32 *csb)
* instructions and lite-restore. Preempt-to-idle via the CTRL register
* would require some extra handling, but we don't support that.
*/
- if (new_queue && ctx_away_valid)
- return CSB_PREEMPT;
+ if (!ctx_away_valid || new_queue) {
+ GEM_BUG_ON(!ctx_to_valid);
+ return true;
+ }
/*
* switch detail = 5 is covered by the case above and we do not expect a
@@ -1432,30 +1843,13 @@ gen12_csb_parse(const struct intel_engine_execlists *execlists, const u32 *csb)
* use polling mode.
*/
GEM_BUG_ON(GEN12_CTX_SWITCH_DETAIL(upper_dw));
-
- if (*execlists->active) {
- GEM_BUG_ON(!ctx_away_valid);
- return CSB_COMPLETE;
- }
-
- return CSB_NOP;
+ return false;
}
-static inline enum csb_step
+static inline bool
gen8_csb_parse(const struct intel_engine_execlists *execlists, const u32 *csb)
{
- unsigned int status = *csb;
-
- if (status & GEN8_CTX_STATUS_IDLE_ACTIVE)
- return CSB_PROMOTE;
-
- if (status & GEN8_CTX_STATUS_PREEMPTED)
- return CSB_PREEMPT;
-
- if (*execlists->active)
- return CSB_COMPLETE;
-
- return CSB_NOP;
+ return *csb & (GEN8_CTX_STATUS_IDLE_ACTIVE | GEN8_CTX_STATUS_PREEMPTED);
}
static void process_csb(struct intel_engine_cs *engine)
@@ -1494,7 +1888,7 @@ static void process_csb(struct intel_engine_cs *engine)
rmb();
do {
- enum csb_step csb_step;
+ bool promote;
if (++head == num_entries)
head = 0;
@@ -1522,20 +1916,16 @@ static void process_csb(struct intel_engine_cs *engine)
buf[2 * head + 0], buf[2 * head + 1]);
if (INTEL_GEN(engine->i915) >= 12)
- csb_step = gen12_csb_parse(execlists, buf + 2 * head);
+ promote = gen12_csb_parse(execlists, buf + 2 * head);
else
- csb_step = gen8_csb_parse(execlists, buf + 2 * head);
-
- switch (csb_step) {
- case CSB_PREEMPT: /* cancel old inflight, prepare for switch */
+ promote = gen8_csb_parse(execlists, buf + 2 * head);
+ if (promote) {
+ /* cancel old inflight, prepare for switch */
trace_ports(execlists, "preempted", execlists->active);
-
while (*execlists->active)
execlists_schedule_out(*execlists->active++);
- /* fallthrough */
- case CSB_PROMOTE: /* switch pending to inflight */
- GEM_BUG_ON(*execlists->active);
+ /* switch pending to inflight */
GEM_BUG_ON(!assert_pending_valid(execlists, "promote"));
execlists->active =
memcpy(execlists->inflight,
@@ -1550,9 +1940,10 @@ static void process_csb(struct intel_engine_cs *engine)
ring_set_paused(engine, 0);
WRITE_ONCE(execlists->pending[0], NULL);
- break;
+ } else {
+ GEM_BUG_ON(!*execlists->active);
- case CSB_COMPLETE: /* port0 completed, advanced to port1 */
+ /* port0 completed, advanced to port1 */
trace_ports(execlists, "completed", execlists->active);
/*
@@ -1567,10 +1958,6 @@ static void process_csb(struct intel_engine_cs *engine)
GEM_BUG_ON(execlists->active - execlists->inflight >
execlists_num_ports(execlists));
- break;
-
- case CSB_NOP:
- break;
}
} while (head != tail);
@@ -1726,14 +2113,13 @@ static void execlists_context_unpin(struct intel_context *ce)
check_redzone((void *)ce->lrc_reg_state - LRC_STATE_PN * PAGE_SIZE,
ce->engine);
- i915_gem_context_unpin_hw_id(ce->gem_context);
i915_gem_object_unpin_map(ce->state->obj);
intel_ring_reset(ce->ring, ce->ring->tail);
}
static void
-__execlists_update_reg_state(struct intel_context *ce,
- struct intel_engine_cs *engine)
+__execlists_update_reg_state(const struct intel_context *ce,
+ const struct intel_engine_cs *engine)
{
struct intel_ring *ring = ce->ring;
u32 *regs = ce->lrc_reg_state;
@@ -1741,16 +2127,16 @@ __execlists_update_reg_state(struct intel_context *ce,
GEM_BUG_ON(!intel_ring_offset_valid(ring, ring->head));
GEM_BUG_ON(!intel_ring_offset_valid(ring, ring->tail));
- regs[CTX_RING_BUFFER_START + 1] = i915_ggtt_offset(ring->vma);
- regs[CTX_RING_HEAD + 1] = ring->head;
- regs[CTX_RING_TAIL + 1] = ring->tail;
+ regs[CTX_RING_BUFFER_START] = i915_ggtt_offset(ring->vma);
+ regs[CTX_RING_HEAD] = ring->head;
+ regs[CTX_RING_TAIL] = ring->tail;
/* RPCS */
if (engine->class == RENDER_CLASS) {
- regs[CTX_R_PWR_CLK_STATE + 1] =
+ regs[CTX_R_PWR_CLK_STATE] =
intel_sseu_make_rpcs(engine->i915, &ce->sseu);
- i915_oa_init_reg_state(engine, ce, regs);
+ i915_oa_init_reg_state(ce, engine);
}
}
@@ -1776,18 +2162,12 @@ __execlists_context_pin(struct intel_context *ce,
goto unpin_active;
}
- ret = i915_gem_context_pin_hw_id(ce->gem_context);
- if (ret)
- goto unpin_map;
-
ce->lrc_desc = lrc_descriptor(ce, engine);
ce->lrc_reg_state = vaddr + LRC_STATE_PN * PAGE_SIZE;
__execlists_update_reg_state(ce, engine);
return 0;
-unpin_map:
- i915_gem_object_unpin_map(ce->state->obj);
unpin_active:
intel_context_active_release(ce);
err:
@@ -1843,7 +2223,7 @@ static int gen8_emit_init_breadcrumb(struct i915_request *rq)
{
u32 *cs;
- GEM_BUG_ON(!rq->timeline->has_initial_breadcrumb);
+ GEM_BUG_ON(!i915_request_timeline(rq)->has_initial_breadcrumb);
cs = intel_ring_begin(rq, 6);
if (IS_ERR(cs))
@@ -1859,7 +2239,7 @@ static int gen8_emit_init_breadcrumb(struct i915_request *rq)
*cs++ = MI_NOOP;
*cs++ = MI_STORE_DWORD_IMM_GEN4 | MI_USE_GGTT;
- *cs++ = rq->timeline->hwsp_offset;
+ *cs++ = i915_request_timeline(rq)->hwsp_offset;
*cs++ = 0;
*cs++ = rq->fence.seqno - 1;
@@ -1871,60 +2251,6 @@ static int gen8_emit_init_breadcrumb(struct i915_request *rq)
return 0;
}
-static int emit_pdps(struct i915_request *rq)
-{
- const struct intel_engine_cs * const engine = rq->engine;
- struct i915_ppgtt * const ppgtt = i915_vm_to_ppgtt(rq->hw_context->vm);
- int err, i;
- u32 *cs;
-
- GEM_BUG_ON(intel_vgpu_active(rq->i915));
-
- /*
- * Beware ye of the dragons, this sequence is magic!
- *
- * Small changes to this sequence can cause anything from
- * GPU hangs to forcewake errors and machine lockups!
- */
-
- /* Flush any residual operations from the context load */
- err = engine->emit_flush(rq, EMIT_FLUSH);
- if (err)
- return err;
-
- /* Magic required to prevent forcewake errors! */
- err = engine->emit_flush(rq, EMIT_INVALIDATE);
- if (err)
- return err;
-
- cs = intel_ring_begin(rq, 4 * GEN8_3LVL_PDPES + 2);
- if (IS_ERR(cs))
- return PTR_ERR(cs);
-
- /* Ensure the LRI have landed before we invalidate & continue */
- *cs++ = MI_LOAD_REGISTER_IMM(2 * GEN8_3LVL_PDPES) | MI_LRI_FORCE_POSTED;
- for (i = GEN8_3LVL_PDPES; i--; ) {
- const dma_addr_t pd_daddr = i915_page_dir_dma_addr(ppgtt, i);
- u32 base = engine->mmio_base;
-
- *cs++ = i915_mmio_reg_offset(GEN8_RING_PDP_UDW(base, i));
- *cs++ = upper_32_bits(pd_daddr);
- *cs++ = i915_mmio_reg_offset(GEN8_RING_PDP_LDW(base, i));
- *cs++ = lower_32_bits(pd_daddr);
- }
- *cs++ = MI_NOOP;
-
- intel_ring_advance(rq, cs);
-
- /* Be doubly sure the LRI have landed before proceeding */
- err = engine->emit_flush(rq, EMIT_FLUSH);
- if (err)
- return err;
-
- /* Re-invalidate the TLB for luck */
- return engine->emit_flush(rq, EMIT_INVALIDATE);
-}
-
static int execlists_request_alloc(struct i915_request *request)
{
int ret;
@@ -1947,10 +2273,7 @@ static int execlists_request_alloc(struct i915_request *request)
*/
/* Unconditionally invalidate GPU caches and TLBs. */
- if (i915_vm_is_4lvl(request->hw_context->vm))
- ret = request->engine->emit_flush(request, EMIT_INVALIDATE);
- else
- ret = emit_pdps(request);
+ ret = request->engine->emit_flush(request, EMIT_INVALIDATE);
if (ret)
return ret;
@@ -2002,12 +2325,6 @@ gen8_emit_flush_coherentl3_wa(struct intel_engine_cs *engine, u32 *batch)
return batch;
}
-static u32 slm_offset(struct intel_engine_cs *engine)
-{
- return intel_gt_scratch_offset(engine->gt,
- INTEL_GT_SCRATCH_FIELD_CLEAR_SLM_WA);
-}
-
/*
* Typically we only have one indirect_ctx and per_ctx batch buffer which are
* initialized at the beginning and shared across all contexts but this field
@@ -2036,10 +2353,10 @@ static u32 *gen8_init_indirectctx_bb(struct intel_engine_cs *engine, u32 *batch)
/* Actual scratch location is at 128 bytes offset */
batch = gen8_emit_pipe_control(batch,
PIPE_CONTROL_FLUSH_L3 |
- PIPE_CONTROL_GLOBAL_GTT_IVB |
+ PIPE_CONTROL_STORE_DATA_INDEX |
PIPE_CONTROL_CS_STALL |
PIPE_CONTROL_QW_WRITE,
- slm_offset(engine));
+ LRC_PPHWSP_SCRATCH_ADDR);
*batch++ = MI_ARB_ON_OFF | MI_ARB_ENABLE;
@@ -2399,10 +2716,14 @@ static void reset_csb_pointers(struct intel_engine_cs *engine)
static struct i915_request *active_request(struct i915_request *rq)
{
- const struct list_head * const list = &rq->timeline->requests;
const struct intel_context * const ce = rq->hw_context;
struct i915_request *active = NULL;
+ struct list_head *list;
+
+ if (!i915_request_is_active(rq)) /* unwound, but incomplete! */
+ return rq;
+ list = &i915_request_active_timeline(rq)->requests;
list_for_each_entry_from_reverse(rq, list, link) {
if (i915_request_completed(rq))
break;
@@ -2416,6 +2737,17 @@ static struct i915_request *active_request(struct i915_request *rq)
return active;
}
+static void __execlists_reset_reg_state(const struct intel_context *ce,
+ const struct intel_engine_cs *engine)
+{
+ u32 *regs = ce->lrc_reg_state;
+
+ if (INTEL_GEN(engine->i915) >= 9) {
+ regs[GEN9_CTX_RING_MI_MODE + 1] &= ~STOP_RING;
+ regs[GEN9_CTX_RING_MI_MODE + 1] |= STOP_RING << 16;
+ }
+}
+
static void __execlists_reset(struct intel_engine_cs *engine, bool stalled)
{
struct intel_engine_execlists * const execlists = &engine->execlists;
@@ -2423,6 +2755,10 @@ static void __execlists_reset(struct intel_engine_cs *engine, bool stalled)
struct i915_request *rq;
u32 *regs;
+ mb(); /* paranoia: read the CSB pointers from after the reset */
+ clflush(execlists->csb_write);
+ mb();
+
process_csb(engine); /* drain preemption events */
/* Following the reset, we need to reload the CSB read/write pointers */
@@ -2440,6 +2776,10 @@ static void __execlists_reset(struct intel_engine_cs *engine, bool stalled)
ce = rq->hw_context;
GEM_BUG_ON(i915_active_is_idle(&ce->active));
GEM_BUG_ON(!i915_vma_is_pinned(ce->state));
+
+ /* Proclaim we have exclusive access to the context image! */
+ __context_pin_acquire(ce);
+
rq = active_request(rq);
if (!rq) {
ce->ring->head = ce->ring->tail;
@@ -2486,19 +2826,23 @@ static void __execlists_reset(struct intel_engine_cs *engine, bool stalled)
* future request will be after userspace has had the opportunity
* to recreate its own state.
*/
+ GEM_BUG_ON(!intel_context_is_pinned(ce));
regs = ce->lrc_reg_state;
if (engine->pinned_default_state) {
memcpy(regs, /* skip restoring the vanilla PPHWSP */
engine->pinned_default_state + LRC_STATE_PN * PAGE_SIZE,
engine->context_size - PAGE_SIZE);
}
- execlists_init_reg_state(regs, ce, engine, ce->ring);
+ execlists_init_reg_state(regs, ce, engine, ce->ring, false);
out_replay:
GEM_TRACE("%s replay {head:%04x, tail:%04x\n",
engine->name, ce->ring->head, ce->ring->tail);
intel_ring_update_space(ce->ring);
+ __execlists_reset_reg_state(ce, engine);
__execlists_update_reg_state(ce, engine);
+ ce->lrc_desc |= CTX_DESC_FORCE_RESTORE; /* paranoid: GPU was reset! */
+ __context_pin_release(ce);
unwind:
/* Push back any incomplete requests for replay after the reset. */
@@ -2552,12 +2896,8 @@ static void execlists_cancel_requests(struct intel_engine_cs *engine)
__execlists_reset(engine, true);
/* Mark all executing requests as skipped. */
- list_for_each_entry(rq, &engine->active.requests, sched.link) {
- if (!i915_request_signaled(rq))
- dma_fence_set_error(&rq->fence, -EIO);
-
- i915_request_mark_complete(rq);
- }
+ list_for_each_entry(rq, &engine->active.requests, sched.link)
+ mark_eio(rq);
/* Flush the queued requests to the timeline list (for retiring). */
while ((rb = rb_first_cached(&execlists->queue))) {
@@ -2565,10 +2905,8 @@ static void execlists_cancel_requests(struct intel_engine_cs *engine)
int i;
priolist_for_each_request_consume(rq, rn, p, i) {
- list_del_init(&rq->sched.link);
+ mark_eio(rq);
__i915_request_submit(rq);
- dma_fence_set_error(&rq->fence, -EIO);
- i915_request_mark_complete(rq);
}
rb_erase_cached(&p->node, &execlists->queue);
@@ -2584,13 +2922,15 @@ static void execlists_cancel_requests(struct intel_engine_cs *engine)
RB_CLEAR_NODE(rb);
spin_lock(&ve->base.active.lock);
- if (ve->request) {
- ve->request->engine = engine;
- __i915_request_submit(ve->request);
- dma_fence_set_error(&ve->request->fence, -EIO);
- i915_request_mark_complete(ve->request);
+ rq = fetch_and_zero(&ve->request);
+ if (rq) {
+ mark_eio(rq);
+
+ rq->engine = engine;
+ __i915_request_submit(rq);
+ i915_request_put(rq);
+
ve->base.execlists.queue_priority_hint = INT_MIN;
- ve->request = NULL;
}
spin_unlock(&ve->base.active.lock);
}
@@ -2723,7 +3063,7 @@ static int gen8_emit_flush(struct i915_request *request, u32 mode)
}
*cs++ = cmd;
- *cs++ = I915_GEM_HWS_SCRATCH_ADDR | MI_FLUSH_DW_USE_GTT;
+ *cs++ = LRC_PPHWSP_SCRATCH_ADDR;
*cs++ = 0; /* upper addr */
*cs++ = 0; /* value */
intel_ring_advance(request, cs);
@@ -2734,10 +3074,6 @@ static int gen8_emit_flush(struct i915_request *request, u32 mode)
static int gen8_emit_flush_render(struct i915_request *request,
u32 mode)
{
- struct intel_engine_cs *engine = request->engine;
- u32 scratch_addr =
- intel_gt_scratch_offset(engine->gt,
- INTEL_GT_SCRATCH_FIELD_RENDER_FLUSH);
bool vf_flush_wa = false, dc_flush_wa = false;
u32 *cs, flags = 0;
int len;
@@ -2759,7 +3095,7 @@ static int gen8_emit_flush_render(struct i915_request *request,
flags |= PIPE_CONTROL_CONST_CACHE_INVALIDATE;
flags |= PIPE_CONTROL_STATE_CACHE_INVALIDATE;
flags |= PIPE_CONTROL_QW_WRITE;
- flags |= PIPE_CONTROL_GLOBAL_GTT_IVB;
+ flags |= PIPE_CONTROL_STORE_DATA_INDEX;
/*
* On GEN9: before VF_CACHE_INVALIDATE we need to emit a NULL
@@ -2792,7 +3128,7 @@ static int gen8_emit_flush_render(struct i915_request *request,
cs = gen8_emit_pipe_control(cs, PIPE_CONTROL_DC_FLUSH_ENABLE,
0);
- cs = gen8_emit_pipe_control(cs, flags, scratch_addr);
+ cs = gen8_emit_pipe_control(cs, flags, LRC_PPHWSP_SCRATCH_ADDR);
if (dc_flush_wa)
cs = gen8_emit_pipe_control(cs, PIPE_CONTROL_CS_STALL, 0);
@@ -2805,11 +3141,6 @@ static int gen8_emit_flush_render(struct i915_request *request,
static int gen11_emit_flush_render(struct i915_request *request,
u32 mode)
{
- struct intel_engine_cs *engine = request->engine;
- const u32 scratch_addr =
- intel_gt_scratch_offset(engine->gt,
- INTEL_GT_SCRATCH_FIELD_RENDER_FLUSH);
-
if (mode & EMIT_FLUSH) {
u32 *cs;
u32 flags = 0;
@@ -2822,13 +3153,13 @@ static int gen11_emit_flush_render(struct i915_request *request,
flags |= PIPE_CONTROL_DC_FLUSH_ENABLE;
flags |= PIPE_CONTROL_FLUSH_ENABLE;
flags |= PIPE_CONTROL_QW_WRITE;
- flags |= PIPE_CONTROL_GLOBAL_GTT_IVB;
+ flags |= PIPE_CONTROL_STORE_DATA_INDEX;
cs = intel_ring_begin(request, 6);
if (IS_ERR(cs))
return PTR_ERR(cs);
- cs = gen8_emit_pipe_control(cs, flags, scratch_addr);
+ cs = gen8_emit_pipe_control(cs, flags, LRC_PPHWSP_SCRATCH_ADDR);
intel_ring_advance(request, cs);
}
@@ -2846,13 +3177,81 @@ static int gen11_emit_flush_render(struct i915_request *request,
flags |= PIPE_CONTROL_CONST_CACHE_INVALIDATE;
flags |= PIPE_CONTROL_STATE_CACHE_INVALIDATE;
flags |= PIPE_CONTROL_QW_WRITE;
- flags |= PIPE_CONTROL_GLOBAL_GTT_IVB;
+ flags |= PIPE_CONTROL_STORE_DATA_INDEX;
cs = intel_ring_begin(request, 6);
if (IS_ERR(cs))
return PTR_ERR(cs);
- cs = gen8_emit_pipe_control(cs, flags, scratch_addr);
+ cs = gen8_emit_pipe_control(cs, flags, LRC_PPHWSP_SCRATCH_ADDR);
+ intel_ring_advance(request, cs);
+ }
+
+ return 0;
+}
+
+static u32 preparser_disable(bool state)
+{
+ return MI_ARB_CHECK | 1 << 8 | state;
+}
+
+static int gen12_emit_flush_render(struct i915_request *request,
+ u32 mode)
+{
+ if (mode & EMIT_FLUSH) {
+ u32 flags = 0;
+ u32 *cs;
+
+ flags |= PIPE_CONTROL_TILE_CACHE_FLUSH;
+ flags |= PIPE_CONTROL_RENDER_TARGET_CACHE_FLUSH;
+ flags |= PIPE_CONTROL_DEPTH_CACHE_FLUSH;
+ flags |= PIPE_CONTROL_DC_FLUSH_ENABLE;
+ flags |= PIPE_CONTROL_FLUSH_ENABLE;
+
+ flags |= PIPE_CONTROL_STORE_DATA_INDEX;
+ flags |= PIPE_CONTROL_QW_WRITE;
+
+ flags |= PIPE_CONTROL_CS_STALL;
+
+ cs = intel_ring_begin(request, 6);
+ if (IS_ERR(cs))
+ return PTR_ERR(cs);
+
+ cs = gen8_emit_pipe_control(cs, flags, LRC_PPHWSP_SCRATCH_ADDR);
+ intel_ring_advance(request, cs);
+ }
+
+ if (mode & EMIT_INVALIDATE) {
+ u32 flags = 0;
+ u32 *cs;
+
+ flags |= PIPE_CONTROL_COMMAND_CACHE_INVALIDATE;
+ flags |= PIPE_CONTROL_TLB_INVALIDATE;
+ flags |= PIPE_CONTROL_INSTRUCTION_CACHE_INVALIDATE;
+ flags |= PIPE_CONTROL_TEXTURE_CACHE_INVALIDATE;
+ flags |= PIPE_CONTROL_VF_CACHE_INVALIDATE;
+ flags |= PIPE_CONTROL_CONST_CACHE_INVALIDATE;
+ flags |= PIPE_CONTROL_STATE_CACHE_INVALIDATE;
+
+ flags |= PIPE_CONTROL_STORE_DATA_INDEX;
+ flags |= PIPE_CONTROL_QW_WRITE;
+
+ flags |= PIPE_CONTROL_CS_STALL;
+
+ cs = intel_ring_begin(request, 8);
+ if (IS_ERR(cs))
+ return PTR_ERR(cs);
+
+ /*
+ * Prevent the pre-parser from skipping past the TLB
+ * invalidate and loading a stale page for the batch
+ * buffer / request payload.
+ */
+ *cs++ = preparser_disable(true);
+
+ cs = gen8_emit_pipe_control(cs, flags, LRC_PPHWSP_SCRATCH_ADDR);
+
+ *cs++ = preparser_disable(false);
intel_ring_advance(request, cs);
}
@@ -2907,7 +3306,7 @@ static u32 *gen8_emit_fini_breadcrumb(struct i915_request *request, u32 *cs)
{
cs = gen8_emit_ggtt_write(cs,
request->fence.seqno,
- request->timeline->hwsp_offset,
+ i915_request_active_timeline(request)->hwsp_offset,
0);
return gen8_emit_fini_breadcrumb_footer(request, cs);
@@ -2915,28 +3314,103 @@ static u32 *gen8_emit_fini_breadcrumb(struct i915_request *request, u32 *cs)
static u32 *gen8_emit_fini_breadcrumb_rcs(struct i915_request *request, u32 *cs)
{
+ cs = gen8_emit_pipe_control(cs,
+ PIPE_CONTROL_RENDER_TARGET_CACHE_FLUSH |
+ PIPE_CONTROL_DEPTH_CACHE_FLUSH |
+ PIPE_CONTROL_DC_FLUSH_ENABLE,
+ 0);
+
+ /* XXX flush+write+CS_STALL all in one upsets gem_concurrent_blt:kbl */
cs = gen8_emit_ggtt_write_rcs(cs,
request->fence.seqno,
- request->timeline->hwsp_offset,
+ i915_request_active_timeline(request)->hwsp_offset,
+ PIPE_CONTROL_FLUSH_ENABLE |
+ PIPE_CONTROL_CS_STALL);
+
+ return gen8_emit_fini_breadcrumb_footer(request, cs);
+}
+
+static u32 *
+gen11_emit_fini_breadcrumb_rcs(struct i915_request *request, u32 *cs)
+{
+ cs = gen8_emit_ggtt_write_rcs(cs,
+ request->fence.seqno,
+ i915_request_active_timeline(request)->hwsp_offset,
+ PIPE_CONTROL_CS_STALL |
+ PIPE_CONTROL_TILE_CACHE_FLUSH |
PIPE_CONTROL_RENDER_TARGET_CACHE_FLUSH |
PIPE_CONTROL_DEPTH_CACHE_FLUSH |
- PIPE_CONTROL_DC_FLUSH_ENABLE);
-
- /* XXX flush+write+CS_STALL all in one upsets gem_concurrent_blt:kbl */
- cs = gen8_emit_pipe_control(cs,
- PIPE_CONTROL_FLUSH_ENABLE |
- PIPE_CONTROL_CS_STALL,
- 0);
+ PIPE_CONTROL_DC_FLUSH_ENABLE |
+ PIPE_CONTROL_FLUSH_ENABLE);
return gen8_emit_fini_breadcrumb_footer(request, cs);
}
-static u32 *gen11_emit_fini_breadcrumb_rcs(struct i915_request *request,
- u32 *cs)
+/*
+ * Note that the CS instruction pre-parser will not stall on the breadcrumb
+ * flush and will continue pre-fetching the instructions after it before the
+ * memory sync is completed. On pre-gen12 HW, the pre-parser will stop at
+ * BB_START/END instructions, so, even though we might pre-fetch the pre-amble
+ * of the next request before the memory has been flushed, we're guaranteed that
+ * we won't access the batch itself too early.
+ * However, on gen12+ the parser can pre-fetch across the BB_START/END commands,
+ * so, if the current request is modifying an instruction in the next request on
+ * the same intel_context, we might pre-fetch and then execute the pre-update
+ * instruction. To avoid this, the users of self-modifying code should either
+ * disable the parser around the code emitting the memory writes, via a new flag
+ * added to MI_ARB_CHECK, or emit the writes from a different intel_context. For
+ * the in-kernel use-cases we've opted to use a separate context, see
+ * reloc_gpu() as an example.
+ * All the above applies only to the instructions themselves. Non-inline data
+ * used by the instructions is not pre-fetched.
+ */
+
+static u32 *gen12_emit_preempt_busywait(struct i915_request *request, u32 *cs)
+{
+ *cs++ = MI_SEMAPHORE_WAIT_TOKEN |
+ MI_SEMAPHORE_GLOBAL_GTT |
+ MI_SEMAPHORE_POLL |
+ MI_SEMAPHORE_SAD_EQ_SDD;
+ *cs++ = 0;
+ *cs++ = intel_hws_preempt_address(request->engine);
+ *cs++ = 0;
+ *cs++ = 0;
+ *cs++ = MI_NOOP;
+
+ return cs;
+}
+
+static __always_inline u32*
+gen12_emit_fini_breadcrumb_footer(struct i915_request *request, u32 *cs)
+{
+ *cs++ = MI_USER_INTERRUPT;
+
+ *cs++ = MI_ARB_ON_OFF | MI_ARB_ENABLE;
+ if (intel_engine_has_semaphores(request->engine))
+ cs = gen12_emit_preempt_busywait(request, cs);
+
+ request->tail = intel_ring_offset(request, cs);
+ assert_ring_tail_valid(request->ring, request->tail);
+
+ return gen8_emit_wa_tail(request, cs);
+}
+
+static u32 *gen12_emit_fini_breadcrumb(struct i915_request *request, u32 *cs)
+{
+ cs = gen8_emit_ggtt_write(cs,
+ request->fence.seqno,
+ i915_request_active_timeline(request)->hwsp_offset,
+ 0);
+
+ return gen12_emit_fini_breadcrumb_footer(request, cs);
+}
+
+static u32 *
+gen12_emit_fini_breadcrumb_rcs(struct i915_request *request, u32 *cs)
{
cs = gen8_emit_ggtt_write_rcs(cs,
request->fence.seqno,
- request->timeline->hwsp_offset,
+ i915_request_active_timeline(request)->hwsp_offset,
PIPE_CONTROL_CS_STALL |
PIPE_CONTROL_TILE_CACHE_FLUSH |
PIPE_CONTROL_RENDER_TARGET_CACHE_FLUSH |
@@ -2944,7 +3418,7 @@ static u32 *gen11_emit_fini_breadcrumb_rcs(struct i915_request *request,
PIPE_CONTROL_DC_FLUSH_ENABLE |
PIPE_CONTROL_FLUSH_ENABLE);
- return gen8_emit_fini_breadcrumb_footer(request, cs);
+ return gen12_emit_fini_breadcrumb_footer(request, cs);
}
static void execlists_park(struct intel_engine_cs *engine)
@@ -2972,6 +3446,9 @@ void intel_execlists_set_default_submission(struct intel_engine_cs *engine)
if (HAS_LOGICAL_RING_PREEMPTION(engine->i915))
engine->flags |= I915_ENGINE_HAS_PREEMPTION;
}
+
+ if (engine->class != COPY_ENGINE_CLASS && INTEL_GEN(engine->i915) >= 12)
+ engine->flags |= I915_ENGINE_HAS_RELATIVE_MMIO;
}
static void execlists_destroy(struct intel_engine_cs *engine)
@@ -2999,6 +3476,8 @@ logical_ring_default_vfuncs(struct intel_engine_cs *engine)
engine->emit_flush = gen8_emit_flush;
engine->emit_init_breadcrumb = gen8_emit_init_breadcrumb;
engine->emit_fini_breadcrumb = gen8_emit_fini_breadcrumb;
+ if (INTEL_GEN(engine->i915) >= 12)
+ engine->emit_fini_breadcrumb = gen12_emit_fini_breadcrumb;
engine->set_default_submission = intel_execlists_set_default_submission;
@@ -3044,6 +3523,9 @@ static void rcs_submission_override(struct intel_engine_cs *engine)
{
switch (INTEL_GEN(engine->i915)) {
case 12:
+ engine->emit_flush = gen12_emit_flush_render;
+ engine->emit_fini_breadcrumb = gen12_emit_fini_breadcrumb_rcs;
+ break;
case 11:
engine->emit_flush = gen11_emit_flush_render;
engine->emit_fini_breadcrumb = gen11_emit_fini_breadcrumb_rcs;
@@ -3116,7 +3598,7 @@ int intel_execlists_submission_init(struct intel_engine_cs *engine)
return 0;
}
-static u32 intel_lr_indirect_ctx_offset(struct intel_engine_cs *engine)
+static u32 intel_lr_indirect_ctx_offset(const struct intel_engine_cs *engine)
{
u32 indirect_ctx_offset;
@@ -3149,86 +3631,50 @@ static u32 intel_lr_indirect_ctx_offset(struct intel_engine_cs *engine)
return indirect_ctx_offset;
}
-static void execlists_init_reg_state(u32 *regs,
- struct intel_context *ce,
- struct intel_engine_cs *engine,
- struct intel_ring *ring)
-{
- struct i915_ppgtt *ppgtt = i915_vm_to_ppgtt(ce->vm);
- bool rcs = engine->class == RENDER_CLASS;
- u32 base = engine->mmio_base;
-
- /*
- * A context is actually a big batch buffer with several
- * MI_LOAD_REGISTER_IMM commands followed by (reg, value) pairs. The
- * values we are setting here are only for the first context restore:
- * on a subsequent save, the GPU will recreate this batchbuffer with new
- * values (including all the missing MI_LOAD_REGISTER_IMM commands that
- * we are not initializing here).
- *
- * Must keep consistent with virtual_update_register_offsets().
- */
- regs[CTX_LRI_HEADER_0] = MI_LOAD_REGISTER_IMM(rcs ? 14 : 11) |
- MI_LRI_FORCE_POSTED;
- CTX_REG(regs, CTX_CONTEXT_CONTROL, RING_CONTEXT_CONTROL(base),
+static void init_common_reg_state(u32 * const regs,
+ const struct intel_engine_cs *engine,
+ const struct intel_ring *ring)
+{
+ regs[CTX_CONTEXT_CONTROL] =
_MASKED_BIT_DISABLE(CTX_CTRL_ENGINE_CTX_RESTORE_INHIBIT) |
- _MASKED_BIT_ENABLE(CTX_CTRL_INHIBIT_SYN_CTX_SWITCH));
- if (INTEL_GEN(engine->i915) < 11) {
- regs[CTX_CONTEXT_CONTROL + 1] |=
+ _MASKED_BIT_ENABLE(CTX_CTRL_INHIBIT_SYN_CTX_SWITCH);
+ if (INTEL_GEN(engine->i915) < 11)
+ regs[CTX_CONTEXT_CONTROL] |=
_MASKED_BIT_DISABLE(CTX_CTRL_ENGINE_CTX_SAVE_INHIBIT |
CTX_CTRL_RS_CTX_ENABLE);
- }
- CTX_REG(regs, CTX_RING_HEAD, RING_HEAD(base), 0);
- CTX_REG(regs, CTX_RING_TAIL, RING_TAIL(base), 0);
- CTX_REG(regs, CTX_RING_BUFFER_START, RING_START(base), 0);
- CTX_REG(regs, CTX_RING_BUFFER_CONTROL, RING_CTL(base),
- RING_CTL_SIZE(ring->size) | RING_VALID);
- CTX_REG(regs, CTX_BB_HEAD_U, RING_BBADDR_UDW(base), 0);
- CTX_REG(regs, CTX_BB_HEAD_L, RING_BBADDR(base), 0);
- CTX_REG(regs, CTX_BB_STATE, RING_BBSTATE(base), RING_BB_PPGTT);
- CTX_REG(regs, CTX_SECOND_BB_HEAD_U, RING_SBBADDR_UDW(base), 0);
- CTX_REG(regs, CTX_SECOND_BB_HEAD_L, RING_SBBADDR(base), 0);
- CTX_REG(regs, CTX_SECOND_BB_STATE, RING_SBBSTATE(base), 0);
- if (rcs) {
- struct i915_ctx_workarounds *wa_ctx = &engine->wa_ctx;
-
- CTX_REG(regs, CTX_RCS_INDIRECT_CTX, RING_INDIRECT_CTX(base), 0);
- CTX_REG(regs, CTX_RCS_INDIRECT_CTX_OFFSET,
- RING_INDIRECT_CTX_OFFSET(base), 0);
- if (wa_ctx->indirect_ctx.size) {
- u32 ggtt_offset = i915_ggtt_offset(wa_ctx->vma);
-
- regs[CTX_RCS_INDIRECT_CTX + 1] =
- (ggtt_offset + wa_ctx->indirect_ctx.offset) |
- (wa_ctx->indirect_ctx.size / CACHELINE_BYTES);
-
- regs[CTX_RCS_INDIRECT_CTX_OFFSET + 1] =
- intel_lr_indirect_ctx_offset(engine) << 6;
- }
- CTX_REG(regs, CTX_BB_PER_CTX_PTR, RING_BB_PER_CTX_PTR(base), 0);
- if (wa_ctx->per_ctx.size) {
- u32 ggtt_offset = i915_ggtt_offset(wa_ctx->vma);
+ regs[CTX_RING_BUFFER_CONTROL] = RING_CTL_SIZE(ring->size) | RING_VALID;
+ regs[CTX_BB_STATE] = RING_BB_PPGTT;
+}
- regs[CTX_BB_PER_CTX_PTR + 1] =
- (ggtt_offset + wa_ctx->per_ctx.offset) | 0x01;
- }
+static void init_wa_bb_reg_state(u32 * const regs,
+ const struct intel_engine_cs *engine,
+ u32 pos_bb_per_ctx)
+{
+ const struct i915_ctx_workarounds * const wa_ctx = &engine->wa_ctx;
+
+ if (wa_ctx->per_ctx.size) {
+ const u32 ggtt_offset = i915_ggtt_offset(wa_ctx->vma);
+
+ regs[pos_bb_per_ctx] =
+ (ggtt_offset + wa_ctx->per_ctx.offset) | 0x01;
}
- regs[CTX_LRI_HEADER_1] = MI_LOAD_REGISTER_IMM(9) | MI_LRI_FORCE_POSTED;
+ if (wa_ctx->indirect_ctx.size) {
+ const u32 ggtt_offset = i915_ggtt_offset(wa_ctx->vma);
- CTX_REG(regs, CTX_CTX_TIMESTAMP, RING_CTX_TIMESTAMP(base), 0);
- /* PDP values well be assigned later if needed */
- CTX_REG(regs, CTX_PDP3_UDW, GEN8_RING_PDP_UDW(base, 3), 0);
- CTX_REG(regs, CTX_PDP3_LDW, GEN8_RING_PDP_LDW(base, 3), 0);
- CTX_REG(regs, CTX_PDP2_UDW, GEN8_RING_PDP_UDW(base, 2), 0);
- CTX_REG(regs, CTX_PDP2_LDW, GEN8_RING_PDP_LDW(base, 2), 0);
- CTX_REG(regs, CTX_PDP1_UDW, GEN8_RING_PDP_UDW(base, 1), 0);
- CTX_REG(regs, CTX_PDP1_LDW, GEN8_RING_PDP_LDW(base, 1), 0);
- CTX_REG(regs, CTX_PDP0_UDW, GEN8_RING_PDP_UDW(base, 0), 0);
- CTX_REG(regs, CTX_PDP0_LDW, GEN8_RING_PDP_LDW(base, 0), 0);
+ regs[pos_bb_per_ctx + 2] =
+ (ggtt_offset + wa_ctx->indirect_ctx.offset) |
+ (wa_ctx->indirect_ctx.size / CACHELINE_BYTES);
+ regs[pos_bb_per_ctx + 4] =
+ intel_lr_indirect_ctx_offset(engine) << 6;
+ }
+}
+
+static void init_ppgtt_reg_state(u32 *regs, const struct i915_ppgtt *ppgtt)
+{
if (i915_vm_is_4lvl(&ppgtt->vm)) {
/* 64b PPGTT (48bit canonical)
* PDP0_DESCRIPTOR contains the base address to PML4 and
@@ -3241,15 +3687,47 @@ static void execlists_init_reg_state(u32 *regs,
ASSIGN_CTX_PDP(ppgtt, regs, 1);
ASSIGN_CTX_PDP(ppgtt, regs, 0);
}
+}
+
+static struct i915_ppgtt *vm_alias(struct i915_address_space *vm)
+{
+ if (i915_is_ggtt(vm))
+ return i915_vm_to_ggtt(vm)->alias;
+ else
+ return i915_vm_to_ppgtt(vm);
+}
- if (rcs) {
- regs[CTX_LRI_HEADER_2] = MI_LOAD_REGISTER_IMM(1);
- CTX_REG(regs, CTX_R_PWR_CLK_STATE, GEN8_R_PWR_CLK_STATE, 0);
+static void execlists_init_reg_state(u32 *regs,
+ const struct intel_context *ce,
+ const struct intel_engine_cs *engine,
+ const struct intel_ring *ring,
+ bool close)
+{
+ /*
+ * A context is actually a big batch buffer with several
+ * MI_LOAD_REGISTER_IMM commands followed by (reg, value) pairs. The
+ * values we are setting here are only for the first context restore:
+ * on a subsequent save, the GPU will recreate this batchbuffer with new
+ * values (including all the missing MI_LOAD_REGISTER_IMM commands that
+ * we are not initializing here).
+ *
+ * Must keep consistent with virtual_update_register_offsets().
+ */
+ u32 *bbe = set_offsets(regs, reg_offsets(engine), engine);
+
+ if (close) { /* Close the batch; used mainly by live_lrc_layout() */
+ *bbe = MI_BATCH_BUFFER_END;
+ if (INTEL_GEN(engine->i915) >= 10)
+ *bbe |= BIT(0);
}
- regs[CTX_END] = MI_BATCH_BUFFER_END;
- if (INTEL_GEN(engine->i915) >= 10)
- regs[CTX_END] |= BIT(0);
+ init_common_reg_state(regs, engine, ring);
+ init_ppgtt_reg_state(regs, vm_alias(ce->vm));
+
+ init_wa_bb_reg_state(regs, engine,
+ INTEL_GEN(engine->i915) >= 12 ?
+ GEN12_CTX_BB_PER_CTX_PTR :
+ CTX_BB_PER_CTX_PTR);
}
static int
@@ -3258,6 +3736,7 @@ populate_lr_context(struct intel_context *ce,
struct intel_engine_cs *engine,
struct intel_ring *ring)
{
+ bool inhibit = true;
void *vaddr;
u32 *regs;
int ret;
@@ -3289,14 +3768,15 @@ populate_lr_context(struct intel_context *ce,
memcpy(vaddr + start, defaults + start, engine->context_size);
i915_gem_object_unpin_map(engine->default_state);
+ inhibit = false;
}
/* The second page of the context object contains some fields which must
* be set up prior to the first execution. */
regs = vaddr + LRC_STATE_PN * PAGE_SIZE;
- execlists_init_reg_state(regs, ce, engine, ring);
- if (!engine->default_state)
- regs[CTX_CONTEXT_CONTROL + 1] |=
+ execlists_init_reg_state(regs, ce, engine, ring, inhibit);
+ if (inhibit)
+ regs[CTX_CONTEXT_CONTROL] |=
_MASKED_BIT_ENABLE(CTX_CTRL_ENGINE_CTX_RESTORE_INHIBIT);
ret = 0;
@@ -3436,8 +3916,9 @@ static void virtual_engine_initial_hint(struct virtual_engine *ve)
return;
swap(ve->siblings[swp], ve->siblings[0]);
- virtual_update_register_offsets(ve->context.lrc_reg_state,
- ve->siblings[0]);
+ if (!intel_engine_has_relative_mmio(ve->siblings[0]))
+ virtual_update_register_offsets(ve->context.lrc_reg_state,
+ ve->siblings[0]);
}
static int virtual_context_pin(struct intel_context *ce)
@@ -3594,6 +4075,8 @@ submit_engine:
static void virtual_submit_request(struct i915_request *rq)
{
struct virtual_engine *ve = to_virtual_engine(rq->engine);
+ struct i915_request *old;
+ unsigned long flags;
GEM_TRACE("%s: rq=%llx:%lld\n",
ve->base.name,
@@ -3602,15 +4085,31 @@ static void virtual_submit_request(struct i915_request *rq)
GEM_BUG_ON(ve->base.submit_request != virtual_submit_request);
- GEM_BUG_ON(ve->request);
- GEM_BUG_ON(!list_empty(virtual_queue(ve)));
+ spin_lock_irqsave(&ve->base.active.lock, flags);
- ve->base.execlists.queue_priority_hint = rq_prio(rq);
- WRITE_ONCE(ve->request, rq);
+ old = ve->request;
+ if (old) { /* background completion event from preempt-to-busy */
+ GEM_BUG_ON(!i915_request_completed(old));
+ __i915_request_submit(old);
+ i915_request_put(old);
+ }
+
+ if (i915_request_completed(rq)) {
+ __i915_request_submit(rq);
- list_move_tail(&rq->sched.link, virtual_queue(ve));
+ ve->base.execlists.queue_priority_hint = INT_MIN;
+ ve->request = NULL;
+ } else {
+ ve->base.execlists.queue_priority_hint = rq_prio(rq);
+ ve->request = i915_request_get(rq);
- tasklet_schedule(&ve->base.execlists.tasklet);
+ GEM_BUG_ON(!list_empty(virtual_queue(ve)));
+ list_move_tail(&rq->sched.link, virtual_queue(ve));
+
+ tasklet_schedule(&ve->base.execlists.tasklet);
+ }
+
+ spin_unlock_irqrestore(&ve->base.active.lock, flags);
}
static struct ve_bond *
@@ -3631,18 +4130,22 @@ static void
virtual_bond_execute(struct i915_request *rq, struct dma_fence *signal)
{
struct virtual_engine *ve = to_virtual_engine(rq->engine);
+ intel_engine_mask_t allowed, exec;
struct ve_bond *bond;
+ allowed = ~to_request(signal)->engine->mask;
+
bond = virtual_find_bond(ve, to_request(signal)->engine);
- if (bond) {
- intel_engine_mask_t old, new, cmp;
+ if (bond)
+ allowed &= bond->sibling_mask;
- cmp = READ_ONCE(rq->execution_mask);
- do {
- old = cmp;
- new = cmp & bond->sibling_mask;
- } while ((cmp = cmpxchg(&rq->execution_mask, old, new)) != old);
- }
+ /* Restrict the bonded request to run on only the available engines */
+ exec = READ_ONCE(rq->execution_mask);
+ while (!try_cmpxchg(&rq->execution_mask, &exec, exec & allowed))
+ ;
+
+ /* Prevent the master from being re-run on the bonded engines */
+ to_request(signal)->execution_mask &= ~allowed;
}
struct intel_context *
@@ -3689,6 +4192,7 @@ intel_execlists_create_virtual(struct i915_gem_context *ctx,
snprintf(ve->base.name, sizeof(ve->base.name), "virtual");
intel_engine_init_active(&ve->base, ENGINE_VIRTUAL);
+ intel_engine_init_breadcrumbs(&ve->base);
intel_engine_init_execlists(&ve->base);
@@ -3851,6 +4355,18 @@ int intel_virtual_engine_attach_bond(struct intel_engine_cs *engine,
return 0;
}
+struct intel_engine_cs *
+intel_virtual_engine_get_sibling(struct intel_engine_cs *engine,
+ unsigned int sibling)
+{
+ struct virtual_engine *ve = to_virtual_engine(engine);
+
+ if (sibling >= ve->num_siblings)
+ return NULL;
+
+ return ve->siblings[sibling];
+}
+
void intel_execlists_show_requests(struct intel_engine_cs *engine,
struct drm_printer *m,
void (*show_request)(struct drm_printer *m,
@@ -3939,6 +4455,9 @@ void intel_lr_context_reset(struct intel_engine_cs *engine,
u32 head,
bool scrub)
{
+ GEM_BUG_ON(!intel_context_is_pinned(ce));
+ __context_pin_acquire(ce);
+
/*
* We want a simple context + ring to execute the breadcrumb update.
* We cannot rely on the context being intact across the GPU hang,
@@ -3955,7 +4474,7 @@ void intel_lr_context_reset(struct intel_engine_cs *engine,
engine->pinned_default_state + LRC_STATE_PN * PAGE_SIZE,
engine->context_size - PAGE_SIZE);
}
- execlists_init_reg_state(regs, ce, engine, ce->ring);
+ execlists_init_reg_state(regs, ce, engine, ce->ring, false);
}
/* Rerun the request; its payload has been neutered (if guilty). */
@@ -3963,6 +4482,7 @@ void intel_lr_context_reset(struct intel_engine_cs *engine,
intel_ring_update_space(ce->ring);
__execlists_update_reg_state(ce, engine);
+ __context_pin_release(ce);
}
#if IS_ENABLED(CONFIG_DRM_I915_SELFTEST)
diff --git a/drivers/gpu/drm/i915/gt/intel_lrc.h b/drivers/gpu/drm/i915/gt/intel_lrc.h
index c2bba82bcc16..99dc576a4e25 100644
--- a/drivers/gpu/drm/i915/gt/intel_lrc.h
+++ b/drivers/gpu/drm/i915/gt/intel_lrc.h
@@ -66,6 +66,12 @@ struct intel_engine_cs;
#define GEN11_CSB_READ_PTR_MASK (GEN11_CSB_PTR_MASK << 8)
#define GEN11_CSB_WRITE_PTR_MASK (GEN11_CSB_PTR_MASK << 0)
+#define MAX_CONTEXT_HW_ID (1<<21) /* exclusive */
+#define MAX_GUC_CONTEXT_HW_ID (1 << 20) /* exclusive */
+#define GEN11_MAX_CONTEXT_HW_ID (1<<11) /* exclusive */
+/* in Gen12 ID 0x7FF is reserved to indicate idle */
+#define GEN12_MAX_CONTEXT_HW_ID (GEN11_MAX_CONTEXT_HW_ID - 1)
+
enum {
INTEL_CONTEXT_SCHEDULE_IN = 0,
INTEL_CONTEXT_SCHEDULE_OUT,
@@ -104,6 +110,10 @@ int intel_execlists_submission_init(struct intel_engine_cs *engine);
*/
#define LRC_HEADER_PAGES LRC_PPHWSP_PN
+/* Space within PPHWSP reserved to be used as scratch */
+#define LRC_PPHWSP_SCRATCH 0x34
+#define LRC_PPHWSP_SCRATCH_ADDR (LRC_PPHWSP_SCRATCH * sizeof(u32))
+
void intel_execlists_set_default_submission(struct intel_engine_cs *engine);
void intel_lr_context_reset(struct intel_engine_cs *engine,
@@ -131,4 +141,8 @@ int intel_virtual_engine_attach_bond(struct intel_engine_cs *engine,
const struct intel_engine_cs *master,
const struct intel_engine_cs *sibling);
+struct intel_engine_cs *
+intel_virtual_engine_get_sibling(struct intel_engine_cs *engine,
+ unsigned int sibling);
+
#endif /* _INTEL_LRC_H_ */
diff --git a/drivers/gpu/drm/i915/gt/intel_lrc_reg.h b/drivers/gpu/drm/i915/gt/intel_lrc_reg.h
index b8f20ad71169..06ab0276e10e 100644
--- a/drivers/gpu/drm/i915/gt/intel_lrc_reg.h
+++ b/drivers/gpu/drm/i915/gt/intel_lrc_reg.h
@@ -9,55 +9,41 @@
#include <linux/types.h>
-/* GEN8+ Reg State Context */
-#define CTX_LRI_HEADER_0 0x01
-#define CTX_CONTEXT_CONTROL 0x02
-#define CTX_RING_HEAD 0x04
-#define CTX_RING_TAIL 0x06
-#define CTX_RING_BUFFER_START 0x08
-#define CTX_RING_BUFFER_CONTROL 0x0a
-#define CTX_BB_HEAD_U 0x0c
-#define CTX_BB_HEAD_L 0x0e
-#define CTX_BB_STATE 0x10
-#define CTX_SECOND_BB_HEAD_U 0x12
-#define CTX_SECOND_BB_HEAD_L 0x14
-#define CTX_SECOND_BB_STATE 0x16
-#define CTX_BB_PER_CTX_PTR 0x18
-#define CTX_RCS_INDIRECT_CTX 0x1a
-#define CTX_RCS_INDIRECT_CTX_OFFSET 0x1c
-#define CTX_LRI_HEADER_1 0x21
-#define CTX_CTX_TIMESTAMP 0x22
-#define CTX_PDP3_UDW 0x24
-#define CTX_PDP3_LDW 0x26
-#define CTX_PDP2_UDW 0x28
-#define CTX_PDP2_LDW 0x2a
-#define CTX_PDP1_UDW 0x2c
-#define CTX_PDP1_LDW 0x2e
-#define CTX_PDP0_UDW 0x30
-#define CTX_PDP0_LDW 0x32
-#define CTX_LRI_HEADER_2 0x41
-#define CTX_R_PWR_CLK_STATE 0x42
-#define CTX_END 0x44
-
-#define CTX_REG(reg_state, pos, reg, val) do { \
- u32 *reg_state__ = (reg_state); \
- const u32 pos__ = (pos); \
- (reg_state__)[(pos__) + 0] = i915_mmio_reg_offset(reg); \
- (reg_state__)[(pos__) + 1] = (val); \
-} while (0)
+/* GEN8 to GEN11 Reg State Context */
+#define CTX_CONTEXT_CONTROL (0x02 + 1)
+#define CTX_RING_HEAD (0x04 + 1)
+#define CTX_RING_TAIL (0x06 + 1)
+#define CTX_RING_BUFFER_START (0x08 + 1)
+#define CTX_RING_BUFFER_CONTROL (0x0a + 1)
+#define CTX_BB_STATE (0x10 + 1)
+#define CTX_BB_PER_CTX_PTR (0x18 + 1)
+#define CTX_PDP3_UDW (0x24 + 1)
+#define CTX_PDP3_LDW (0x26 + 1)
+#define CTX_PDP2_UDW (0x28 + 1)
+#define CTX_PDP2_LDW (0x2a + 1)
+#define CTX_PDP1_UDW (0x2c + 1)
+#define CTX_PDP1_LDW (0x2e + 1)
+#define CTX_PDP0_UDW (0x30 + 1)
+#define CTX_PDP0_LDW (0x32 + 1)
+#define CTX_R_PWR_CLK_STATE (0x42 + 1)
+
+#define GEN9_CTX_RING_MI_MODE 0x54
+
+/* GEN12+ Reg State Context */
+#define GEN12_CTX_BB_PER_CTX_PTR (0x12 + 1)
#define ASSIGN_CTX_PDP(ppgtt, reg_state, n) do { \
u32 *reg_state__ = (reg_state); \
const u64 addr__ = i915_page_dir_dma_addr((ppgtt), (n)); \
- (reg_state__)[CTX_PDP ## n ## _UDW + 1] = upper_32_bits(addr__); \
- (reg_state__)[CTX_PDP ## n ## _LDW + 1] = lower_32_bits(addr__); \
+ (reg_state__)[CTX_PDP ## n ## _UDW] = upper_32_bits(addr__); \
+ (reg_state__)[CTX_PDP ## n ## _LDW] = lower_32_bits(addr__); \
} while (0)
#define ASSIGN_CTX_PML4(ppgtt, reg_state) do { \
u32 *reg_state__ = (reg_state); \
const u64 addr__ = px_dma(ppgtt->pd); \
- (reg_state__)[CTX_PDP0_UDW + 1] = upper_32_bits(addr__); \
- (reg_state__)[CTX_PDP0_LDW + 1] = lower_32_bits(addr__); \
+ (reg_state__)[CTX_PDP0_UDW] = upper_32_bits(addr__); \
+ (reg_state__)[CTX_PDP0_LDW] = lower_32_bits(addr__); \
} while (0)
#define GEN8_CTX_RCS_INDIRECT_CTX_OFFSET_DEFAULT 0x17
diff --git a/drivers/gpu/drm/i915/gt/intel_rc6.c b/drivers/gpu/drm/i915/gt/intel_rc6.c
new file mode 100644
index 000000000000..71184aa72896
--- /dev/null
+++ b/drivers/gpu/drm/i915/gt/intel_rc6.c
@@ -0,0 +1,712 @@
+/*
+ * SPDX-License-Identifier: MIT
+ *
+ * Copyright © 2019 Intel Corporation
+ */
+
+#include <linux/pm_runtime.h>
+
+#include "i915_drv.h"
+#include "intel_gt.h"
+#include "intel_gt_pm.h"
+#include "intel_rc6.h"
+#include "intel_sideband.h"
+
+/**
+ * DOC: RC6
+ *
+ * RC6 is a special power stage which allows the GPU to enter an very
+ * low-voltage mode when idle, using down to 0V while at this stage. This
+ * stage is entered automatically when the GPU is idle when RC6 support is
+ * enabled, and as soon as new workload arises GPU wakes up automatically as
+ * well.
+ *
+ * There are different RC6 modes available in Intel GPU, which differentiate
+ * among each other with the latency required to enter and leave RC6 and
+ * voltage consumed by the GPU in different states.
+ *
+ * The combination of the following flags define which states GPU is allowed
+ * to enter, while RC6 is the normal RC6 state, RC6p is the deep RC6, and
+ * RC6pp is deepest RC6. Their support by hardware varies according to the
+ * GPU, BIOS, chipset and platform. RC6 is usually the safest one and the one
+ * which brings the most power savings; deeper states save more power, but
+ * require higher latency to switch to and wake up.
+ */
+
+static struct intel_gt *rc6_to_gt(struct intel_rc6 *rc6)
+{
+ return container_of(rc6, struct intel_gt, rc6);
+}
+
+static struct intel_uncore *rc6_to_uncore(struct intel_rc6 *rc)
+{
+ return rc6_to_gt(rc)->uncore;
+}
+
+static struct drm_i915_private *rc6_to_i915(struct intel_rc6 *rc)
+{
+ return rc6_to_gt(rc)->i915;
+}
+
+static inline void set(struct intel_uncore *uncore, i915_reg_t reg, u32 val)
+{
+ intel_uncore_write_fw(uncore, reg, val);
+}
+
+static void gen11_rc6_enable(struct intel_rc6 *rc6)
+{
+ struct intel_uncore *uncore = rc6_to_uncore(rc6);
+ struct intel_engine_cs *engine;
+ enum intel_engine_id id;
+
+ /* 2b: Program RC6 thresholds.*/
+ set(uncore, GEN6_RC6_WAKE_RATE_LIMIT, 54 << 16 | 85);
+ set(uncore, GEN10_MEDIA_WAKE_RATE_LIMIT, 150);
+
+ set(uncore, GEN6_RC_EVALUATION_INTERVAL, 125000); /* 12500 * 1280ns */
+ set(uncore, GEN6_RC_IDLE_HYSTERSIS, 25); /* 25 * 1280ns */
+ for_each_engine(engine, rc6_to_gt(rc6)->i915, id)
+ set(uncore, RING_MAX_IDLE(engine->mmio_base), 10);
+
+ set(uncore, GUC_MAX_IDLE_COUNT, 0xA);
+
+ set(uncore, GEN6_RC_SLEEP, 0);
+
+ set(uncore, GEN6_RC6_THRESHOLD, 50000); /* 50/125ms per EI */
+
+ /*
+ * 2c: Program Coarse Power Gating Policies.
+ *
+ * Bspec's guidance is to use 25us (really 25 * 1280ns) here. What we
+ * use instead is a more conservative estimate for the maximum time
+ * it takes us to service a CS interrupt and submit a new ELSP - that
+ * is the time which the GPU is idle waiting for the CPU to select the
+ * next request to execute. If the idle hysteresis is less than that
+ * interrupt service latency, the hardware will automatically gate
+ * the power well and we will then incur the wake up cost on top of
+ * the service latency. A similar guide from plane_state is that we
+ * do not want the enable hysteresis to less than the wakeup latency.
+ *
+ * igt/gem_exec_nop/sequential provides a rough estimate for the
+ * service latency, and puts it around 10us for Broadwell (and other
+ * big core) and around 40us for Broxton (and other low power cores).
+ * [Note that for legacy ringbuffer submission, this is less than 1us!]
+ * However, the wakeup latency on Broxton is closer to 100us. To be
+ * conservative, we have to factor in a context switch on top (due
+ * to ksoftirqd).
+ */
+ set(uncore, GEN9_MEDIA_PG_IDLE_HYSTERESIS, 250);
+ set(uncore, GEN9_RENDER_PG_IDLE_HYSTERESIS, 250);
+
+ /* 3a: Enable RC6 */
+ set(uncore, GEN6_RC_CONTROL,
+ GEN6_RC_CTL_HW_ENABLE |
+ GEN6_RC_CTL_RC6_ENABLE |
+ GEN6_RC_CTL_EI_MODE(1));
+
+ set(uncore, GEN9_PG_ENABLE,
+ GEN9_RENDER_PG_ENABLE |
+ GEN9_MEDIA_PG_ENABLE |
+ GEN11_MEDIA_SAMPLER_PG_ENABLE);
+}
+
+static void gen9_rc6_enable(struct intel_rc6 *rc6)
+{
+ struct intel_uncore *uncore = rc6_to_uncore(rc6);
+ struct intel_engine_cs *engine;
+ enum intel_engine_id id;
+ u32 rc6_mode;
+
+ /* 2b: Program RC6 thresholds.*/
+ if (INTEL_GEN(rc6_to_i915(rc6)) >= 10) {
+ set(uncore, GEN6_RC6_WAKE_RATE_LIMIT, 54 << 16 | 85);
+ set(uncore, GEN10_MEDIA_WAKE_RATE_LIMIT, 150);
+ } else if (IS_SKYLAKE(rc6_to_i915(rc6))) {
+ /*
+ * WaRsDoubleRc6WrlWithCoarsePowerGating:skl Doubling WRL only
+ * when CPG is enabled
+ */
+ set(uncore, GEN6_RC6_WAKE_RATE_LIMIT, 108 << 16);
+ } else {
+ set(uncore, GEN6_RC6_WAKE_RATE_LIMIT, 54 << 16);
+ }
+
+ set(uncore, GEN6_RC_EVALUATION_INTERVAL, 125000); /* 12500 * 1280ns */
+ set(uncore, GEN6_RC_IDLE_HYSTERSIS, 25); /* 25 * 1280ns */
+ for_each_engine(engine, rc6_to_gt(rc6)->i915, id)
+ set(uncore, RING_MAX_IDLE(engine->mmio_base), 10);
+
+ set(uncore, GUC_MAX_IDLE_COUNT, 0xA);
+
+ set(uncore, GEN6_RC_SLEEP, 0);
+
+ /*
+ * 2c: Program Coarse Power Gating Policies.
+ *
+ * Bspec's guidance is to use 25us (really 25 * 1280ns) here. What we
+ * use instead is a more conservative estimate for the maximum time
+ * it takes us to service a CS interrupt and submit a new ELSP - that
+ * is the time which the GPU is idle waiting for the CPU to select the
+ * next request to execute. If the idle hysteresis is less than that
+ * interrupt service latency, the hardware will automatically gate
+ * the power well and we will then incur the wake up cost on top of
+ * the service latency. A similar guide from plane_state is that we
+ * do not want the enable hysteresis to less than the wakeup latency.
+ *
+ * igt/gem_exec_nop/sequential provides a rough estimate for the
+ * service latency, and puts it around 10us for Broadwell (and other
+ * big core) and around 40us for Broxton (and other low power cores).
+ * [Note that for legacy ringbuffer submission, this is less than 1us!]
+ * However, the wakeup latency on Broxton is closer to 100us. To be
+ * conservative, we have to factor in a context switch on top (due
+ * to ksoftirqd).
+ */
+ set(uncore, GEN9_MEDIA_PG_IDLE_HYSTERESIS, 250);
+ set(uncore, GEN9_RENDER_PG_IDLE_HYSTERESIS, 250);
+
+ /* 3a: Enable RC6 */
+ set(uncore, GEN6_RC6_THRESHOLD, 37500); /* 37.5/125ms per EI */
+
+ /* WaRsUseTimeoutMode:cnl (pre-prod) */
+ if (IS_CNL_REVID(rc6_to_i915(rc6), CNL_REVID_A0, CNL_REVID_C0))
+ rc6_mode = GEN7_RC_CTL_TO_MODE;
+ else
+ rc6_mode = GEN6_RC_CTL_EI_MODE(1);
+
+ set(uncore, GEN6_RC_CONTROL,
+ GEN6_RC_CTL_HW_ENABLE |
+ GEN6_RC_CTL_RC6_ENABLE |
+ rc6_mode);
+
+ set(uncore, GEN9_PG_ENABLE,
+ GEN9_RENDER_PG_ENABLE | GEN9_MEDIA_PG_ENABLE);
+}
+
+static void gen8_rc6_enable(struct intel_rc6 *rc6)
+{
+ struct intel_uncore *uncore = rc6_to_uncore(rc6);
+ struct intel_engine_cs *engine;
+ enum intel_engine_id id;
+
+ /* 2b: Program RC6 thresholds.*/
+ set(uncore, GEN6_RC6_WAKE_RATE_LIMIT, 40 << 16);
+ set(uncore, GEN6_RC_EVALUATION_INTERVAL, 125000); /* 12500 * 1280ns */
+ set(uncore, GEN6_RC_IDLE_HYSTERSIS, 25); /* 25 * 1280ns */
+ for_each_engine(engine, rc6_to_gt(rc6)->i915, id)
+ set(uncore, RING_MAX_IDLE(engine->mmio_base), 10);
+ set(uncore, GEN6_RC_SLEEP, 0);
+ set(uncore, GEN6_RC6_THRESHOLD, 625); /* 800us/1.28 for TO */
+
+ /* 3: Enable RC6 */
+ set(uncore, GEN6_RC_CONTROL,
+ GEN6_RC_CTL_HW_ENABLE |
+ GEN7_RC_CTL_TO_MODE |
+ GEN6_RC_CTL_RC6_ENABLE);
+}
+
+static void gen6_rc6_enable(struct intel_rc6 *rc6)
+{
+ struct intel_uncore *uncore = rc6_to_uncore(rc6);
+ struct drm_i915_private *i915 = rc6_to_i915(rc6);
+ struct intel_engine_cs *engine;
+ enum intel_engine_id id;
+ u32 rc6vids, rc6_mask;
+ int ret;
+
+ set(uncore, GEN6_RC1_WAKE_RATE_LIMIT, 1000 << 16);
+ set(uncore, GEN6_RC6_WAKE_RATE_LIMIT, 40 << 16 | 30);
+ set(uncore, GEN6_RC6pp_WAKE_RATE_LIMIT, 30);
+ set(uncore, GEN6_RC_EVALUATION_INTERVAL, 125000);
+ set(uncore, GEN6_RC_IDLE_HYSTERSIS, 25);
+
+ for_each_engine(engine, i915, id)
+ set(uncore, RING_MAX_IDLE(engine->mmio_base), 10);
+
+ set(uncore, GEN6_RC_SLEEP, 0);
+ set(uncore, GEN6_RC1e_THRESHOLD, 1000);
+ if (IS_IVYBRIDGE(i915))
+ set(uncore, GEN6_RC6_THRESHOLD, 125000);
+ else
+ set(uncore, GEN6_RC6_THRESHOLD, 50000);
+ set(uncore, GEN6_RC6p_THRESHOLD, 150000);
+ set(uncore, GEN6_RC6pp_THRESHOLD, 64000); /* unused */
+
+ /* We don't use those on Haswell */
+ rc6_mask = GEN6_RC_CTL_RC6_ENABLE;
+ if (HAS_RC6p(i915))
+ rc6_mask |= GEN6_RC_CTL_RC6p_ENABLE;
+ if (HAS_RC6pp(i915))
+ rc6_mask |= GEN6_RC_CTL_RC6pp_ENABLE;
+ set(uncore, GEN6_RC_CONTROL,
+ rc6_mask |
+ GEN6_RC_CTL_EI_MODE(1) |
+ GEN6_RC_CTL_HW_ENABLE);
+
+ rc6vids = 0;
+ ret = sandybridge_pcode_read(i915, GEN6_PCODE_READ_RC6VIDS,
+ &rc6vids, NULL);
+ if (IS_GEN(i915, 6) && ret) {
+ DRM_DEBUG_DRIVER("Couldn't check for BIOS workaround\n");
+ } else if (IS_GEN(i915, 6) &&
+ (GEN6_DECODE_RC6_VID(rc6vids & 0xff) < 450)) {
+ DRM_DEBUG_DRIVER("You should update your BIOS. Correcting minimum rc6 voltage (%dmV->%dmV)\n",
+ GEN6_DECODE_RC6_VID(rc6vids & 0xff), 450);
+ rc6vids &= 0xffff00;
+ rc6vids |= GEN6_ENCODE_RC6_VID(450);
+ ret = sandybridge_pcode_write(i915, GEN6_PCODE_WRITE_RC6VIDS, rc6vids);
+ if (ret)
+ DRM_ERROR("Couldn't fix incorrect rc6 voltage\n");
+ }
+}
+
+/* Check that the pcbr address is not empty. */
+static int chv_rc6_init(struct intel_rc6 *rc6)
+{
+ struct intel_uncore *uncore = rc6_to_uncore(rc6);
+ resource_size_t pctx_paddr, paddr;
+ resource_size_t pctx_size = 32 * SZ_1K;
+ u32 pcbr;
+
+ pcbr = intel_uncore_read(uncore, VLV_PCBR);
+ if ((pcbr >> VLV_PCBR_ADDR_SHIFT) == 0) {
+ DRM_DEBUG_DRIVER("BIOS didn't set up PCBR, fixing up\n");
+ paddr = rc6_to_i915(rc6)->dsm.end + 1 - pctx_size;
+ GEM_BUG_ON(paddr > U32_MAX);
+
+ pctx_paddr = (paddr & ~4095);
+ intel_uncore_write(uncore, VLV_PCBR, pctx_paddr);
+ }
+
+ return 0;
+}
+
+static int vlv_rc6_init(struct intel_rc6 *rc6)
+{
+ struct drm_i915_private *i915 = rc6_to_i915(rc6);
+ struct intel_uncore *uncore = rc6_to_uncore(rc6);
+ struct drm_i915_gem_object *pctx;
+ resource_size_t pctx_paddr;
+ resource_size_t pctx_size = 24 * SZ_1K;
+ u32 pcbr;
+
+ pcbr = intel_uncore_read(uncore, VLV_PCBR);
+ if (pcbr) {
+ /* BIOS set it up already, grab the pre-alloc'd space */
+ resource_size_t pcbr_offset;
+
+ pcbr_offset = (pcbr & ~4095) - i915->dsm.start;
+ pctx = i915_gem_object_create_stolen_for_preallocated(i915,
+ pcbr_offset,
+ I915_GTT_OFFSET_NONE,
+ pctx_size);
+ if (IS_ERR(pctx))
+ return PTR_ERR(pctx);
+
+ goto out;
+ }
+
+ DRM_DEBUG_DRIVER("BIOS didn't set up PCBR, fixing up\n");
+
+ /*
+ * From the Gunit register HAS:
+ * The Gfx driver is expected to program this register and ensure
+ * proper allocation within Gfx stolen memory. For example, this
+ * register should be programmed such than the PCBR range does not
+ * overlap with other ranges, such as the frame buffer, protected
+ * memory, or any other relevant ranges.
+ */
+ pctx = i915_gem_object_create_stolen(i915, pctx_size);
+ if (IS_ERR(pctx)) {
+ DRM_DEBUG("not enough stolen space for PCTX, disabling\n");
+ return PTR_ERR(pctx);
+ }
+
+ GEM_BUG_ON(range_overflows_t(u64,
+ i915->dsm.start,
+ pctx->stolen->start,
+ U32_MAX));
+ pctx_paddr = i915->dsm.start + pctx->stolen->start;
+ intel_uncore_write(uncore, VLV_PCBR, pctx_paddr);
+
+out:
+ rc6->pctx = pctx;
+ return 0;
+}
+
+static void chv_rc6_enable(struct intel_rc6 *rc6)
+{
+ struct intel_uncore *uncore = rc6_to_uncore(rc6);
+ struct intel_engine_cs *engine;
+ enum intel_engine_id id;
+
+ /* 2a: Program RC6 thresholds.*/
+ set(uncore, GEN6_RC6_WAKE_RATE_LIMIT, 40 << 16);
+ set(uncore, GEN6_RC_EVALUATION_INTERVAL, 125000); /* 12500 * 1280ns */
+ set(uncore, GEN6_RC_IDLE_HYSTERSIS, 25); /* 25 * 1280ns */
+
+ for_each_engine(engine, rc6_to_gt(rc6)->i915, id)
+ set(uncore, RING_MAX_IDLE(engine->mmio_base), 10);
+ set(uncore, GEN6_RC_SLEEP, 0);
+
+ /* TO threshold set to 500 us (0x186 * 1.28 us) */
+ set(uncore, GEN6_RC6_THRESHOLD, 0x186);
+
+ /* Allows RC6 residency counter to work */
+ set(uncore, VLV_COUNTER_CONTROL,
+ _MASKED_BIT_ENABLE(VLV_COUNT_RANGE_HIGH |
+ VLV_MEDIA_RC6_COUNT_EN |
+ VLV_RENDER_RC6_COUNT_EN));
+
+ /* 3: Enable RC6 */
+ set(uncore, GEN6_RC_CONTROL, GEN7_RC_CTL_TO_MODE);
+}
+
+static void vlv_rc6_enable(struct intel_rc6 *rc6)
+{
+ struct intel_uncore *uncore = rc6_to_uncore(rc6);
+ struct intel_engine_cs *engine;
+ enum intel_engine_id id;
+
+ set(uncore, GEN6_RC6_WAKE_RATE_LIMIT, 0x00280000);
+ set(uncore, GEN6_RC_EVALUATION_INTERVAL, 125000);
+ set(uncore, GEN6_RC_IDLE_HYSTERSIS, 25);
+
+ for_each_engine(engine, rc6_to_gt(rc6)->i915, id)
+ set(uncore, RING_MAX_IDLE(engine->mmio_base), 10);
+
+ set(uncore, GEN6_RC6_THRESHOLD, 0x557);
+
+ /* Allows RC6 residency counter to work */
+ set(uncore, VLV_COUNTER_CONTROL,
+ _MASKED_BIT_ENABLE(VLV_COUNT_RANGE_HIGH |
+ VLV_MEDIA_RC0_COUNT_EN |
+ VLV_RENDER_RC0_COUNT_EN |
+ VLV_MEDIA_RC6_COUNT_EN |
+ VLV_RENDER_RC6_COUNT_EN));
+
+ set(uncore, GEN6_RC_CONTROL,
+ GEN7_RC_CTL_TO_MODE | VLV_RC_CTL_CTX_RST_PARALLEL);
+}
+
+static bool bxt_check_bios_rc6_setup(struct intel_rc6 *rc6)
+{
+ struct intel_uncore *uncore = rc6_to_uncore(rc6);
+ struct drm_i915_private *i915 = rc6_to_i915(rc6);
+ u32 rc6_ctx_base, rc_ctl, rc_sw_target;
+ bool enable_rc6 = true;
+
+ rc_ctl = intel_uncore_read(uncore, GEN6_RC_CONTROL);
+ rc_sw_target = intel_uncore_read(uncore, GEN6_RC_STATE);
+ rc_sw_target &= RC_SW_TARGET_STATE_MASK;
+ rc_sw_target >>= RC_SW_TARGET_STATE_SHIFT;
+ DRM_DEBUG_DRIVER("BIOS enabled RC states: "
+ "HW_CTRL %s HW_RC6 %s SW_TARGET_STATE %x\n",
+ onoff(rc_ctl & GEN6_RC_CTL_HW_ENABLE),
+ onoff(rc_ctl & GEN6_RC_CTL_RC6_ENABLE),
+ rc_sw_target);
+
+ if (!(intel_uncore_read(uncore, RC6_LOCATION) & RC6_CTX_IN_DRAM)) {
+ DRM_DEBUG_DRIVER("RC6 Base location not set properly.\n");
+ enable_rc6 = false;
+ }
+
+ /*
+ * The exact context size is not known for BXT, so assume a page size
+ * for this check.
+ */
+ rc6_ctx_base =
+ intel_uncore_read(uncore, RC6_CTX_BASE) & RC6_CTX_BASE_MASK;
+ if (!(rc6_ctx_base >= i915->dsm_reserved.start &&
+ rc6_ctx_base + PAGE_SIZE < i915->dsm_reserved.end)) {
+ DRM_DEBUG_DRIVER("RC6 Base address not as expected.\n");
+ enable_rc6 = false;
+ }
+
+ if (!((intel_uncore_read(uncore, PWRCTX_MAXCNT_RCSUNIT) & IDLE_TIME_MASK) > 1 &&
+ (intel_uncore_read(uncore, PWRCTX_MAXCNT_VCSUNIT0) & IDLE_TIME_MASK) > 1 &&
+ (intel_uncore_read(uncore, PWRCTX_MAXCNT_BCSUNIT) & IDLE_TIME_MASK) > 1 &&
+ (intel_uncore_read(uncore, PWRCTX_MAXCNT_VECSUNIT) & IDLE_TIME_MASK) > 1)) {
+ DRM_DEBUG_DRIVER("Engine Idle wait time not set properly.\n");
+ enable_rc6 = false;
+ }
+
+ if (!intel_uncore_read(uncore, GEN8_PUSHBUS_CONTROL) ||
+ !intel_uncore_read(uncore, GEN8_PUSHBUS_ENABLE) ||
+ !intel_uncore_read(uncore, GEN8_PUSHBUS_SHIFT)) {
+ DRM_DEBUG_DRIVER("Pushbus not setup properly.\n");
+ enable_rc6 = false;
+ }
+
+ if (!intel_uncore_read(uncore, GEN6_GFXPAUSE)) {
+ DRM_DEBUG_DRIVER("GFX pause not setup properly.\n");
+ enable_rc6 = false;
+ }
+
+ if (!intel_uncore_read(uncore, GEN8_MISC_CTRL0)) {
+ DRM_DEBUG_DRIVER("GPM control not setup properly.\n");
+ enable_rc6 = false;
+ }
+
+ return enable_rc6;
+}
+
+static bool rc6_supported(struct intel_rc6 *rc6)
+{
+ struct drm_i915_private *i915 = rc6_to_i915(rc6);
+
+ if (!HAS_RC6(i915))
+ return false;
+
+ if (intel_vgpu_active(i915))
+ return false;
+
+ if (is_mock_gt(rc6_to_gt(rc6)))
+ return false;
+
+ if (IS_GEN9_LP(i915) && !bxt_check_bios_rc6_setup(rc6)) {
+ dev_notice(i915->drm.dev,
+ "RC6 and powersaving disabled by BIOS\n");
+ return false;
+ }
+
+ return true;
+}
+
+static void rpm_get(struct intel_rc6 *rc6)
+{
+ GEM_BUG_ON(rc6->wakeref);
+ pm_runtime_get_sync(&rc6_to_i915(rc6)->drm.pdev->dev);
+ rc6->wakeref = true;
+}
+
+static void rpm_put(struct intel_rc6 *rc6)
+{
+ GEM_BUG_ON(!rc6->wakeref);
+ pm_runtime_put(&rc6_to_i915(rc6)->drm.pdev->dev);
+ rc6->wakeref = false;
+}
+
+static void __intel_rc6_disable(struct intel_rc6 *rc6)
+{
+ struct drm_i915_private *i915 = rc6_to_i915(rc6);
+ struct intel_uncore *uncore = rc6_to_uncore(rc6);
+
+ intel_uncore_forcewake_get(uncore, FORCEWAKE_ALL);
+ if (INTEL_GEN(i915) >= 9)
+ set(uncore, GEN9_PG_ENABLE, 0);
+ set(uncore, GEN6_RC_CONTROL, 0);
+ set(uncore, GEN6_RC_STATE, 0);
+ intel_uncore_forcewake_put(uncore, FORCEWAKE_ALL);
+}
+
+void intel_rc6_init(struct intel_rc6 *rc6)
+{
+ struct drm_i915_private *i915 = rc6_to_i915(rc6);
+ int err;
+
+ /* Disable runtime-pm until we can save the GPU state with rc6 pctx */
+ rpm_get(rc6);
+
+ if (!rc6_supported(rc6))
+ return;
+
+ if (IS_CHERRYVIEW(i915))
+ err = chv_rc6_init(rc6);
+ else if (IS_VALLEYVIEW(i915))
+ err = vlv_rc6_init(rc6);
+ else
+ err = 0;
+
+ /* Sanitize rc6, ensure it is disabled before we are ready. */
+ __intel_rc6_disable(rc6);
+
+ rc6->supported = err == 0;
+}
+
+void intel_rc6_sanitize(struct intel_rc6 *rc6)
+{
+ if (rc6->supported)
+ __intel_rc6_disable(rc6);
+}
+
+void intel_rc6_enable(struct intel_rc6 *rc6)
+{
+ struct drm_i915_private *i915 = rc6_to_i915(rc6);
+ struct intel_uncore *uncore = rc6_to_uncore(rc6);
+
+ if (!rc6->supported)
+ return;
+
+ GEM_BUG_ON(rc6->enabled);
+
+ intel_uncore_forcewake_get(uncore, FORCEWAKE_ALL);
+
+ if (IS_CHERRYVIEW(i915))
+ chv_rc6_enable(rc6);
+ else if (IS_VALLEYVIEW(i915))
+ vlv_rc6_enable(rc6);
+ else if (INTEL_GEN(i915) >= 11)
+ gen11_rc6_enable(rc6);
+ else if (INTEL_GEN(i915) >= 9)
+ gen9_rc6_enable(rc6);
+ else if (IS_BROADWELL(i915))
+ gen8_rc6_enable(rc6);
+ else if (INTEL_GEN(i915) >= 6)
+ gen6_rc6_enable(rc6);
+
+ intel_uncore_forcewake_put(uncore, FORCEWAKE_ALL);
+
+ /* rc6 is ready, runtime-pm is go! */
+ rpm_put(rc6);
+ rc6->enabled = true;
+}
+
+void intel_rc6_disable(struct intel_rc6 *rc6)
+{
+ if (!rc6->enabled)
+ return;
+
+ rpm_get(rc6);
+ rc6->enabled = false;
+
+ __intel_rc6_disable(rc6);
+}
+
+void intel_rc6_fini(struct intel_rc6 *rc6)
+{
+ struct drm_i915_gem_object *pctx;
+
+ intel_rc6_disable(rc6);
+
+ pctx = fetch_and_zero(&rc6->pctx);
+ if (pctx)
+ i915_gem_object_put(pctx);
+
+ if (rc6->wakeref)
+ rpm_put(rc6);
+}
+
+static u64 vlv_residency_raw(struct intel_uncore *uncore, const i915_reg_t reg)
+{
+ u32 lower, upper, tmp;
+ int loop = 2;
+
+ /*
+ * The register accessed do not need forcewake. We borrow
+ * uncore lock to prevent concurrent access to range reg.
+ */
+ lockdep_assert_held(&uncore->lock);
+
+ /*
+ * vlv and chv residency counters are 40 bits in width.
+ * With a control bit, we can choose between upper or lower
+ * 32bit window into this counter.
+ *
+ * Although we always use the counter in high-range mode elsewhere,
+ * userspace may attempt to read the value before rc6 is initialised,
+ * before we have set the default VLV_COUNTER_CONTROL value. So always
+ * set the high bit to be safe.
+ */
+ set(uncore, VLV_COUNTER_CONTROL,
+ _MASKED_BIT_ENABLE(VLV_COUNT_RANGE_HIGH));
+ upper = intel_uncore_read_fw(uncore, reg);
+ do {
+ tmp = upper;
+
+ set(uncore, VLV_COUNTER_CONTROL,
+ _MASKED_BIT_DISABLE(VLV_COUNT_RANGE_HIGH));
+ lower = intel_uncore_read_fw(uncore, reg);
+
+ set(uncore, VLV_COUNTER_CONTROL,
+ _MASKED_BIT_ENABLE(VLV_COUNT_RANGE_HIGH));
+ upper = intel_uncore_read_fw(uncore, reg);
+ } while (upper != tmp && --loop);
+
+ /*
+ * Everywhere else we always use VLV_COUNTER_CONTROL with the
+ * VLV_COUNT_RANGE_HIGH bit set - so it is safe to leave it set
+ * now.
+ */
+
+ return lower | (u64)upper << 8;
+}
+
+u64 intel_rc6_residency_ns(struct intel_rc6 *rc6, const i915_reg_t reg)
+{
+ struct drm_i915_private *i915 = rc6_to_i915(rc6);
+ struct intel_uncore *uncore = rc6_to_uncore(rc6);
+ u64 time_hw, prev_hw, overflow_hw;
+ unsigned int fw_domains;
+ unsigned long flags;
+ unsigned int i;
+ u32 mul, div;
+
+ if (!rc6->supported)
+ return 0;
+
+ /*
+ * Store previous hw counter values for counter wrap-around handling.
+ *
+ * There are only four interesting registers and they live next to each
+ * other so we can use the relative address, compared to the smallest
+ * one as the index into driver storage.
+ */
+ i = (i915_mmio_reg_offset(reg) -
+ i915_mmio_reg_offset(GEN6_GT_GFX_RC6_LOCKED)) / sizeof(u32);
+ if (WARN_ON_ONCE(i >= ARRAY_SIZE(rc6->cur_residency)))
+ return 0;
+
+ fw_domains = intel_uncore_forcewake_for_reg(uncore, reg, FW_REG_READ);
+
+ spin_lock_irqsave(&uncore->lock, flags);
+ intel_uncore_forcewake_get__locked(uncore, fw_domains);
+
+ /* On VLV and CHV, residency time is in CZ units rather than 1.28us */
+ if (IS_VALLEYVIEW(i915) || IS_CHERRYVIEW(i915)) {
+ mul = 1000000;
+ div = i915->czclk_freq;
+ overflow_hw = BIT_ULL(40);
+ time_hw = vlv_residency_raw(uncore, reg);
+ } else {
+ /* 833.33ns units on Gen9LP, 1.28us elsewhere. */
+ if (IS_GEN9_LP(i915)) {
+ mul = 10000;
+ div = 12;
+ } else {
+ mul = 1280;
+ div = 1;
+ }
+
+ overflow_hw = BIT_ULL(32);
+ time_hw = intel_uncore_read_fw(uncore, reg);
+ }
+
+ /*
+ * Counter wrap handling.
+ *
+ * But relying on a sufficient frequency of queries otherwise counters
+ * can still wrap.
+ */
+ prev_hw = rc6->prev_hw_residency[i];
+ rc6->prev_hw_residency[i] = time_hw;
+
+ /* RC6 delta from last sample. */
+ if (time_hw >= prev_hw)
+ time_hw -= prev_hw;
+ else
+ time_hw += overflow_hw - prev_hw;
+
+ /* Add delta to RC6 extended raw driver copy. */
+ time_hw += rc6->cur_residency[i];
+ rc6->cur_residency[i] = time_hw;
+
+ intel_uncore_forcewake_put__locked(uncore, fw_domains);
+ spin_unlock_irqrestore(&uncore->lock, flags);
+
+ return mul_u64_u32_div(time_hw, mul, div);
+}
+
+u64 intel_rc6_residency_us(struct intel_rc6 *rc6, i915_reg_t reg)
+{
+ return DIV_ROUND_UP_ULL(intel_rc6_residency_ns(rc6, reg), 1000);
+}
diff --git a/drivers/gpu/drm/i915/gt/intel_rc6.h b/drivers/gpu/drm/i915/gt/intel_rc6.h
new file mode 100644
index 000000000000..5e6711f36457
--- /dev/null
+++ b/drivers/gpu/drm/i915/gt/intel_rc6.h
@@ -0,0 +1,25 @@
+/*
+ * SPDX-License-Identifier: MIT
+ *
+ * Copyright © 2019 Intel Corporation
+ */
+
+#ifndef INTEL_RC6_H
+#define INTEL_RC6_H
+
+#include "i915_reg.h"
+
+struct intel_engine_cs;
+struct intel_rc6;
+
+void intel_rc6_init(struct intel_rc6 *rc6);
+void intel_rc6_fini(struct intel_rc6 *rc6);
+
+void intel_rc6_sanitize(struct intel_rc6 *rc6);
+void intel_rc6_enable(struct intel_rc6 *rc6);
+void intel_rc6_disable(struct intel_rc6 *rc6);
+
+u64 intel_rc6_residency_ns(struct intel_rc6 *rc6, i915_reg_t reg);
+u64 intel_rc6_residency_us(struct intel_rc6 *rc6, i915_reg_t reg);
+
+#endif /* INTEL_RC6_H */
diff --git a/drivers/gpu/drm/i915/gt/intel_rc6_types.h b/drivers/gpu/drm/i915/gt/intel_rc6_types.h
new file mode 100644
index 000000000000..214f354d6ae4
--- /dev/null
+++ b/drivers/gpu/drm/i915/gt/intel_rc6_types.h
@@ -0,0 +1,28 @@
+/*
+ * SPDX-License-Identifier: MIT
+ *
+ * Copyright © 2019 Intel Corporation
+ */
+
+#ifndef INTEL_RC6_TYPES_H
+#define INTEL_RC6_TYPES_H
+
+#include <linux/spinlock.h>
+#include <linux/types.h>
+
+#include "intel_engine_types.h"
+
+struct drm_i915_gem_object;
+
+struct intel_rc6 {
+ u64 prev_hw_residency[4];
+ u64 cur_residency[4];
+
+ struct drm_i915_gem_object *pctx;
+
+ bool supported : 1;
+ bool enabled : 1;
+ bool wakeref : 1;
+};
+
+#endif /* INTEL_RC6_TYPES_H */
diff --git a/drivers/gpu/drm/i915/gt/intel_reset.c b/drivers/gpu/drm/i915/gt/intel_reset.c
index b9d84d52e986..7b3d9d4517a0 100644
--- a/drivers/gpu/drm/i915/gt/intel_reset.c
+++ b/drivers/gpu/drm/i915/gt/intel_reset.c
@@ -42,11 +42,10 @@ static void engine_skip_context(struct i915_request *rq)
struct intel_engine_cs *engine = rq->engine;
struct i915_gem_context *hung_ctx = rq->gem_context;
- lockdep_assert_held(&engine->active.lock);
-
if (!i915_request_is_active(rq))
return;
+ lockdep_assert_held(&engine->active.lock);
list_for_each_entry_continue(rq, &engine->active.requests, sched.link)
if (rq->gem_context == hung_ctx)
i915_request_skip(rq, -EIO);
@@ -123,7 +122,6 @@ void __i915_request_reset(struct i915_request *rq, bool guilty)
rq->fence.seqno,
yesno(guilty));
- lockdep_assert_held(&rq->engine->active.lock);
GEM_BUG_ON(i915_request_completed(rq));
if (guilty) {
@@ -309,7 +307,7 @@ static int gen6_reset_engines(struct intel_gt *gt,
return gen6_hw_domain_reset(gt, hw_mask);
}
-static u32 gen11_lock_sfc(struct intel_engine_cs *engine)
+static int gen11_lock_sfc(struct intel_engine_cs *engine, u32 *hw_mask)
{
struct intel_uncore *uncore = engine->uncore;
u8 vdbox_sfc_access = RUNTIME_INFO(engine->i915)->vdbox_sfc_access;
@@ -318,6 +316,7 @@ static u32 gen11_lock_sfc(struct intel_engine_cs *engine)
i915_reg_t sfc_usage;
u32 sfc_usage_bit;
u32 sfc_reset_bit;
+ int ret;
switch (engine->class) {
case VIDEO_DECODE_CLASS:
@@ -352,27 +351,33 @@ static u32 gen11_lock_sfc(struct intel_engine_cs *engine)
}
/*
- * Tell the engine that a software reset is going to happen. The engine
- * will then try to force lock the SFC (if currently locked, it will
- * remain so until we tell the engine it is safe to unlock; if currently
- * unlocked, it will ignore this and all new lock requests). If SFC
- * ends up being locked to the engine we want to reset, we have to reset
- * it as well (we will unlock it once the reset sequence is completed).
+ * If the engine is using a SFC, tell the engine that a software reset
+ * is going to happen. The engine will then try to force lock the SFC.
+ * If SFC ends up being locked to the engine we want to reset, we have
+ * to reset it as well (we will unlock it once the reset sequence is
+ * completed).
*/
+ if (!(intel_uncore_read_fw(uncore, sfc_usage) & sfc_usage_bit))
+ return 0;
+
rmw_set_fw(uncore, sfc_forced_lock, sfc_forced_lock_bit);
- if (__intel_wait_for_register_fw(uncore,
- sfc_forced_lock_ack,
- sfc_forced_lock_ack_bit,
- sfc_forced_lock_ack_bit,
- 1000, 0, NULL)) {
- DRM_DEBUG_DRIVER("Wait for SFC forced lock ack failed\n");
+ ret = __intel_wait_for_register_fw(uncore,
+ sfc_forced_lock_ack,
+ sfc_forced_lock_ack_bit,
+ sfc_forced_lock_ack_bit,
+ 1000, 0, NULL);
+
+ /* Was the SFC released while we were trying to lock it? */
+ if (!(intel_uncore_read_fw(uncore, sfc_usage) & sfc_usage_bit))
return 0;
- }
- if (intel_uncore_read_fw(uncore, sfc_usage) & sfc_usage_bit)
- return sfc_reset_bit;
+ if (ret) {
+ DRM_DEBUG_DRIVER("Wait for SFC forced lock ack failed\n");
+ return ret;
+ }
+ *hw_mask |= sfc_reset_bit;
return 0;
}
@@ -430,12 +435,21 @@ static int gen11_reset_engines(struct intel_gt *gt,
for_each_engine_masked(engine, gt->i915, engine_mask, tmp) {
GEM_BUG_ON(engine->id >= ARRAY_SIZE(hw_engine_mask));
hw_mask |= hw_engine_mask[engine->id];
- hw_mask |= gen11_lock_sfc(engine);
+ ret = gen11_lock_sfc(engine, &hw_mask);
+ if (ret)
+ goto sfc_unlock;
}
}
ret = gen6_hw_domain_reset(gt, hw_mask);
+sfc_unlock:
+ /*
+ * We unlock the SFC based on the lock status and not the result of
+ * gen11_lock_sfc to make sure that we clean properly if something
+ * wrong happened during the lock (e.g. lock acquired after timeout
+ * expiration).
+ */
if (engine_mask != ALL_ENGINES)
for_each_engine_masked(engine, gt->i915, engine_mask, tmp)
gen11_unlock_sfc(engine);
@@ -528,13 +542,24 @@ skip_reset:
return ret;
}
+static int mock_reset(struct intel_gt *gt,
+ intel_engine_mask_t mask,
+ unsigned int retry)
+{
+ return 0;
+}
+
typedef int (*reset_func)(struct intel_gt *,
intel_engine_mask_t engine_mask,
unsigned int retry);
-static reset_func intel_get_gpu_reset(struct drm_i915_private *i915)
+static reset_func intel_get_gpu_reset(const struct intel_gt *gt)
{
- if (INTEL_GEN(i915) >= 8)
+ struct drm_i915_private *i915 = gt->i915;
+
+ if (is_mock_gt(gt))
+ return mock_reset;
+ else if (INTEL_GEN(i915) >= 8)
return gen8_reset_engines;
else if (INTEL_GEN(i915) >= 6)
return gen6_reset_engines;
@@ -557,7 +582,7 @@ int __intel_gt_reset(struct intel_gt *gt, intel_engine_mask_t engine_mask)
int ret = -ETIMEDOUT;
int retry;
- reset = intel_get_gpu_reset(gt->i915);
+ reset = intel_get_gpu_reset(gt);
if (!reset)
return -ENODEV;
@@ -577,17 +602,20 @@ int __intel_gt_reset(struct intel_gt *gt, intel_engine_mask_t engine_mask)
return ret;
}
-bool intel_has_gpu_reset(struct drm_i915_private *i915)
+bool intel_has_gpu_reset(const struct intel_gt *gt)
{
if (!i915_modparams.reset)
return NULL;
- return intel_get_gpu_reset(i915);
+ return intel_get_gpu_reset(gt);
}
-bool intel_has_reset_engine(struct drm_i915_private *i915)
+bool intel_has_reset_engine(const struct intel_gt *gt)
{
- return INTEL_INFO(i915)->has_reset_engine && i915_modparams.reset >= 2;
+ if (i915_modparams.reset < 2)
+ return false;
+
+ return INTEL_INFO(gt->i915)->has_reset_engine;
}
int intel_reset_guc(struct intel_gt *gt)
@@ -697,7 +725,7 @@ static void reset_finish_engine(struct intel_engine_cs *engine)
engine->reset.finish(engine);
intel_uncore_forcewake_put(engine->uncore, FORCEWAKE_ALL);
- intel_engine_signal_breadcrumbs(engine);
+ intel_engine_breadcrumbs_irq(engine);
}
static void reset_finish(struct intel_gt *gt, intel_engine_mask_t awake)
@@ -793,11 +821,13 @@ static bool __intel_gt_unset_wedged(struct intel_gt *gt)
struct intel_gt_timelines *timelines = &gt->timelines;
struct intel_timeline *tl;
unsigned long flags;
+ bool ok;
if (!test_bit(I915_WEDGED, &gt->reset.flags))
return true;
- if (!gt->scratch) /* Never full initialised, recovery impossible */
+ /* Never fully initialised, recovery impossible */
+ if (test_bit(I915_WEDGED_ON_INIT, &gt->reset.flags))
return false;
GEM_TRACE("start\n");
@@ -814,10 +844,10 @@ static bool __intel_gt_unset_wedged(struct intel_gt *gt)
*/
spin_lock_irqsave(&timelines->lock, flags);
list_for_each_entry(tl, &timelines->active_list, link) {
- struct i915_request *rq;
+ struct dma_fence *fence;
- rq = i915_active_request_get_unlocked(&tl->last_request);
- if (!rq)
+ fence = i915_active_fence_get(&tl->last_request);
+ if (!fence)
continue;
spin_unlock_irqrestore(&timelines->lock, flags);
@@ -829,8 +859,8 @@ static bool __intel_gt_unset_wedged(struct intel_gt *gt)
* (I915_FENCE_TIMEOUT) so this wait should not be unbounded
* in the worst case.
*/
- dma_fence_default_wait(&rq->fence, false, MAX_SCHEDULE_TIMEOUT);
- i915_request_put(rq);
+ dma_fence_default_wait(fence, false, MAX_SCHEDULE_TIMEOUT);
+ dma_fence_put(fence);
/* Restart iteration after droping lock */
spin_lock_irqsave(&timelines->lock, flags);
@@ -838,7 +868,12 @@ static bool __intel_gt_unset_wedged(struct intel_gt *gt)
}
spin_unlock_irqrestore(&timelines->lock, flags);
- intel_gt_sanitize(gt, false);
+ /* We must reset pending GPU events before restoring our submission */
+ ok = !HAS_EXECLISTS(gt->i915); /* XXX better agnosticism desired */
+ if (!INTEL_INFO(gt->i915)->gpu_reset_clobbers_display)
+ ok = __intel_gt_reset(gt, ALL_ENGINES) == 0;
+ if (!ok)
+ return false;
/*
* Undo nop_submit_request. We prevent all new i915 requests from
@@ -943,7 +978,7 @@ void intel_gt_reset(struct intel_gt *gt,
awake = reset_prepare(gt);
- if (!intel_has_gpu_reset(gt->i915)) {
+ if (!intel_has_gpu_reset(gt)) {
if (i915_modparams.reset)
dev_err(gt->i915->drm.dev, "GPU reset not supported\n");
else
@@ -972,7 +1007,7 @@ void intel_gt_reset(struct intel_gt *gt,
* was running at the time of the reset (i.e. we weren't VT
* switched away).
*/
- ret = i915_gem_init_hw(gt->i915);
+ ret = intel_gt_init_hw(gt);
if (ret) {
DRM_ERROR("Failed to initialise HW following reset (%d)\n",
ret);
@@ -1164,7 +1199,7 @@ void intel_gt_handle_error(struct intel_gt *gt,
* Try engine reset when available. We fall back to full reset if
* single reset fails.
*/
- if (intel_has_reset_engine(gt->i915) && !intel_gt_is_wedged(gt)) {
+ if (intel_has_reset_engine(gt) && !intel_gt_is_wedged(gt)) {
for_each_engine_masked(engine, gt->i915, engine_mask, tmp) {
BUILD_BUG_ON(I915_RESET_MODESET >= I915_RESET_ENGINE);
if (test_and_set_bit(I915_RESET_ENGINE + engine->id,
@@ -1214,10 +1249,8 @@ out:
intel_runtime_pm_put(&gt->i915->runtime_pm, wakeref);
}
-int intel_gt_reset_trylock(struct intel_gt *gt)
+int intel_gt_reset_trylock(struct intel_gt *gt, int *srcu)
{
- int srcu;
-
might_lock(&gt->reset.backoff_srcu);
might_sleep();
@@ -1232,10 +1265,10 @@ int intel_gt_reset_trylock(struct intel_gt *gt)
rcu_read_lock();
}
- srcu = srcu_read_lock(&gt->reset.backoff_srcu);
+ *srcu = srcu_read_lock(&gt->reset.backoff_srcu);
rcu_read_unlock();
- return srcu;
+ return 0;
}
void intel_gt_reset_unlock(struct intel_gt *gt, int tag)
@@ -1255,10 +1288,6 @@ int intel_gt_terminally_wedged(struct intel_gt *gt)
if (!test_bit(I915_RESET_BACKOFF, &gt->reset.flags))
return -EIO;
- /* XXX intel_reset_finish() still takes struct_mutex!!! */
- if (mutex_is_locked(&gt->i915->drm.struct_mutex))
- return -EAGAIN;
-
if (wait_event_interruptible(gt->reset.queue,
!test_bit(I915_RESET_BACKOFF,
&gt->reset.flags)))
@@ -1267,6 +1296,14 @@ int intel_gt_terminally_wedged(struct intel_gt *gt)
return intel_gt_is_wedged(gt) ? -EIO : 0;
}
+void intel_gt_set_wedged_on_init(struct intel_gt *gt)
+{
+ BUILD_BUG_ON(I915_RESET_ENGINE + I915_NUM_ENGINES >
+ I915_WEDGED_ON_INIT);
+ intel_gt_set_wedged(gt);
+ set_bit(I915_WEDGED_ON_INIT, &gt->reset.flags);
+}
+
void intel_gt_init_reset(struct intel_gt *gt)
{
init_waitqueue_head(&gt->reset.queue);
diff --git a/drivers/gpu/drm/i915/gt/intel_reset.h b/drivers/gpu/drm/i915/gt/intel_reset.h
index 37a987b17108..8e8d5f761166 100644
--- a/drivers/gpu/drm/i915/gt/intel_reset.h
+++ b/drivers/gpu/drm/i915/gt/intel_reset.h
@@ -14,7 +14,6 @@
#include "intel_engine_types.h"
#include "intel_reset_types.h"
-struct drm_i915_private;
struct i915_request;
struct intel_engine_cs;
struct intel_gt;
@@ -38,13 +37,19 @@ int intel_engine_reset(struct intel_engine_cs *engine,
void __i915_request_reset(struct i915_request *rq, bool guilty);
-int __must_check intel_gt_reset_trylock(struct intel_gt *gt);
+int __must_check intel_gt_reset_trylock(struct intel_gt *gt, int *srcu);
void intel_gt_reset_unlock(struct intel_gt *gt, int tag);
void intel_gt_set_wedged(struct intel_gt *gt);
bool intel_gt_unset_wedged(struct intel_gt *gt);
int intel_gt_terminally_wedged(struct intel_gt *gt);
+/*
+ * There's no unset_wedged_on_init paired with this one.
+ * Once we're wedged on init, there's no going back.
+ */
+void intel_gt_set_wedged_on_init(struct intel_gt *gt);
+
int __intel_gt_reset(struct intel_gt *gt, intel_engine_mask_t engine_mask);
int intel_reset_guc(struct intel_gt *gt);
@@ -68,10 +73,13 @@ void __intel_fini_wedge(struct intel_wedge_me *w);
static inline bool __intel_reset_failed(const struct intel_reset *reset)
{
+ GEM_BUG_ON(test_bit(I915_WEDGED_ON_INIT, &reset->flags) ?
+ !test_bit(I915_WEDGED, &reset->flags) : false);
+
return unlikely(test_bit(I915_WEDGED, &reset->flags));
}
-bool intel_has_gpu_reset(struct drm_i915_private *i915);
-bool intel_has_reset_engine(struct drm_i915_private *i915);
+bool intel_has_gpu_reset(const struct intel_gt *gt);
+bool intel_has_reset_engine(const struct intel_gt *gt);
#endif /* I915_RESET_H */
diff --git a/drivers/gpu/drm/i915/gt/intel_reset_types.h b/drivers/gpu/drm/i915/gt/intel_reset_types.h
index 31968356e0c0..f43bc3a0fe4f 100644
--- a/drivers/gpu/drm/i915/gt/intel_reset_types.h
+++ b/drivers/gpu/drm/i915/gt/intel_reset_types.h
@@ -29,11 +29,17 @@ struct intel_reset {
* we set the #I915_WEDGED bit. Prior to command submission, e.g.
* i915_request_alloc(), this bit is checked and the sequence
* aborted (with -EIO reported to userspace) if set.
+ *
+ * #I915_WEDGED_ON_INIT - If we fail to initialize the GPU we can no
+ * longer use the GPU - similar to #I915_WEDGED bit. The difference in
+ * in the way we're handling "forced" unwedged (e.g. through debugfs),
+ * which is not allowed in case we failed to initialize.
*/
unsigned long flags;
#define I915_RESET_BACKOFF 0
#define I915_RESET_MODESET 1
#define I915_RESET_ENGINE 2
+#define I915_WEDGED_ON_INIT (BITS_PER_LONG - 2)
#define I915_WEDGED (BITS_PER_LONG - 1)
struct mutex mutex; /* serialises wedging/unwedging */
diff --git a/drivers/gpu/drm/i915/gt/intel_ringbuffer.c b/drivers/gpu/drm/i915/gt/intel_ringbuffer.c
index 601c16239fdf..311fdc0a21bc 100644
--- a/drivers/gpu/drm/i915/gt/intel_ringbuffer.c
+++ b/drivers/gpu/drm/i915/gt/intel_ringbuffer.c
@@ -322,7 +322,8 @@ static u32 *gen6_rcs_emit_breadcrumb(struct i915_request *rq, u32 *cs)
PIPE_CONTROL_DC_FLUSH_ENABLE |
PIPE_CONTROL_QW_WRITE |
PIPE_CONTROL_CS_STALL);
- *cs++ = rq->timeline->hwsp_offset | PIPE_CONTROL_GLOBAL_GTT;
+ *cs++ = i915_request_active_timeline(rq)->hwsp_offset |
+ PIPE_CONTROL_GLOBAL_GTT;
*cs++ = rq->fence.seqno;
*cs++ = MI_USER_INTERRUPT;
@@ -425,7 +426,7 @@ static u32 *gen7_rcs_emit_breadcrumb(struct i915_request *rq, u32 *cs)
PIPE_CONTROL_QW_WRITE |
PIPE_CONTROL_GLOBAL_GTT_IVB |
PIPE_CONTROL_CS_STALL);
- *cs++ = rq->timeline->hwsp_offset;
+ *cs++ = i915_request_active_timeline(rq)->hwsp_offset;
*cs++ = rq->fence.seqno;
*cs++ = MI_USER_INTERRUPT;
@@ -439,8 +440,8 @@ static u32 *gen7_rcs_emit_breadcrumb(struct i915_request *rq, u32 *cs)
static u32 *gen6_xcs_emit_breadcrumb(struct i915_request *rq, u32 *cs)
{
- GEM_BUG_ON(rq->timeline->hwsp_ggtt != rq->engine->status_page.vma);
- GEM_BUG_ON(offset_in_page(rq->timeline->hwsp_offset) != I915_GEM_HWS_SEQNO_ADDR);
+ GEM_BUG_ON(i915_request_active_timeline(rq)->hwsp_ggtt != rq->engine->status_page.vma);
+ GEM_BUG_ON(offset_in_page(i915_request_active_timeline(rq)->hwsp_offset) != I915_GEM_HWS_SEQNO_ADDR);
*cs++ = MI_FLUSH_DW | MI_FLUSH_DW_OP_STOREDW | MI_FLUSH_DW_STORE_INDEX;
*cs++ = I915_GEM_HWS_SEQNO_ADDR | MI_FLUSH_DW_USE_GTT;
@@ -459,8 +460,8 @@ static u32 *gen7_xcs_emit_breadcrumb(struct i915_request *rq, u32 *cs)
{
int i;
- GEM_BUG_ON(rq->timeline->hwsp_ggtt != rq->engine->status_page.vma);
- GEM_BUG_ON(offset_in_page(rq->timeline->hwsp_offset) != I915_GEM_HWS_SEQNO_ADDR);
+ GEM_BUG_ON(i915_request_active_timeline(rq)->hwsp_ggtt != rq->engine->status_page.vma);
+ GEM_BUG_ON(offset_in_page(i915_request_active_timeline(rq)->hwsp_offset) != I915_GEM_HWS_SEQNO_ADDR);
*cs++ = MI_FLUSH_DW | MI_FLUSH_DW_OP_STOREDW | MI_FLUSH_DW_STORE_INDEX;
*cs++ = I915_GEM_HWS_SEQNO_ADDR | MI_FLUSH_DW_USE_GTT;
@@ -930,6 +931,7 @@ static void cancel_requests(struct intel_engine_cs *engine)
static void i9xx_submit_request(struct i915_request *request)
{
i915_request_submit(request);
+ wmb(); /* paranoid flush writes out of the WCB before mmio */
ENGINE_WRITE(request->engine, RING_TAIL,
intel_ring_set_tail(request->ring, request->tail));
@@ -937,8 +939,8 @@ static void i9xx_submit_request(struct i915_request *request)
static u32 *i9xx_emit_breadcrumb(struct i915_request *rq, u32 *cs)
{
- GEM_BUG_ON(rq->timeline->hwsp_ggtt != rq->engine->status_page.vma);
- GEM_BUG_ON(offset_in_page(rq->timeline->hwsp_offset) != I915_GEM_HWS_SEQNO_ADDR);
+ GEM_BUG_ON(i915_request_active_timeline(rq)->hwsp_ggtt != rq->engine->status_page.vma);
+ GEM_BUG_ON(offset_in_page(i915_request_active_timeline(rq)->hwsp_offset) != I915_GEM_HWS_SEQNO_ADDR);
*cs++ = MI_FLUSH;
@@ -960,8 +962,8 @@ static u32 *gen5_emit_breadcrumb(struct i915_request *rq, u32 *cs)
{
int i;
- GEM_BUG_ON(rq->timeline->hwsp_ggtt != rq->engine->status_page.vma);
- GEM_BUG_ON(offset_in_page(rq->timeline->hwsp_offset) != I915_GEM_HWS_SEQNO_ADDR);
+ GEM_BUG_ON(i915_request_active_timeline(rq)->hwsp_ggtt != rq->engine->status_page.vma);
+ GEM_BUG_ON(offset_in_page(i915_request_active_timeline(rq)->hwsp_offset) != I915_GEM_HWS_SEQNO_ADDR);
*cs++ = MI_FLUSH;
@@ -1272,7 +1274,7 @@ static struct i915_vma *create_ring_vma(struct i915_ggtt *ggtt, int size)
struct i915_vma *vma;
obj = i915_gem_object_create_stolen(i915, size);
- if (!obj)
+ if (IS_ERR(obj))
obj = i915_gem_object_create_internal(i915, size);
if (IS_ERR(obj))
return ERR_CAST(obj);
@@ -1336,15 +1338,13 @@ void intel_ring_free(struct kref *ref)
{
struct intel_ring *ring = container_of(ref, typeof(*ring), ref);
- i915_vma_close(ring->vma);
i915_vma_put(ring->vma);
-
kfree(ring);
}
static void __ring_context_fini(struct intel_context *ce)
{
- i915_gem_object_put(ce->state->obj);
+ i915_vma_put(ce->state);
}
static void ring_context_destroy(struct kref *ref)
@@ -1573,7 +1573,7 @@ static inline int mi_set_context(struct i915_request *rq, u32 flags)
struct intel_engine_cs *engine = rq->engine;
enum intel_engine_id id;
const int num_engines =
- IS_HSW_GT1(i915) ? RUNTIME_INFO(i915)->num_engines - 1 : 0;
+ IS_HASWELL(i915) ? RUNTIME_INFO(i915)->num_engines - 1 : 0;
bool force_restore = false;
int len;
u32 *cs;
@@ -1741,46 +1741,22 @@ static int remap_l3(struct i915_request *rq)
static int switch_context(struct i915_request *rq)
{
- struct intel_engine_cs *engine = rq->engine;
- struct i915_address_space *vm = vm_alias(rq->hw_context);
- unsigned int unwind_mm = 0;
- u32 hw_flags = 0;
+ struct intel_context *ce = rq->hw_context;
+ struct i915_address_space *vm = vm_alias(ce);
int ret;
GEM_BUG_ON(HAS_EXECLISTS(rq->i915));
if (vm) {
- struct i915_ppgtt *ppgtt = i915_vm_to_ppgtt(vm);
- int loops;
-
- /*
- * Baytail takes a little more convincing that it really needs
- * to reload the PD between contexts. It is not just a little
- * longer, as adding more stalls after the load_pd_dir (i.e.
- * adding a long loop around flush_pd_dir) is not as effective
- * as reloading the PD umpteen times. 32 is derived from
- * experimentation (gem_exec_parallel/fds) and has no good
- * explanation.
- */
- loops = 1;
- if (engine->id == BCS0 && IS_VALLEYVIEW(engine->i915))
- loops = 32;
-
- do {
- ret = load_pd_dir(rq, ppgtt);
- if (ret)
- goto err;
- } while (--loops);
-
- if (ppgtt->pd_dirty_engines & engine->mask) {
- unwind_mm = engine->mask;
- ppgtt->pd_dirty_engines &= ~unwind_mm;
- hw_flags = MI_FORCE_RESTORE;
- }
+ ret = load_pd_dir(rq, i915_vm_to_ppgtt(vm));
+ if (ret)
+ return ret;
}
- if (rq->hw_context->state) {
- GEM_BUG_ON(engine->id != RCS0);
+ if (ce->state) {
+ u32 hw_flags;
+
+ GEM_BUG_ON(rq->engine->id != RCS0);
/*
* The kernel context(s) is treated as pure scratch and is not
@@ -1789,22 +1765,25 @@ static int switch_context(struct i915_request *rq)
* as nothing actually executes using the kernel context; it
* is purely used for flushing user contexts.
*/
+ hw_flags = 0;
if (i915_gem_context_is_kernel(rq->gem_context))
hw_flags = MI_RESTORE_INHIBIT;
ret = mi_set_context(rq, hw_flags);
if (ret)
- goto err_mm;
+ return ret;
}
if (vm) {
+ struct intel_engine_cs *engine = rq->engine;
+
ret = engine->emit_flush(rq, EMIT_INVALIDATE);
if (ret)
- goto err_mm;
+ return ret;
ret = flush_pd_dir(rq);
if (ret)
- goto err_mm;
+ return ret;
/*
* Not only do we need a full barrier (post-sync write) after
@@ -1816,24 +1795,18 @@ static int switch_context(struct i915_request *rq)
*/
ret = engine->emit_flush(rq, EMIT_INVALIDATE);
if (ret)
- goto err_mm;
+ return ret;
ret = engine->emit_flush(rq, EMIT_FLUSH);
if (ret)
- goto err_mm;
+ return ret;
}
ret = remap_l3(rq);
if (ret)
- goto err_mm;
+ return ret;
return 0;
-
-err_mm:
- if (unwind_mm)
- i915_vm_to_ppgtt(vm)->pd_dirty_engines |= unwind_mm;
-err:
- return ret;
}
static int ring_request_alloc(struct i915_request *request)
@@ -1841,7 +1814,7 @@ static int ring_request_alloc(struct i915_request *request)
int ret;
GEM_BUG_ON(!intel_context_is_pinned(request->hw_context));
- GEM_BUG_ON(request->timeline->has_initial_breadcrumb);
+ GEM_BUG_ON(i915_request_timeline(request)->has_initial_breadcrumb);
/*
* Flush enough space to reduce the likelihood of waiting after
@@ -1952,7 +1925,9 @@ u32 *intel_ring_begin(struct i915_request *rq, unsigned int num_dwords)
*/
GEM_BUG_ON(!rq->reserved_space);
- ret = wait_for_space(ring, rq->timeline, total_bytes);
+ ret = wait_for_space(ring,
+ i915_request_timeline(rq),
+ total_bytes);
if (unlikely(ret))
return ERR_PTR(ret);
}
diff --git a/drivers/gpu/drm/i915/gt/intel_sseu.c b/drivers/gpu/drm/i915/gt/intel_sseu.c
index 6bf2d87da109..74f793423231 100644
--- a/drivers/gpu/drm/i915/gt/intel_sseu.c
+++ b/drivers/gpu/drm/i915/gt/intel_sseu.c
@@ -8,6 +8,19 @@
#include "intel_lrc_reg.h"
#include "intel_sseu.h"
+void intel_sseu_set_info(struct sseu_dev_info *sseu, u8 max_slices,
+ u8 max_subslices, u8 max_eus_per_subslice)
+{
+ sseu->max_slices = max_slices;
+ sseu->max_subslices = max_subslices;
+ sseu->max_eus_per_subslice = max_eus_per_subslice;
+
+ sseu->ss_stride = GEN_SSEU_STRIDE(sseu->max_subslices);
+ GEM_BUG_ON(sseu->ss_stride > GEN_MAX_SUBSLICE_STRIDE);
+ sseu->eu_stride = GEN_SSEU_STRIDE(sseu->max_eus_per_subslice);
+ GEM_BUG_ON(sseu->eu_stride > GEN_MAX_EU_STRIDE);
+}
+
unsigned int
intel_sseu_subslice_total(const struct sseu_dev_info *sseu)
{
@@ -19,10 +32,32 @@ intel_sseu_subslice_total(const struct sseu_dev_info *sseu)
return total;
}
+u32 intel_sseu_get_subslices(const struct sseu_dev_info *sseu, u8 slice)
+{
+ int i, offset = slice * sseu->ss_stride;
+ u32 mask = 0;
+
+ GEM_BUG_ON(slice >= sseu->max_slices);
+
+ for (i = 0; i < sseu->ss_stride; i++)
+ mask |= (u32)sseu->subslice_mask[offset + i] <<
+ i * BITS_PER_BYTE;
+
+ return mask;
+}
+
+void intel_sseu_set_subslices(struct sseu_dev_info *sseu, int slice,
+ u32 ss_mask)
+{
+ int offset = slice * sseu->ss_stride;
+
+ memcpy(&sseu->subslice_mask[offset], &ss_mask, sseu->ss_stride);
+}
+
unsigned int
intel_sseu_subslices_per_slice(const struct sseu_dev_info *sseu, u8 slice)
{
- return hweight8(sseu->subslice_mask[slice]);
+ return hweight32(intel_sseu_get_subslices(sseu, slice));
}
u32 intel_sseu_make_rpcs(struct drm_i915_private *i915,
diff --git a/drivers/gpu/drm/i915/gt/intel_sseu.h b/drivers/gpu/drm/i915/gt/intel_sseu.h
index b50d0401a4e2..d1d225204f09 100644
--- a/drivers/gpu/drm/i915/gt/intel_sseu.h
+++ b/drivers/gpu/drm/i915/gt/intel_sseu.h
@@ -10,15 +10,21 @@
#include <linux/types.h>
#include <linux/kernel.h>
+#include "i915_gem.h"
+
struct drm_i915_private;
#define GEN_MAX_SLICES (6) /* CNL upper bound */
#define GEN_MAX_SUBSLICES (8) /* ICL upper bound */
#define GEN_SSEU_STRIDE(max_entries) DIV_ROUND_UP(max_entries, BITS_PER_BYTE)
+#define GEN_MAX_SUBSLICE_STRIDE GEN_SSEU_STRIDE(GEN_MAX_SUBSLICES)
+#define GEN_MAX_EUS (16) /* TGL upper bound */
+#define GEN_MAX_EU_STRIDE GEN_SSEU_STRIDE(GEN_MAX_EUS)
struct sseu_dev_info {
u8 slice_mask;
- u8 subslice_mask[GEN_MAX_SLICES];
+ u8 subslice_mask[GEN_MAX_SLICES * GEN_MAX_SUBSLICE_STRIDE];
+ u8 eu_mask[GEN_MAX_SLICES * GEN_MAX_SUBSLICES * GEN_MAX_EU_STRIDE];
u16 eu_total;
u8 eu_per_subslice;
u8 min_eu_in_pool;
@@ -33,11 +39,8 @@ struct sseu_dev_info {
u8 max_subslices;
u8 max_eus_per_subslice;
- /* We don't have more than 8 eus per subslice at the moment and as we
- * store eus enabled using bits, no need to multiply by eus per
- * subslice.
- */
- u8 eu_mask[GEN_MAX_SLICES * GEN_MAX_SUBSLICES];
+ u8 ss_stride;
+ u8 eu_stride;
};
/*
@@ -63,12 +66,34 @@ intel_sseu_from_device_info(const struct sseu_dev_info *sseu)
return value;
}
+static inline bool
+intel_sseu_has_subslice(const struct sseu_dev_info *sseu, int slice,
+ int subslice)
+{
+ u8 mask;
+ int ss_idx = subslice / BITS_PER_BYTE;
+
+ GEM_BUG_ON(ss_idx >= sseu->ss_stride);
+
+ mask = sseu->subslice_mask[slice * sseu->ss_stride + ss_idx];
+
+ return mask & BIT(subslice % BITS_PER_BYTE);
+}
+
+void intel_sseu_set_info(struct sseu_dev_info *sseu, u8 max_slices,
+ u8 max_subslices, u8 max_eus_per_subslice);
+
unsigned int
intel_sseu_subslice_total(const struct sseu_dev_info *sseu);
unsigned int
intel_sseu_subslices_per_slice(const struct sseu_dev_info *sseu, u8 slice);
+u32 intel_sseu_get_subslices(const struct sseu_dev_info *sseu, u8 slice);
+
+void intel_sseu_set_subslices(struct sseu_dev_info *sseu, int slice,
+ u32 ss_mask);
+
u32 intel_sseu_make_rpcs(struct drm_i915_private *i915,
const struct intel_sseu *req_sseu);
diff --git a/drivers/gpu/drm/i915/gt/intel_timeline.c b/drivers/gpu/drm/i915/gt/intel_timeline.c
index 9cb01d9828f1..0f959694303c 100644
--- a/drivers/gpu/drm/i915/gt/intel_timeline.c
+++ b/drivers/gpu/drm/i915/gt/intel_timeline.c
@@ -136,6 +136,7 @@ static void __idle_cacheline_free(struct intel_timeline_cacheline *cl)
kfree(cl);
}
+__i915_active_call
static void __cacheline_retire(struct i915_active *active)
{
struct intel_timeline_cacheline *cl =
@@ -177,8 +178,7 @@ cacheline_alloc(struct intel_timeline_hwsp *hwsp, unsigned int cacheline)
cl->hwsp = hwsp;
cl->vaddr = page_pack_bits(vaddr, cacheline);
- i915_active_init(hwsp->gt->i915, &cl->active,
- __cacheline_active, __cacheline_retire);
+ i915_active_init(&cl->active, __cacheline_active, __cacheline_retire);
return cl;
}
@@ -254,7 +254,7 @@ int intel_timeline_init(struct intel_timeline *timeline,
mutex_init(&timeline->mutex);
- INIT_ACTIVE_REQUEST(&timeline->last_request, &timeline->mutex);
+ INIT_ACTIVE_FENCE(&timeline->last_request, &timeline->mutex);
INIT_LIST_HEAD(&timeline->requests);
i915_syncmap_init(&timeline->sync);
@@ -442,7 +442,7 @@ __intel_timeline_get_seqno(struct intel_timeline *tl,
* free it after the current request is retired, which ensures that
* all writes into the cacheline from previous requests are complete.
*/
- err = i915_active_ref(&tl->hwsp_cacheline->active, tl, rq);
+ err = i915_active_ref(&tl->hwsp_cacheline->active, tl, &rq->fence);
if (err)
goto err_cacheline;
@@ -493,24 +493,39 @@ int intel_timeline_get_seqno(struct intel_timeline *tl,
static int cacheline_ref(struct intel_timeline_cacheline *cl,
struct i915_request *rq)
{
- return i915_active_ref(&cl->active, rq->timeline, rq);
+ return i915_active_add_request(&cl->active, rq);
}
int intel_timeline_read_hwsp(struct i915_request *from,
struct i915_request *to,
u32 *hwsp)
{
- struct intel_timeline_cacheline *cl = from->hwsp_cacheline;
- struct intel_timeline *tl = from->timeline;
+ struct intel_timeline *tl;
int err;
- GEM_BUG_ON(to->timeline == tl);
+ rcu_read_lock();
+ tl = rcu_dereference(from->timeline);
+ if (i915_request_completed(from) || !kref_get_unless_zero(&tl->kref))
+ tl = NULL;
+ rcu_read_unlock();
+ if (!tl) /* already completed */
+ return 1;
+
+ GEM_BUG_ON(rcu_access_pointer(to->timeline) == tl);
+
+ err = -EBUSY;
+ if (mutex_trylock(&tl->mutex)) {
+ struct intel_timeline_cacheline *cl = from->hwsp_cacheline;
+
+ if (i915_request_completed(from)) {
+ err = 1;
+ goto unlock;
+ }
- mutex_lock_nested(&tl->mutex, SINGLE_DEPTH_NESTING);
- err = i915_request_completed(from);
- if (!err)
err = cacheline_ref(cl, to);
- if (!err) {
+ if (err)
+ goto unlock;
+
if (likely(cl == tl->hwsp_cacheline)) {
*hwsp = tl->hwsp_offset;
} else { /* across a seqno wrap, recover the original offset */
@@ -518,8 +533,11 @@ int intel_timeline_read_hwsp(struct i915_request *from,
ptr_unmask_bits(cl->vaddr, CACHELINE_BITS) *
CACHELINE_BYTES;
}
+
+unlock:
+ mutex_unlock(&tl->mutex);
}
- mutex_unlock(&tl->mutex);
+ intel_timeline_put(tl);
return err;
}
@@ -541,7 +559,7 @@ void __intel_timeline_free(struct kref *kref)
container_of(kref, typeof(*timeline), kref);
intel_timeline_fini(timeline);
- kfree(timeline);
+ kfree_rcu(timeline, rcu);
}
static void timelines_fini(struct intel_gt *gt)
diff --git a/drivers/gpu/drm/i915/gt/intel_timeline_types.h b/drivers/gpu/drm/i915/gt/intel_timeline_types.h
index 2b1baf2fcc8e..98d9ee166379 100644
--- a/drivers/gpu/drm/i915/gt/intel_timeline_types.h
+++ b/drivers/gpu/drm/i915/gt/intel_timeline_types.h
@@ -58,12 +58,13 @@ struct intel_timeline {
*/
struct list_head requests;
- /* Contains an RCU guarded pointer to the last request. No reference is
+ /*
+ * Contains an RCU guarded pointer to the last request. No reference is
* held to the request, users must carefully acquire a reference to
- * the request using i915_active_request_get_request_rcu(), or hold the
- * struct_mutex.
+ * the request using i915_active_fence_get(), or manage the RCU
+ * protection themselves (cf the i915_active_fence API).
*/
- struct i915_active_request last_request;
+ struct i915_active_fence last_request;
/**
* We track the most recent seqno that we wait on in every context so
@@ -80,6 +81,7 @@ struct intel_timeline {
struct intel_gt *gt;
struct kref kref;
+ struct rcu_head rcu;
};
#endif /* __I915_TIMELINE_TYPES_H__ */
diff --git a/drivers/gpu/drm/i915/gt/intel_workarounds.c b/drivers/gpu/drm/i915/gt/intel_workarounds.c
index 45481eb1fa3c..ba65e5018978 100644
--- a/drivers/gpu/drm/i915/gt/intel_workarounds.c
+++ b/drivers/gpu/drm/i915/gt/intel_workarounds.c
@@ -567,6 +567,9 @@ static void icl_ctx_workarounds_init(struct intel_engine_cs *engine,
static void tgl_ctx_workarounds_init(struct intel_engine_cs *engine,
struct i915_wa_list *wal)
{
+ /* Wa_1409142259 */
+ WA_SET_BIT_MASKED(GEN11_COMMON_SLICE_CHICKEN3,
+ GEN12_DISABLE_CPS_AWARE_COLOR_PIPE);
}
static void
@@ -796,11 +799,10 @@ wa_init_mcr(struct drm_i915_private *i915, struct i915_wa_list *wal)
}
slice = fls(sseu->slice_mask) - 1;
- GEM_BUG_ON(slice >= ARRAY_SIZE(sseu->subslice_mask));
- subslice = fls(l3_en & sseu->subslice_mask[slice]);
+ subslice = fls(l3_en & intel_sseu_get_subslices(sseu, slice));
if (!subslice) {
DRM_WARN("No common index found between subslice mask %x and L3 bank mask %x!\n",
- sseu->subslice_mask[slice], l3_en);
+ intel_sseu_get_subslices(sseu, slice), l3_en);
subslice = fls(l3_en);
WARN_ON(!subslice);
}
@@ -1063,6 +1065,9 @@ static void gen9_whitelist_build(struct i915_wa_list *w)
/* WaAllowUMDToModifyHDCChicken1:skl,bxt,kbl,glk,cfl */
whitelist_reg(w, GEN8_HDC_CHICKEN1);
+
+ /* WaSendPushConstantsFromMMIO:skl,bxt */
+ whitelist_reg(w, COMMON_SLICE_CHICKEN2);
}
static void skl_whitelist_build(struct intel_engine_cs *engine)
@@ -1449,7 +1454,7 @@ static bool mcr_range(struct drm_i915_private *i915, u32 offset)
* which only controls CPU initiated MMIO. Routing does not
* work for CS access so we cannot verify them on this path.
*/
- if (INTEL_GEN(i915) >= 8 && (offset >= 0xb100 && offset <= 0xb3ff))
+ if (INTEL_GEN(i915) >= 8 && (offset >= 0xb000 && offset <= 0xb4ff))
return true;
return false;
diff --git a/drivers/gpu/drm/i915/gt/selftest_context.c b/drivers/gpu/drm/i915/gt/selftest_context.c
index 9d1ea26c7a2d..7c838a57e174 100644
--- a/drivers/gpu/drm/i915/gt/selftest_context.c
+++ b/drivers/gpu/drm/i915/gt/selftest_context.c
@@ -14,22 +14,28 @@
static int request_sync(struct i915_request *rq)
{
+ struct intel_timeline *tl = i915_request_timeline(rq);
long timeout;
int err = 0;
+ intel_timeline_get(tl);
i915_request_get(rq);
- i915_request_add(rq);
+ /* Opencode i915_request_add() so we can keep the timeline locked. */
+ __i915_request_commit(rq);
+ __i915_request_queue(rq, NULL);
+
timeout = i915_request_wait(rq, 0, HZ / 10);
- if (timeout < 0) {
+ if (timeout < 0)
err = timeout;
- } else {
- mutex_lock(&rq->timeline->mutex);
+ else
i915_request_retire_upto(rq);
- mutex_unlock(&rq->timeline->mutex);
- }
+
+ lockdep_unpin_lock(&tl->mutex, rq->cookie);
+ mutex_unlock(&tl->mutex);
i915_request_put(rq);
+ intel_timeline_put(tl);
return err;
}
@@ -41,24 +47,20 @@ static int context_sync(struct intel_context *ce)
mutex_lock(&tl->mutex);
do {
- struct i915_request *rq;
+ struct dma_fence *fence;
long timeout;
- rcu_read_lock();
- rq = rcu_dereference(tl->last_request.request);
- if (rq)
- rq = i915_request_get_rcu(rq);
- rcu_read_unlock();
- if (!rq)
+ fence = i915_active_fence_get(&tl->last_request);
+ if (!fence)
break;
- timeout = i915_request_wait(rq, 0, HZ / 10);
+ timeout = dma_fence_wait_timeout(fence, false, HZ / 10);
if (timeout < 0)
err = timeout;
else
- i915_request_retire_upto(rq);
+ i915_request_retire_upto(to_request(fence));
- i915_request_put(rq);
+ dma_fence_put(fence);
} while (!err);
mutex_unlock(&tl->mutex);
@@ -153,13 +155,9 @@ static int live_context_size(void *arg)
* HW tries to write past the end of one.
*/
- mutex_lock(&gt->i915->drm.struct_mutex);
-
fixme = kernel_context(gt->i915);
- if (IS_ERR(fixme)) {
- err = PTR_ERR(fixme);
- goto unlock;
- }
+ if (IS_ERR(fixme))
+ return PTR_ERR(fixme);
for_each_engine(engine, gt->i915, id) {
struct {
@@ -199,8 +197,6 @@ static int live_context_size(void *arg)
}
kernel_context_close(fixme);
-unlock:
- mutex_unlock(&gt->i915->drm.struct_mutex);
return err;
}
@@ -303,12 +299,10 @@ static int live_active_context(void *arg)
if (IS_ERR(file))
return PTR_ERR(file);
- mutex_lock(&gt->i915->drm.struct_mutex);
-
fixme = live_context(gt->i915, file);
if (IS_ERR(fixme)) {
err = PTR_ERR(fixme);
- goto unlock;
+ goto out_file;
}
for_each_engine(engine, gt->i915, id) {
@@ -316,13 +310,12 @@ static int live_active_context(void *arg)
if (err)
break;
- err = igt_flush_test(gt->i915, I915_WAIT_LOCKED);
+ err = igt_flush_test(gt->i915);
if (err)
break;
}
-unlock:
- mutex_unlock(&gt->i915->drm.struct_mutex);
+out_file:
mock_file_free(gt->i915, file);
return err;
}
@@ -416,12 +409,10 @@ static int live_remote_context(void *arg)
if (IS_ERR(file))
return PTR_ERR(file);
- mutex_lock(&gt->i915->drm.struct_mutex);
-
fixme = live_context(gt->i915, file);
if (IS_ERR(fixme)) {
err = PTR_ERR(fixme);
- goto unlock;
+ goto out_file;
}
for_each_engine(engine, gt->i915, id) {
@@ -429,13 +420,12 @@ static int live_remote_context(void *arg)
if (err)
break;
- err = igt_flush_test(gt->i915, I915_WAIT_LOCKED);
+ err = igt_flush_test(gt->i915);
if (err)
break;
}
-unlock:
- mutex_unlock(&gt->i915->drm.struct_mutex);
+out_file:
mock_file_free(gt->i915, file);
return err;
}
diff --git a/drivers/gpu/drm/i915/gt/selftest_gt_pm.c b/drivers/gpu/drm/i915/gt/selftest_gt_pm.c
new file mode 100644
index 000000000000..87985bd46423
--- /dev/null
+++ b/drivers/gpu/drm/i915/gt/selftest_gt_pm.c
@@ -0,0 +1,50 @@
+
+/*
+ * SPDX-License-Identifier: MIT
+ *
+ * Copyright © 2019 Intel Corporation
+ */
+
+static int live_gt_resume(void *arg)
+{
+ struct intel_gt *gt = arg;
+ IGT_TIMEOUT(end_time);
+ int err;
+
+ /* Do several suspend/resume cycles to check we don't explode! */
+ do {
+ intel_gt_suspend(gt);
+
+ if (gt->rc6.enabled) {
+ pr_err("rc6 still enabled after suspend!\n");
+ intel_gt_set_wedged_on_init(gt);
+ err = -EINVAL;
+ break;
+ }
+
+ err = intel_gt_resume(gt);
+ if (err)
+ break;
+
+ if (gt->rc6.supported && !gt->rc6.enabled) {
+ pr_err("rc6 not enabled upon resume!\n");
+ intel_gt_set_wedged_on_init(gt);
+ err = -EINVAL;
+ break;
+ }
+ } while (!__igt_timeout(end_time, NULL));
+
+ return err;
+}
+
+int intel_gt_pm_live_selftests(struct drm_i915_private *i915)
+{
+ static const struct i915_subtest tests[] = {
+ SUBTEST(live_gt_resume),
+ };
+
+ if (intel_gt_is_wedged(&i915->gt))
+ return 0;
+
+ return intel_gt_live_subtests(tests, &i915->gt);
+}
diff --git a/drivers/gpu/drm/i915/gt/selftest_hangcheck.c b/drivers/gpu/drm/i915/gt/selftest_hangcheck.c
index a0098fc35921..e8a40df79bd0 100644
--- a/drivers/gpu/drm/i915/gt/selftest_hangcheck.c
+++ b/drivers/gpu/drm/i915/gt/selftest_hangcheck.c
@@ -131,7 +131,7 @@ static struct i915_request *
hang_create_request(struct hang *h, struct intel_engine_cs *engine)
{
struct intel_gt *gt = h->gt;
- struct i915_address_space *vm = h->ctx->vm ?: &engine->gt->ggtt->vm;
+ struct i915_address_space *vm = i915_gem_context_get_vm_rcu(h->ctx);
struct drm_i915_gem_object *obj;
struct i915_request *rq = NULL;
struct i915_vma *hws, *vma;
@@ -141,12 +141,15 @@ hang_create_request(struct hang *h, struct intel_engine_cs *engine)
int err;
obj = i915_gem_object_create_internal(gt->i915, PAGE_SIZE);
- if (IS_ERR(obj))
+ if (IS_ERR(obj)) {
+ i915_vm_put(vm);
return ERR_CAST(obj);
+ }
vaddr = i915_gem_object_pin_map(obj, i915_coherent_map_type(gt->i915));
if (IS_ERR(vaddr)) {
i915_gem_object_put(obj);
+ i915_vm_put(vm);
return ERR_CAST(vaddr);
}
@@ -157,16 +160,22 @@ hang_create_request(struct hang *h, struct intel_engine_cs *engine)
h->batch = vaddr;
vma = i915_vma_instance(h->obj, vm, NULL);
- if (IS_ERR(vma))
+ if (IS_ERR(vma)) {
+ i915_vm_put(vm);
return ERR_CAST(vma);
+ }
hws = i915_vma_instance(h->hws, vm, NULL);
- if (IS_ERR(hws))
+ if (IS_ERR(hws)) {
+ i915_vm_put(vm);
return ERR_CAST(hws);
+ }
err = i915_vma_pin(vma, 0, 0, PIN_USER);
- if (err)
+ if (err) {
+ i915_vm_put(vm);
return ERR_PTR(err);
+ }
err = i915_vma_pin(hws, 0, 0, PIN_USER);
if (err)
@@ -264,6 +273,7 @@ unpin_hws:
i915_vma_unpin(hws);
unpin_vma:
i915_vma_unpin(vma);
+ i915_vm_put(vm);
return err ? ERR_PTR(err) : rq;
}
@@ -285,7 +295,7 @@ static void hang_fini(struct hang *h)
kernel_context_close(h->ctx);
- igt_flush_test(h->gt->i915, I915_WAIT_LOCKED);
+ igt_flush_test(h->gt->i915);
}
static bool wait_until_running(struct hang *h, struct i915_request *rq)
@@ -309,10 +319,9 @@ static int igt_hang_sanitycheck(void *arg)
/* Basic check that we can execute our hanging batch */
- mutex_lock(&gt->i915->drm.struct_mutex);
err = hang_init(&h, gt);
if (err)
- goto unlock;
+ return err;
for_each_engine(engine, gt->i915, id) {
struct intel_wedge_me w;
@@ -355,8 +364,6 @@ static int igt_hang_sanitycheck(void *arg)
fini:
hang_fini(&h);
-unlock:
- mutex_unlock(&gt->i915->drm.struct_mutex);
return err;
}
@@ -383,9 +390,7 @@ static int igt_reset_nop(void *arg)
if (IS_ERR(file))
return PTR_ERR(file);
- mutex_lock(&gt->i915->drm.struct_mutex);
ctx = live_context(gt->i915, file);
- mutex_unlock(&gt->i915->drm.struct_mutex);
if (IS_ERR(ctx)) {
err = PTR_ERR(ctx);
goto out;
@@ -395,8 +400,6 @@ static int igt_reset_nop(void *arg)
reset_count = i915_reset_count(global);
count = 0;
do {
- mutex_lock(&gt->i915->drm.struct_mutex);
-
for_each_engine(engine, gt->i915, id) {
int i;
@@ -417,7 +420,6 @@ static int igt_reset_nop(void *arg)
intel_gt_reset(gt, ALL_ENGINES, NULL);
igt_global_reset_unlock(gt);
- mutex_unlock(&gt->i915->drm.struct_mutex);
if (intel_gt_is_wedged(gt)) {
err = -EIO;
break;
@@ -429,16 +431,13 @@ static int igt_reset_nop(void *arg)
break;
}
- err = igt_flush_test(gt->i915, 0);
+ err = igt_flush_test(gt->i915);
if (err)
break;
} while (time_before(jiffies, end_time));
pr_info("%s: %d resets\n", __func__, count);
- mutex_lock(&gt->i915->drm.struct_mutex);
- err = igt_flush_test(gt->i915, I915_WAIT_LOCKED);
- mutex_unlock(&gt->i915->drm.struct_mutex);
-
+ err = igt_flush_test(gt->i915);
out:
mock_file_free(gt->i915, file);
if (intel_gt_is_wedged(gt))
@@ -458,16 +457,14 @@ static int igt_reset_nop_engine(void *arg)
/* Check that we can engine-reset during non-user portions */
- if (!intel_has_reset_engine(gt->i915))
+ if (!intel_has_reset_engine(gt))
return 0;
file = mock_file(gt->i915);
if (IS_ERR(file))
return PTR_ERR(file);
- mutex_lock(&gt->i915->drm.struct_mutex);
ctx = live_context(gt->i915, file);
- mutex_unlock(&gt->i915->drm.struct_mutex);
if (IS_ERR(ctx)) {
err = PTR_ERR(ctx);
goto out;
@@ -494,7 +491,6 @@ static int igt_reset_nop_engine(void *arg)
break;
}
- mutex_lock(&gt->i915->drm.struct_mutex);
for (i = 0; i < 16; i++) {
struct i915_request *rq;
@@ -507,7 +503,6 @@ static int igt_reset_nop_engine(void *arg)
i915_request_add(rq);
}
err = intel_engine_reset(engine, NULL);
- mutex_unlock(&gt->i915->drm.struct_mutex);
if (err) {
pr_err("i915_reset_engine failed\n");
break;
@@ -533,15 +528,12 @@ static int igt_reset_nop_engine(void *arg)
if (err)
break;
- err = igt_flush_test(gt->i915, 0);
+ err = igt_flush_test(gt->i915);
if (err)
break;
}
- mutex_lock(&gt->i915->drm.struct_mutex);
- err = igt_flush_test(gt->i915, I915_WAIT_LOCKED);
- mutex_unlock(&gt->i915->drm.struct_mutex);
-
+ err = igt_flush_test(gt->i915);
out:
mock_file_free(gt->i915, file);
if (intel_gt_is_wedged(gt))
@@ -559,13 +551,11 @@ static int __igt_reset_engine(struct intel_gt *gt, bool active)
/* Check that we can issue an engine reset on an idle engine (no-op) */
- if (!intel_has_reset_engine(gt->i915))
+ if (!intel_has_reset_engine(gt))
return 0;
if (active) {
- mutex_lock(&gt->i915->drm.struct_mutex);
err = hang_init(&h, gt);
- mutex_unlock(&gt->i915->drm.struct_mutex);
if (err)
return err;
}
@@ -593,17 +583,14 @@ static int __igt_reset_engine(struct intel_gt *gt, bool active)
if (active) {
struct i915_request *rq;
- mutex_lock(&gt->i915->drm.struct_mutex);
rq = hang_create_request(&h, engine);
if (IS_ERR(rq)) {
err = PTR_ERR(rq);
- mutex_unlock(&gt->i915->drm.struct_mutex);
break;
}
i915_request_get(rq);
i915_request_add(rq);
- mutex_unlock(&gt->i915->drm.struct_mutex);
if (!wait_until_running(&h, rq)) {
struct drm_printer p = drm_info_printer(gt->i915->drm.dev);
@@ -647,7 +634,7 @@ static int __igt_reset_engine(struct intel_gt *gt, bool active)
if (err)
break;
- err = igt_flush_test(gt->i915, 0);
+ err = igt_flush_test(gt->i915);
if (err)
break;
}
@@ -655,11 +642,8 @@ static int __igt_reset_engine(struct intel_gt *gt, bool active)
if (intel_gt_is_wedged(gt))
err = -EIO;
- if (active) {
- mutex_lock(&gt->i915->drm.struct_mutex);
+ if (active)
hang_fini(&h);
- mutex_unlock(&gt->i915->drm.struct_mutex);
- }
return err;
}
@@ -725,9 +709,7 @@ static int active_engine(void *data)
return PTR_ERR(file);
for (count = 0; count < ARRAY_SIZE(ctx); count++) {
- mutex_lock(&engine->i915->drm.struct_mutex);
ctx[count] = live_context(engine->i915, file);
- mutex_unlock(&engine->i915->drm.struct_mutex);
if (IS_ERR(ctx[count])) {
err = PTR_ERR(ctx[count]);
while (--count)
@@ -741,10 +723,8 @@ static int active_engine(void *data)
struct i915_request *old = rq[idx];
struct i915_request *new;
- mutex_lock(&engine->i915->drm.struct_mutex);
new = igt_request_alloc(ctx[idx], engine);
if (IS_ERR(new)) {
- mutex_unlock(&engine->i915->drm.struct_mutex);
err = PTR_ERR(new);
break;
}
@@ -755,7 +735,6 @@ static int active_engine(void *data)
rq[idx] = i915_request_get(new);
i915_request_add(new);
- mutex_unlock(&engine->i915->drm.struct_mutex);
err = active_request_put(old);
if (err)
@@ -791,13 +770,11 @@ static int __igt_reset_engines(struct intel_gt *gt,
* with any other engine.
*/
- if (!intel_has_reset_engine(gt->i915))
+ if (!intel_has_reset_engine(gt))
return 0;
if (flags & TEST_ACTIVE) {
- mutex_lock(&gt->i915->drm.struct_mutex);
err = hang_init(&h, gt);
- mutex_unlock(&gt->i915->drm.struct_mutex);
if (err)
return err;
@@ -855,17 +832,14 @@ static int __igt_reset_engines(struct intel_gt *gt,
struct i915_request *rq = NULL;
if (flags & TEST_ACTIVE) {
- mutex_lock(&gt->i915->drm.struct_mutex);
rq = hang_create_request(&h, engine);
if (IS_ERR(rq)) {
err = PTR_ERR(rq);
- mutex_unlock(&gt->i915->drm.struct_mutex);
break;
}
i915_request_get(rq);
i915_request_add(rq);
- mutex_unlock(&gt->i915->drm.struct_mutex);
if (!wait_until_running(&h, rq)) {
struct drm_printer p = drm_info_printer(gt->i915->drm.dev);
@@ -977,9 +951,7 @@ unwind:
if (err)
break;
- mutex_lock(&gt->i915->drm.struct_mutex);
- err = igt_flush_test(gt->i915, I915_WAIT_LOCKED);
- mutex_unlock(&gt->i915->drm.struct_mutex);
+ err = igt_flush_test(gt->i915);
if (err)
break;
}
@@ -987,11 +959,8 @@ unwind:
if (intel_gt_is_wedged(gt))
err = -EIO;
- if (flags & TEST_ACTIVE) {
- mutex_lock(&gt->i915->drm.struct_mutex);
+ if (flags & TEST_ACTIVE)
hang_fini(&h);
- mutex_unlock(&gt->i915->drm.struct_mutex);
- }
return err;
}
@@ -1061,7 +1030,6 @@ static int igt_reset_wait(void *arg)
igt_global_reset_lock(gt);
- mutex_lock(&gt->i915->drm.struct_mutex);
err = hang_init(&h, gt);
if (err)
goto unlock;
@@ -1109,7 +1077,6 @@ out_rq:
fini:
hang_fini(&h);
unlock:
- mutex_unlock(&gt->i915->drm.struct_mutex);
igt_global_reset_unlock(gt);
if (intel_gt_is_wedged(gt))
@@ -1127,15 +1094,14 @@ static int evict_vma(void *data)
{
struct evict_vma *arg = data;
struct i915_address_space *vm = arg->vma->vm;
- struct drm_i915_private *i915 = vm->i915;
struct drm_mm_node evict = arg->vma->node;
int err;
complete(&arg->completion);
- mutex_lock(&i915->drm.struct_mutex);
+ mutex_lock(&vm->mutex);
err = i915_gem_evict_for_node(vm, &evict, 0);
- mutex_unlock(&i915->drm.struct_mutex);
+ mutex_unlock(&vm->mutex);
return err;
}
@@ -1143,39 +1109,33 @@ static int evict_vma(void *data)
static int evict_fence(void *data)
{
struct evict_vma *arg = data;
- struct drm_i915_private *i915 = arg->vma->vm->i915;
int err;
complete(&arg->completion);
- mutex_lock(&i915->drm.struct_mutex);
-
/* Mark the fence register as dirty to force the mmio update. */
err = i915_gem_object_set_tiling(arg->vma->obj, I915_TILING_Y, 512);
if (err) {
pr_err("Invalid Y-tiling settings; err:%d\n", err);
- goto out_unlock;
+ return err;
}
err = i915_vma_pin(arg->vma, 0, 0, PIN_GLOBAL | PIN_MAPPABLE);
if (err) {
pr_err("Unable to pin vma for Y-tiled fence; err:%d\n", err);
- goto out_unlock;
+ return err;
}
err = i915_vma_pin_fence(arg->vma);
i915_vma_unpin(arg->vma);
if (err) {
pr_err("Unable to pin Y-tiled fence; err:%d\n", err);
- goto out_unlock;
+ return err;
}
i915_vma_unpin_fence(arg->vma);
-out_unlock:
- mutex_unlock(&i915->drm.struct_mutex);
-
- return err;
+ return 0;
}
static int __igt_reset_evict_vma(struct intel_gt *gt,
@@ -1196,10 +1156,9 @@ static int __igt_reset_evict_vma(struct intel_gt *gt,
/* Check that we can recover an unbind stuck on a hanging request */
- mutex_lock(&gt->i915->drm.struct_mutex);
err = hang_init(&h, gt);
if (err)
- goto unlock;
+ return err;
obj = i915_gem_object_create_internal(gt->i915, SZ_1M);
if (IS_ERR(obj)) {
@@ -1262,8 +1221,6 @@ static int __igt_reset_evict_vma(struct intel_gt *gt,
if (err)
goto out_rq;
- mutex_unlock(&gt->i915->drm.struct_mutex);
-
if (!wait_until_running(&h, rq)) {
struct drm_printer p = drm_info_printer(gt->i915->drm.dev);
@@ -1312,16 +1269,12 @@ out_reset:
put_task_struct(tsk);
}
- mutex_lock(&gt->i915->drm.struct_mutex);
out_rq:
i915_request_put(rq);
out_obj:
i915_gem_object_put(obj);
fini:
hang_fini(&h);
-unlock:
- mutex_unlock(&gt->i915->drm.struct_mutex);
-
if (intel_gt_is_wedged(gt))
return -EIO;
@@ -1340,6 +1293,7 @@ static int igt_reset_evict_ppgtt(void *arg)
{
struct intel_gt *gt = arg;
struct i915_gem_context *ctx;
+ struct i915_address_space *vm;
struct drm_file *file;
int err;
@@ -1347,18 +1301,20 @@ static int igt_reset_evict_ppgtt(void *arg)
if (IS_ERR(file))
return PTR_ERR(file);
- mutex_lock(&gt->i915->drm.struct_mutex);
ctx = live_context(gt->i915, file);
- mutex_unlock(&gt->i915->drm.struct_mutex);
if (IS_ERR(ctx)) {
err = PTR_ERR(ctx);
goto out;
}
err = 0;
- if (ctx->vm) /* aliasing == global gtt locking, covered above */
- err = __igt_reset_evict_vma(gt, ctx->vm,
+ vm = i915_gem_context_get_vm_rcu(ctx);
+ if (!i915_is_ggtt(vm)) {
+ /* aliasing == global gtt locking, covered above */
+ err = __igt_reset_evict_vma(gt, vm,
evict_vma, EXEC_OBJECT_WRITE);
+ }
+ i915_vm_put(vm);
out:
mock_file_free(gt->i915, file);
@@ -1403,7 +1359,6 @@ static int igt_reset_queue(void *arg)
igt_global_reset_lock(gt);
- mutex_lock(&gt->i915->drm.struct_mutex);
err = hang_init(&h, gt);
if (err)
goto unlock;
@@ -1518,7 +1473,7 @@ static int igt_reset_queue(void *arg)
i915_request_put(prev);
- err = igt_flush_test(gt->i915, I915_WAIT_LOCKED);
+ err = igt_flush_test(gt->i915);
if (err)
break;
}
@@ -1526,7 +1481,6 @@ static int igt_reset_queue(void *arg)
fini:
hang_fini(&h);
unlock:
- mutex_unlock(&gt->i915->drm.struct_mutex);
igt_global_reset_unlock(gt);
if (intel_gt_is_wedged(gt))
@@ -1547,17 +1501,15 @@ static int igt_handle_error(void *arg)
/* Check that we can issue a global GPU and engine reset */
- if (!intel_has_reset_engine(gt->i915))
+ if (!intel_has_reset_engine(gt))
return 0;
if (!engine || !intel_engine_can_store_dword(engine))
return 0;
- mutex_lock(&gt->i915->drm.struct_mutex);
-
err = hang_init(&h, gt);
if (err)
- goto err_unlock;
+ return err;
rq = hang_create_request(&h, engine);
if (IS_ERR(rq)) {
@@ -1581,8 +1533,6 @@ static int igt_handle_error(void *arg)
goto err_request;
}
- mutex_unlock(&gt->i915->drm.struct_mutex);
-
/* Temporarily disable error capture */
error = xchg(&global->first_error, (void *)-1);
@@ -1590,8 +1540,6 @@ static int igt_handle_error(void *arg)
xchg(&global->first_error, error);
- mutex_lock(&gt->i915->drm.struct_mutex);
-
if (rq->fence.error != -EIO) {
pr_err("Guilty request not identified!\n");
err = -EINVAL;
@@ -1602,8 +1550,6 @@ err_request:
i915_request_put(rq);
err_fini:
hang_fini(&h);
-err_unlock:
- mutex_unlock(&gt->i915->drm.struct_mutex);
return err;
}
@@ -1689,14 +1635,13 @@ static int igt_reset_engines_atomic(void *arg)
/* Check that the engines resets are usable from atomic context */
- if (!intel_has_reset_engine(gt->i915))
+ if (!intel_has_reset_engine(gt))
return 0;
if (USES_GUC_SUBMISSION(gt->i915))
return 0;
igt_global_reset_lock(gt);
- mutex_lock(&gt->i915->drm.struct_mutex);
/* Flush any requests before we get started and check basics */
if (!igt_force_reset(gt))
@@ -1716,9 +1661,7 @@ static int igt_reset_engines_atomic(void *arg)
out:
/* As we poke around the guts, do a full reset before continuing. */
igt_force_reset(gt);
-
unlock:
- mutex_unlock(&gt->i915->drm.struct_mutex);
igt_global_reset_unlock(gt);
return err;
@@ -1746,7 +1689,7 @@ int intel_hangcheck_live_selftests(struct drm_i915_private *i915)
bool saved_hangcheck;
int err;
- if (!intel_has_gpu_reset(gt->i915))
+ if (!intel_has_gpu_reset(gt))
return 0;
if (intel_gt_is_wedged(gt))
@@ -1758,10 +1701,6 @@ int intel_hangcheck_live_selftests(struct drm_i915_private *i915)
err = intel_gt_live_subtests(tests, gt);
- mutex_lock(&gt->i915->drm.struct_mutex);
- igt_flush_test(gt->i915, I915_WAIT_LOCKED);
- mutex_unlock(&gt->i915->drm.struct_mutex);
-
i915_modparams.enable_hangcheck = saved_hangcheck;
intel_runtime_pm_put(&gt->i915->runtime_pm, wakeref);
diff --git a/drivers/gpu/drm/i915/gt/selftest_lrc.c b/drivers/gpu/drm/i915/gt/selftest_lrc.c
index d791158988d6..393ae5321e1d 100644
--- a/drivers/gpu/drm/i915/gt/selftest_lrc.c
+++ b/drivers/gpu/drm/i915/gt/selftest_lrc.c
@@ -26,17 +26,13 @@ static int live_sanitycheck(void *arg)
struct i915_gem_context *ctx;
struct intel_context *ce;
struct igt_spinner spin;
- intel_wakeref_t wakeref;
int err = -ENOMEM;
if (!HAS_LOGICAL_RING_CONTEXTS(i915))
return 0;
- mutex_lock(&i915->drm.struct_mutex);
- wakeref = intel_runtime_pm_get(&i915->runtime_pm);
-
if (igt_spinner_init(&spin, &i915->gt))
- goto err_unlock;
+ return -ENOMEM;
ctx = kernel_context(i915);
if (!ctx)
@@ -61,7 +57,7 @@ static int live_sanitycheck(void *arg)
}
igt_spinner_end(&spin);
- if (igt_flush_test(i915, I915_WAIT_LOCKED)) {
+ if (igt_flush_test(i915)) {
err = -EIO;
goto err_ctx;
}
@@ -73,12 +69,187 @@ err_ctx:
kernel_context_close(ctx);
err_spin:
igt_spinner_fini(&spin);
+ return err;
+}
+
+static int live_unlite_restore(struct drm_i915_private *i915, int prio)
+{
+ struct intel_engine_cs *engine;
+ struct i915_gem_context *ctx;
+ enum intel_engine_id id;
+ intel_wakeref_t wakeref;
+ struct igt_spinner spin;
+ int err = -ENOMEM;
+
+ /*
+ * Check that we can correctly context switch between 2 instances
+ * on the same engine from the same parent context.
+ */
+
+ mutex_lock(&i915->drm.struct_mutex);
+ wakeref = intel_runtime_pm_get(&i915->runtime_pm);
+
+ if (igt_spinner_init(&spin, &i915->gt))
+ goto err_unlock;
+
+ ctx = kernel_context(i915);
+ if (!ctx)
+ goto err_spin;
+
+ err = 0;
+ for_each_engine(engine, i915, id) {
+ struct intel_context *ce[2] = {};
+ struct i915_request *rq[2];
+ struct igt_live_test t;
+ int n;
+
+ if (prio && !intel_engine_has_preemption(engine))
+ continue;
+
+ if (!intel_engine_can_store_dword(engine))
+ continue;
+
+ if (igt_live_test_begin(&t, i915, __func__, engine->name)) {
+ err = -EIO;
+ break;
+ }
+
+ for (n = 0; n < ARRAY_SIZE(ce); n++) {
+ struct intel_context *tmp;
+
+ tmp = intel_context_create(ctx, engine);
+ if (IS_ERR(tmp)) {
+ err = PTR_ERR(tmp);
+ goto err_ce;
+ }
+
+ err = intel_context_pin(tmp);
+ if (err) {
+ intel_context_put(tmp);
+ goto err_ce;
+ }
+
+ /*
+ * Setup the pair of contexts such that if we
+ * lite-restore using the RING_TAIL from ce[1] it
+ * will execute garbage from ce[0]->ring.
+ */
+ memset(tmp->ring->vaddr,
+ POISON_INUSE, /* IPEHR: 0x5a5a5a5a [hung!] */
+ tmp->ring->vma->size);
+
+ ce[n] = tmp;
+ }
+ GEM_BUG_ON(!ce[1]->ring->size);
+ intel_ring_reset(ce[1]->ring, ce[1]->ring->size / 2);
+
+ local_bh_disable(); /* appease lockdep */
+ __context_pin_acquire(ce[1]);
+ __execlists_update_reg_state(ce[1], engine);
+ __context_pin_release(ce[1]);
+ local_bh_enable();
+
+ rq[0] = igt_spinner_create_request(&spin, ce[0], MI_ARB_CHECK);
+ if (IS_ERR(rq[0])) {
+ err = PTR_ERR(rq[0]);
+ goto err_ce;
+ }
+
+ i915_request_get(rq[0]);
+ i915_request_add(rq[0]);
+ GEM_BUG_ON(rq[0]->postfix > ce[1]->ring->emit);
+
+ if (!igt_wait_for_spinner(&spin, rq[0])) {
+ i915_request_put(rq[0]);
+ goto err_ce;
+ }
+
+ rq[1] = i915_request_create(ce[1]);
+ if (IS_ERR(rq[1])) {
+ err = PTR_ERR(rq[1]);
+ i915_request_put(rq[0]);
+ goto err_ce;
+ }
+
+ if (!prio) {
+ /*
+ * Ensure we do the switch to ce[1] on completion.
+ *
+ * rq[0] is already submitted, so this should reduce
+ * to a no-op (a wait on a request on the same engine
+ * uses the submit fence, not the completion fence),
+ * but it will install a dependency on rq[1] for rq[0]
+ * that will prevent the pair being reordered by
+ * timeslicing.
+ */
+ i915_request_await_dma_fence(rq[1], &rq[0]->fence);
+ }
+
+ i915_request_get(rq[1]);
+ i915_request_add(rq[1]);
+ GEM_BUG_ON(rq[1]->postfix <= rq[0]->postfix);
+ i915_request_put(rq[0]);
+
+ if (prio) {
+ struct i915_sched_attr attr = {
+ .priority = prio,
+ };
+
+ /* Alternatively preempt the spinner with ce[1] */
+ engine->schedule(rq[1], &attr);
+ }
+
+ /* And switch back to ce[0] for good measure */
+ rq[0] = i915_request_create(ce[0]);
+ if (IS_ERR(rq[0])) {
+ err = PTR_ERR(rq[0]);
+ i915_request_put(rq[1]);
+ goto err_ce;
+ }
+
+ i915_request_await_dma_fence(rq[0], &rq[1]->fence);
+ i915_request_get(rq[0]);
+ i915_request_add(rq[0]);
+ GEM_BUG_ON(rq[0]->postfix > rq[1]->postfix);
+ i915_request_put(rq[1]);
+ i915_request_put(rq[0]);
+
+err_ce:
+ tasklet_kill(&engine->execlists.tasklet); /* flush submission */
+ igt_spinner_end(&spin);
+ for (n = 0; n < ARRAY_SIZE(ce); n++) {
+ if (IS_ERR_OR_NULL(ce[n]))
+ break;
+
+ intel_context_unpin(ce[n]);
+ intel_context_put(ce[n]);
+ }
+
+ if (igt_live_test_end(&t))
+ err = -EIO;
+ if (err)
+ break;
+ }
+
+ kernel_context_close(ctx);
+err_spin:
+ igt_spinner_fini(&spin);
err_unlock:
intel_runtime_pm_put(&i915->runtime_pm, wakeref);
mutex_unlock(&i915->drm.struct_mutex);
return err;
}
+static int live_unlite_switch(void *arg)
+{
+ return live_unlite_restore(arg, 0);
+}
+
+static int live_unlite_preempt(void *arg)
+{
+ return live_unlite_restore(arg, I915_USER_PRIORITY(I915_PRIORITY_MAX));
+}
+
static int
emit_semaphore_chain(struct i915_request *rq, struct i915_vma *vma, int idx)
{
@@ -206,8 +377,7 @@ slice_semaphore_queue(struct intel_engine_cs *outer,
if (err)
goto out;
- if (i915_request_wait(head,
- I915_WAIT_LOCKED,
+ if (i915_request_wait(head, 0,
2 * RUNTIME_INFO(outer->i915)->num_engines * (count + 2) * (count + 3)) < 0) {
pr_err("Failed to slice along semaphore chain of length (%d, %d)!\n",
count, n);
@@ -225,7 +395,6 @@ static int live_timeslice_preempt(void *arg)
{
struct drm_i915_private *i915 = arg;
struct drm_i915_gem_object *obj;
- intel_wakeref_t wakeref;
struct i915_vma *vma;
void *vaddr;
int err = 0;
@@ -240,14 +409,9 @@ static int live_timeslice_preempt(void *arg)
* ready task.
*/
- mutex_lock(&i915->drm.struct_mutex);
- wakeref = intel_runtime_pm_get(&i915->runtime_pm);
-
obj = i915_gem_object_create_internal(i915, PAGE_SIZE);
- if (IS_ERR(obj)) {
- err = PTR_ERR(obj);
- goto err_unlock;
- }
+ if (IS_ERR(obj))
+ return PTR_ERR(obj);
vma = i915_vma_instance(obj, &i915->ggtt.vm, NULL);
if (IS_ERR(vma)) {
@@ -279,7 +443,7 @@ static int live_timeslice_preempt(void *arg)
if (err)
goto err_pin;
- if (igt_flush_test(i915, I915_WAIT_LOCKED)) {
+ if (igt_flush_test(i915)) {
err = -EIO;
goto err_pin;
}
@@ -292,10 +456,6 @@ err_map:
i915_gem_object_unpin_map(obj);
err_obj:
i915_gem_object_put(obj);
-err_unlock:
- intel_runtime_pm_put(&i915->runtime_pm, wakeref);
- mutex_unlock(&i915->drm.struct_mutex);
-
return err;
}
@@ -307,7 +467,6 @@ static int live_busywait_preempt(void *arg)
struct drm_i915_gem_object *obj;
struct i915_vma *vma;
enum intel_engine_id id;
- intel_wakeref_t wakeref;
int err = -ENOMEM;
u32 *map;
@@ -316,12 +475,9 @@ static int live_busywait_preempt(void *arg)
* preempt the busywaits used to synchronise between rings.
*/
- mutex_lock(&i915->drm.struct_mutex);
- wakeref = intel_runtime_pm_get(&i915->runtime_pm);
-
ctx_hi = kernel_context(i915);
if (!ctx_hi)
- goto err_unlock;
+ return -ENOMEM;
ctx_hi->sched.priority =
I915_USER_PRIORITY(I915_CONTEXT_MAX_USER_PRIORITY);
@@ -475,9 +631,6 @@ err_ctx_lo:
kernel_context_close(ctx_lo);
err_ctx_hi:
kernel_context_close(ctx_hi);
-err_unlock:
- intel_runtime_pm_put(&i915->runtime_pm, wakeref);
- mutex_unlock(&i915->drm.struct_mutex);
return err;
}
@@ -506,7 +659,6 @@ static int live_preempt(void *arg)
struct igt_spinner spin_hi, spin_lo;
struct intel_engine_cs *engine;
enum intel_engine_id id;
- intel_wakeref_t wakeref;
int err = -ENOMEM;
if (!HAS_LOGICAL_RING_PREEMPTION(i915))
@@ -515,11 +667,8 @@ static int live_preempt(void *arg)
if (!(i915->caps.scheduler & I915_SCHEDULER_CAP_PREEMPTION))
pr_err("Logical preemption supported, but not exposed\n");
- mutex_lock(&i915->drm.struct_mutex);
- wakeref = intel_runtime_pm_get(&i915->runtime_pm);
-
if (igt_spinner_init(&spin_hi, &i915->gt))
- goto err_unlock;
+ return -ENOMEM;
if (igt_spinner_init(&spin_lo, &i915->gt))
goto err_spin_hi;
@@ -599,9 +748,6 @@ err_spin_lo:
igt_spinner_fini(&spin_lo);
err_spin_hi:
igt_spinner_fini(&spin_hi);
-err_unlock:
- intel_runtime_pm_put(&i915->runtime_pm, wakeref);
- mutex_unlock(&i915->drm.struct_mutex);
return err;
}
@@ -613,17 +759,13 @@ static int live_late_preempt(void *arg)
struct intel_engine_cs *engine;
struct i915_sched_attr attr = {};
enum intel_engine_id id;
- intel_wakeref_t wakeref;
int err = -ENOMEM;
if (!HAS_LOGICAL_RING_PREEMPTION(i915))
return 0;
- mutex_lock(&i915->drm.struct_mutex);
- wakeref = intel_runtime_pm_get(&i915->runtime_pm);
-
if (igt_spinner_init(&spin_hi, &i915->gt))
- goto err_unlock;
+ return -ENOMEM;
if (igt_spinner_init(&spin_lo, &i915->gt))
goto err_spin_hi;
@@ -705,9 +847,6 @@ err_spin_lo:
igt_spinner_fini(&spin_lo);
err_spin_hi:
igt_spinner_fini(&spin_hi);
-err_unlock:
- intel_runtime_pm_put(&i915->runtime_pm, wakeref);
- mutex_unlock(&i915->drm.struct_mutex);
return err;
err_wedged:
@@ -752,7 +891,6 @@ static int live_nopreempt(void *arg)
struct intel_engine_cs *engine;
struct preempt_client a, b;
enum intel_engine_id id;
- intel_wakeref_t wakeref;
int err = -ENOMEM;
/*
@@ -763,11 +901,8 @@ static int live_nopreempt(void *arg)
if (!HAS_LOGICAL_RING_PREEMPTION(i915))
return 0;
- mutex_lock(&i915->drm.struct_mutex);
- wakeref = intel_runtime_pm_get(&i915->runtime_pm);
-
if (preempt_client_init(i915, &a))
- goto err_unlock;
+ return -ENOMEM;
if (preempt_client_init(i915, &b))
goto err_client_a;
b.ctx->sched.priority = I915_USER_PRIORITY(I915_PRIORITY_MAX);
@@ -832,7 +967,7 @@ static int live_nopreempt(void *arg)
goto err_wedged;
}
- if (igt_flush_test(i915, I915_WAIT_LOCKED))
+ if (igt_flush_test(i915))
goto err_wedged;
}
@@ -841,9 +976,6 @@ err_client_b:
preempt_client_fini(&b);
err_client_a:
preempt_client_fini(&a);
-err_unlock:
- intel_runtime_pm_put(&i915->runtime_pm, wakeref);
- mutex_unlock(&i915->drm.struct_mutex);
return err;
err_wedged:
@@ -863,7 +995,6 @@ static int live_suppress_self_preempt(void *arg)
};
struct preempt_client a, b;
enum intel_engine_id id;
- intel_wakeref_t wakeref;
int err = -ENOMEM;
/*
@@ -882,11 +1013,8 @@ static int live_suppress_self_preempt(void *arg)
if (intel_vgpu_active(i915))
return 0; /* GVT forces single port & request submission */
- mutex_lock(&i915->drm.struct_mutex);
- wakeref = intel_runtime_pm_get(&i915->runtime_pm);
-
if (preempt_client_init(i915, &a))
- goto err_unlock;
+ return -ENOMEM;
if (preempt_client_init(i915, &b))
goto err_client_a;
@@ -897,6 +1025,10 @@ static int live_suppress_self_preempt(void *arg)
if (!intel_engine_has_preemption(engine))
continue;
+ if (igt_flush_test(i915))
+ goto err_wedged;
+
+ intel_engine_pm_get(engine);
engine->execlists.preempt_hang.count = 0;
rq_a = spinner_create_request(&a.spin,
@@ -904,12 +1036,14 @@ static int live_suppress_self_preempt(void *arg)
MI_NOOP);
if (IS_ERR(rq_a)) {
err = PTR_ERR(rq_a);
+ intel_engine_pm_put(engine);
goto err_client_b;
}
i915_request_add(rq_a);
if (!igt_wait_for_spinner(&a.spin, rq_a)) {
pr_err("First client failed to start\n");
+ intel_engine_pm_put(engine);
goto err_wedged;
}
@@ -921,6 +1055,7 @@ static int live_suppress_self_preempt(void *arg)
MI_NOOP);
if (IS_ERR(rq_b)) {
err = PTR_ERR(rq_b);
+ intel_engine_pm_put(engine);
goto err_client_b;
}
i915_request_add(rq_b);
@@ -931,6 +1066,7 @@ static int live_suppress_self_preempt(void *arg)
if (!igt_wait_for_spinner(&b.spin, rq_b)) {
pr_err("Second client failed to start\n");
+ intel_engine_pm_put(engine);
goto err_wedged;
}
@@ -944,11 +1080,13 @@ static int live_suppress_self_preempt(void *arg)
engine->name,
engine->execlists.preempt_hang.count,
depth);
+ intel_engine_pm_put(engine);
err = -EINVAL;
goto err_client_b;
}
- if (igt_flush_test(i915, I915_WAIT_LOCKED))
+ intel_engine_pm_put(engine);
+ if (igt_flush_test(i915))
goto err_wedged;
}
@@ -957,9 +1095,6 @@ err_client_b:
preempt_client_fini(&b);
err_client_a:
preempt_client_fini(&a);
-err_unlock:
- intel_runtime_pm_put(&i915->runtime_pm, wakeref);
- mutex_unlock(&i915->drm.struct_mutex);
return err;
err_wedged:
@@ -984,9 +1119,13 @@ static struct i915_request *dummy_request(struct intel_engine_cs *engine)
if (!rq)
return NULL;
- INIT_LIST_HEAD(&rq->active_list);
rq->engine = engine;
+ spin_lock_init(&rq->lock);
+ INIT_LIST_HEAD(&rq->fence.cb_list);
+ rq->fence.lock = &rq->lock;
+ rq->fence.ops = &i915_fence_ops;
+
i915_sched_node_init(&rq->sched);
/* mark this request as permanently incomplete */
@@ -1025,7 +1164,6 @@ static int live_suppress_wait_preempt(void *arg)
struct preempt_client client[4];
struct intel_engine_cs *engine;
enum intel_engine_id id;
- intel_wakeref_t wakeref;
int err = -ENOMEM;
int i;
@@ -1038,11 +1176,8 @@ static int live_suppress_wait_preempt(void *arg)
if (!HAS_LOGICAL_RING_PREEMPTION(i915))
return 0;
- mutex_lock(&i915->drm.struct_mutex);
- wakeref = intel_runtime_pm_get(&i915->runtime_pm);
-
if (preempt_client_init(i915, &client[0])) /* ELSP[0] */
- goto err_unlock;
+ return -ENOMEM;
if (preempt_client_init(i915, &client[1])) /* ELSP[1] */
goto err_client_0;
if (preempt_client_init(i915, &client[2])) /* head of queue */
@@ -1079,8 +1214,8 @@ static int live_suppress_wait_preempt(void *arg)
}
/* Disable NEWCLIENT promotion */
- __i915_active_request_set(&rq[i]->timeline->last_request,
- dummy);
+ __i915_active_fence_set(&i915_request_timeline(rq[i])->last_request,
+ &dummy->fence);
i915_request_add(rq[i]);
}
@@ -1105,7 +1240,7 @@ static int live_suppress_wait_preempt(void *arg)
for (i = 0; i < ARRAY_SIZE(client); i++)
igt_spinner_end(&client[i].spin);
- if (igt_flush_test(i915, I915_WAIT_LOCKED))
+ if (igt_flush_test(i915))
goto err_wedged;
if (engine->execlists.preempt_hang.count) {
@@ -1128,9 +1263,6 @@ err_client_1:
preempt_client_fini(&client[1]);
err_client_0:
preempt_client_fini(&client[0]);
-err_unlock:
- intel_runtime_pm_put(&i915->runtime_pm, wakeref);
- mutex_unlock(&i915->drm.struct_mutex);
return err;
err_wedged:
@@ -1147,7 +1279,6 @@ static int live_chain_preempt(void *arg)
struct intel_engine_cs *engine;
struct preempt_client hi, lo;
enum intel_engine_id id;
- intel_wakeref_t wakeref;
int err = -ENOMEM;
/*
@@ -1159,11 +1290,8 @@ static int live_chain_preempt(void *arg)
if (!HAS_LOGICAL_RING_PREEMPTION(i915))
return 0;
- mutex_lock(&i915->drm.struct_mutex);
- wakeref = intel_runtime_pm_get(&i915->runtime_pm);
-
if (preempt_client_init(i915, &hi))
- goto err_unlock;
+ return -ENOMEM;
if (preempt_client_init(i915, &lo))
goto err_client_hi;
@@ -1274,9 +1402,6 @@ err_client_lo:
preempt_client_fini(&lo);
err_client_hi:
preempt_client_fini(&hi);
-err_unlock:
- intel_runtime_pm_put(&i915->runtime_pm, wakeref);
- mutex_unlock(&i915->drm.struct_mutex);
return err;
err_wedged:
@@ -1294,20 +1419,16 @@ static int live_preempt_hang(void *arg)
struct igt_spinner spin_hi, spin_lo;
struct intel_engine_cs *engine;
enum intel_engine_id id;
- intel_wakeref_t wakeref;
int err = -ENOMEM;
if (!HAS_LOGICAL_RING_PREEMPTION(i915))
return 0;
- if (!intel_has_reset_engine(i915))
+ if (!intel_has_reset_engine(&i915->gt))
return 0;
- mutex_lock(&i915->drm.struct_mutex);
- wakeref = intel_runtime_pm_get(&i915->runtime_pm);
-
if (igt_spinner_init(&spin_hi, &i915->gt))
- goto err_unlock;
+ return -ENOMEM;
if (igt_spinner_init(&spin_lo, &i915->gt))
goto err_spin_hi;
@@ -1384,7 +1505,7 @@ static int live_preempt_hang(void *arg)
igt_spinner_end(&spin_hi);
igt_spinner_end(&spin_lo);
- if (igt_flush_test(i915, I915_WAIT_LOCKED)) {
+ if (igt_flush_test(i915)) {
err = -EIO;
goto err_ctx_lo;
}
@@ -1399,9 +1520,6 @@ err_spin_lo:
igt_spinner_fini(&spin_lo);
err_spin_hi:
igt_spinner_fini(&spin_hi);
-err_unlock:
- intel_runtime_pm_put(&i915->runtime_pm, wakeref);
- mutex_unlock(&i915->drm.struct_mutex);
return err;
}
@@ -1440,7 +1558,11 @@ static int smoke_submit(struct preempt_smoke *smoke,
int err = 0;
if (batch) {
- vma = i915_vma_instance(batch, ctx->vm, NULL);
+ struct i915_address_space *vm;
+
+ vm = i915_gem_context_get_vm_rcu(ctx);
+ vma = i915_vma_instance(batch, vm, NULL);
+ i915_vm_put(vm);
if (IS_ERR(vma))
return PTR_ERR(vma);
@@ -1489,11 +1611,9 @@ static int smoke_crescendo_thread(void *arg)
struct i915_gem_context *ctx = smoke_context(smoke);
int err;
- mutex_lock(&smoke->i915->drm.struct_mutex);
err = smoke_submit(smoke,
ctx, count % I915_PRIORITY_MAX,
smoke->batch);
- mutex_unlock(&smoke->i915->drm.struct_mutex);
if (err)
return err;
@@ -1514,8 +1634,6 @@ static int smoke_crescendo(struct preempt_smoke *smoke, unsigned int flags)
unsigned long count;
int err = 0;
- mutex_unlock(&smoke->i915->drm.struct_mutex);
-
for_each_engine(engine, smoke->i915, id) {
arg[id] = *smoke;
arg[id].engine = engine;
@@ -1548,8 +1666,6 @@ static int smoke_crescendo(struct preempt_smoke *smoke, unsigned int flags)
put_task_struct(tsk[id]);
}
- mutex_lock(&smoke->i915->drm.struct_mutex);
-
pr_info("Submitted %lu crescendo:%x requests across %d engines and %d contexts\n",
count, flags,
RUNTIME_INFO(smoke->i915)->num_engines, smoke->ncontext);
@@ -1592,7 +1708,6 @@ static int live_preempt_smoke(void *arg)
.ncontext = 1024,
};
const unsigned int phase[] = { 0, BATCH };
- intel_wakeref_t wakeref;
struct igt_live_test t;
int err = -ENOMEM;
u32 *cs;
@@ -1607,13 +1722,10 @@ static int live_preempt_smoke(void *arg)
if (!smoke.contexts)
return -ENOMEM;
- mutex_lock(&smoke.i915->drm.struct_mutex);
- wakeref = intel_runtime_pm_get(&smoke.i915->runtime_pm);
-
smoke.batch = i915_gem_object_create_internal(smoke.i915, PAGE_SIZE);
if (IS_ERR(smoke.batch)) {
err = PTR_ERR(smoke.batch);
- goto err_unlock;
+ goto err_free;
}
cs = i915_gem_object_pin_map(smoke.batch, I915_MAP_WB);
@@ -1660,9 +1772,7 @@ err_ctx:
err_batch:
i915_gem_object_put(smoke.batch);
-err_unlock:
- intel_runtime_pm_put(&smoke.i915->runtime_pm, wakeref);
- mutex_unlock(&smoke.i915->drm.struct_mutex);
+err_free:
kfree(smoke.contexts);
return err;
@@ -1781,7 +1891,7 @@ static int nop_virtual_engine(struct drm_i915_private *i915,
prime, div64_u64(ktime_to_ns(times[1]), prime));
out:
- if (igt_flush_test(i915, I915_WAIT_LOCKED))
+ if (igt_flush_test(i915))
err = -EIO;
for (nc = 0; nc < nctx; nc++) {
@@ -1800,19 +1910,17 @@ static int live_virtual_engine(void *arg)
struct intel_gt *gt = &i915->gt;
enum intel_engine_id id;
unsigned int class, inst;
- int err = -ENODEV;
+ int err;
if (USES_GUC_SUBMISSION(i915))
return 0;
- mutex_lock(&i915->drm.struct_mutex);
-
for_each_engine(engine, i915, id) {
err = nop_virtual_engine(i915, &engine, 1, 1, 0);
if (err) {
pr_err("Failed to wrap engine %s: err=%d\n",
engine->name, err);
- goto out_unlock;
+ return err;
}
}
@@ -1833,17 +1941,15 @@ static int live_virtual_engine(void *arg)
err = nop_virtual_engine(i915, siblings, nsibling,
n, 0);
if (err)
- goto out_unlock;
+ return err;
}
err = nop_virtual_engine(i915, siblings, nsibling, n, CHAIN);
if (err)
- goto out_unlock;
+ return err;
}
-out_unlock:
- mutex_unlock(&i915->drm.struct_mutex);
- return err;
+ return 0;
}
static int mask_virtual_engine(struct drm_i915_private *i915,
@@ -1922,11 +2028,8 @@ static int mask_virtual_engine(struct drm_i915_private *i915,
}
err = igt_live_test_end(&t);
- if (err)
- goto out;
-
out:
- if (igt_flush_test(i915, I915_WAIT_LOCKED))
+ if (igt_flush_test(i915))
err = -EIO;
for (n = 0; n < nsibling; n++)
@@ -1947,13 +2050,11 @@ static int live_virtual_mask(void *arg)
struct intel_engine_cs *siblings[MAX_ENGINE_INSTANCE + 1];
struct intel_gt *gt = &i915->gt;
unsigned int class, inst;
- int err = 0;
+ int err;
if (USES_GUC_SUBMISSION(i915))
return 0;
- mutex_lock(&i915->drm.struct_mutex);
-
for (class = 0; class <= MAX_ENGINE_CLASS; class++) {
unsigned int nsibling;
@@ -1969,12 +2070,10 @@ static int live_virtual_mask(void *arg)
err = mask_virtual_engine(i915, siblings, nsibling);
if (err)
- goto out_unlock;
+ return err;
}
-out_unlock:
- mutex_unlock(&i915->drm.struct_mutex);
- return err;
+ return 0;
}
static int bond_virtual_engine(struct drm_i915_private *i915,
@@ -2104,7 +2203,7 @@ static int bond_virtual_engine(struct drm_i915_private *i915,
out:
for (n = 0; !IS_ERR(rq[n]); n++)
i915_request_put(rq[n]);
- if (igt_flush_test(i915, I915_WAIT_LOCKED))
+ if (igt_flush_test(i915))
err = -EIO;
kernel_context_close(ctx);
@@ -2125,13 +2224,11 @@ static int live_virtual_bond(void *arg)
struct intel_engine_cs *siblings[MAX_ENGINE_INSTANCE + 1];
struct intel_gt *gt = &i915->gt;
unsigned int class, inst;
- int err = 0;
+ int err;
if (USES_GUC_SUBMISSION(i915))
return 0;
- mutex_lock(&i915->drm.struct_mutex);
-
for (class = 0; class <= MAX_ENGINE_CLASS; class++) {
const struct phase *p;
int nsibling;
@@ -2154,20 +2251,20 @@ static int live_virtual_bond(void *arg)
if (err) {
pr_err("%s(%s): failed class=%d, nsibling=%d, err=%d\n",
__func__, p->name, class, nsibling, err);
- goto out_unlock;
+ return err;
}
}
}
-out_unlock:
- mutex_unlock(&i915->drm.struct_mutex);
- return err;
+ return 0;
}
int intel_execlists_live_selftests(struct drm_i915_private *i915)
{
static const struct i915_subtest tests[] = {
SUBTEST(live_sanitycheck),
+ SUBTEST(live_unlite_switch),
+ SUBTEST(live_unlite_preempt),
SUBTEST(live_timeslice_preempt),
SUBTEST(live_busywait_preempt),
SUBTEST(live_preempt),
@@ -2191,3 +2288,145 @@ int intel_execlists_live_selftests(struct drm_i915_private *i915)
return i915_live_subtests(tests, i915);
}
+
+static void hexdump(const void *buf, size_t len)
+{
+ const size_t rowsize = 8 * sizeof(u32);
+ const void *prev = NULL;
+ bool skip = false;
+ size_t pos;
+
+ for (pos = 0; pos < len; pos += rowsize) {
+ char line[128];
+
+ if (prev && !memcmp(prev, buf + pos, rowsize)) {
+ if (!skip) {
+ pr_info("*\n");
+ skip = true;
+ }
+ continue;
+ }
+
+ WARN_ON_ONCE(hex_dump_to_buffer(buf + pos, len - pos,
+ rowsize, sizeof(u32),
+ line, sizeof(line),
+ false) >= sizeof(line));
+ pr_info("[%04zx] %s\n", pos, line);
+
+ prev = buf + pos;
+ skip = false;
+ }
+}
+
+static int live_lrc_layout(void *arg)
+{
+ struct intel_gt *gt = arg;
+ struct intel_engine_cs *engine;
+ enum intel_engine_id id;
+ u32 *mem;
+ int err;
+
+ /*
+ * Check the registers offsets we use to create the initial reg state
+ * match the layout saved by HW.
+ */
+
+ mem = kmalloc(PAGE_SIZE, GFP_KERNEL);
+ if (!mem)
+ return -ENOMEM;
+
+ err = 0;
+ for_each_engine(engine, gt->i915, id) {
+ u32 *hw, *lrc;
+ int dw;
+
+ if (!engine->default_state)
+ continue;
+
+ hw = i915_gem_object_pin_map(engine->default_state,
+ I915_MAP_WB);
+ if (IS_ERR(hw)) {
+ err = PTR_ERR(hw);
+ break;
+ }
+ hw += LRC_STATE_PN * PAGE_SIZE / sizeof(*hw);
+
+ lrc = memset(mem, 0, PAGE_SIZE);
+ execlists_init_reg_state(lrc,
+ engine->kernel_context,
+ engine,
+ engine->kernel_context->ring,
+ true);
+
+ dw = 0;
+ do {
+ u32 lri = hw[dw];
+
+ if (lri == 0) {
+ dw++;
+ continue;
+ }
+
+ if ((lri & GENMASK(31, 23)) != MI_INSTR(0x22, 0)) {
+ pr_err("%s: Expected LRI command at dword %d, found %08x\n",
+ engine->name, dw, lri);
+ err = -EINVAL;
+ break;
+ }
+
+ if (lrc[dw] != lri) {
+ pr_err("%s: LRI command mismatch at dword %d, expected %08x found %08x\n",
+ engine->name, dw, lri, lrc[dw]);
+ err = -EINVAL;
+ break;
+ }
+
+ lri &= 0x7f;
+ lri++;
+ dw++;
+
+ while (lri) {
+ if (hw[dw] != lrc[dw]) {
+ pr_err("%s: Different registers found at dword %d, expected %x, found %x\n",
+ engine->name, dw, hw[dw], lrc[dw]);
+ err = -EINVAL;
+ break;
+ }
+
+ /*
+ * Skip over the actual register value as we
+ * expect that to differ.
+ */
+ dw += 2;
+ lri -= 2;
+ }
+ } while ((lrc[dw] & ~BIT(0)) != MI_BATCH_BUFFER_END);
+
+ if (err) {
+ pr_info("%s: HW register image:\n", engine->name);
+ hexdump(hw, PAGE_SIZE);
+
+ pr_info("%s: SW register image:\n", engine->name);
+ hexdump(lrc, PAGE_SIZE);
+ }
+
+ i915_gem_object_unpin_map(engine->default_state);
+ if (err)
+ break;
+ }
+
+ kfree(mem);
+ return err;
+}
+
+int intel_lrc_live_selftests(struct drm_i915_private *i915)
+{
+ static const struct i915_subtest tests[] = {
+ SUBTEST(live_lrc_layout),
+ };
+
+ if (!HAS_LOGICAL_RING_CONTEXTS(i915))
+ return 0;
+
+ return intel_gt_live_subtests(tests, &i915->gt);
+}
diff --git a/drivers/gpu/drm/i915/gt/selftest_reset.c b/drivers/gpu/drm/i915/gt/selftest_reset.c
index 00a4f60cdfd5..d79482db7fe8 100644
--- a/drivers/gpu/drm/i915/gt/selftest_reset.c
+++ b/drivers/gpu/drm/i915/gt/selftest_reset.c
@@ -112,7 +112,7 @@ static int igt_atomic_engine_reset(void *arg)
/* Check that the resets are usable from atomic context */
- if (!intel_has_reset_engine(gt->i915))
+ if (!intel_has_reset_engine(gt))
return 0;
if (USES_GUC_SUBMISSION(gt->i915))
@@ -170,7 +170,7 @@ int intel_reset_live_selftests(struct drm_i915_private *i915)
};
struct intel_gt *gt = &i915->gt;
- if (!intel_has_gpu_reset(gt->i915))
+ if (!intel_has_gpu_reset(gt))
return 0;
if (intel_gt_is_wedged(gt))
diff --git a/drivers/gpu/drm/i915/gt/selftest_timeline.c b/drivers/gpu/drm/i915/gt/selftest_timeline.c
index 321481403165..d6df40cdc8a6 100644
--- a/drivers/gpu/drm/i915/gt/selftest_timeline.c
+++ b/drivers/gpu/drm/i915/gt/selftest_timeline.c
@@ -6,8 +6,9 @@
#include <linux/prime_numbers.h>
-#include "gem/i915_gem_pm.h"
+#include "intel_engine_pm.h"
#include "intel_gt.h"
+#include "intel_gt_requests.h"
#include "../selftests/i915_random.h"
#include "../i915_selftest.h"
@@ -136,7 +137,6 @@ static int mock_hwsp_freelist(void *arg)
goto err_put;
}
- mutex_lock(&state.i915->drm.struct_mutex);
for (p = phases; p->name; p++) {
pr_debug("%s(%s)\n", __func__, p->name);
for_each_prime_number_from(na, 1, 2 * CACHELINES_PER_PAGE) {
@@ -149,7 +149,6 @@ static int mock_hwsp_freelist(void *arg)
out:
for (na = 0; na < state.max; na++)
__mock_hwsp_record(&state, na, NULL);
- mutex_unlock(&state.i915->drm.struct_mutex);
kfree(state.history);
err_put:
drm_dev_put(&state.i915->drm);
@@ -449,8 +448,6 @@ tl_write(struct intel_timeline *tl, struct intel_engine_cs *engine, u32 value)
struct i915_request *rq;
int err;
- lockdep_assert_held(&tl->gt->i915->drm.struct_mutex); /* lazy rq refs */
-
err = intel_timeline_pin(tl);
if (err) {
rq = ERR_PTR(err);
@@ -461,10 +458,14 @@ tl_write(struct intel_timeline *tl, struct intel_engine_cs *engine, u32 value)
if (IS_ERR(rq))
goto out_unpin;
+ i915_request_get(rq);
+
err = emit_ggtt_store_dw(rq, tl->hwsp_offset, value);
i915_request_add(rq);
- if (err)
+ if (err) {
+ i915_request_put(rq);
rq = ERR_PTR(err);
+ }
out_unpin:
intel_timeline_unpin(tl);
@@ -500,7 +501,6 @@ static int live_hwsp_engine(void *arg)
struct intel_timeline **timelines;
struct intel_engine_cs *engine;
enum intel_engine_id id;
- intel_wakeref_t wakeref;
unsigned long count, n;
int err = 0;
@@ -515,14 +515,13 @@ static int live_hwsp_engine(void *arg)
if (!timelines)
return -ENOMEM;
- mutex_lock(&i915->drm.struct_mutex);
- wakeref = intel_runtime_pm_get(&i915->runtime_pm);
-
count = 0;
for_each_engine(engine, i915, id) {
if (!intel_engine_can_store_dword(engine))
continue;
+ intel_engine_pm_get(engine);
+
for (n = 0; n < NUM_TIMELINES; n++) {
struct intel_timeline *tl;
struct i915_request *rq;
@@ -530,22 +529,26 @@ static int live_hwsp_engine(void *arg)
tl = checked_intel_timeline_create(i915);
if (IS_ERR(tl)) {
err = PTR_ERR(tl);
- goto out;
+ break;
}
rq = tl_write(tl, engine, count);
if (IS_ERR(rq)) {
intel_timeline_put(tl);
err = PTR_ERR(rq);
- goto out;
+ break;
}
timelines[count++] = tl;
+ i915_request_put(rq);
}
+
+ intel_engine_pm_put(engine);
+ if (err)
+ break;
}
-out:
- if (igt_flush_test(i915, I915_WAIT_LOCKED))
+ if (igt_flush_test(i915))
err = -EIO;
for (n = 0; n < count; n++) {
@@ -559,11 +562,7 @@ out:
intel_timeline_put(tl);
}
- intel_runtime_pm_put(&i915->runtime_pm, wakeref);
- mutex_unlock(&i915->drm.struct_mutex);
-
kvfree(timelines);
-
return err;
#undef NUM_TIMELINES
}
@@ -575,7 +574,6 @@ static int live_hwsp_alternate(void *arg)
struct intel_timeline **timelines;
struct intel_engine_cs *engine;
enum intel_engine_id id;
- intel_wakeref_t wakeref;
unsigned long count, n;
int err = 0;
@@ -591,9 +589,6 @@ static int live_hwsp_alternate(void *arg)
if (!timelines)
return -ENOMEM;
- mutex_lock(&i915->drm.struct_mutex);
- wakeref = intel_runtime_pm_get(&i915->runtime_pm);
-
count = 0;
for (n = 0; n < NUM_TIMELINES; n++) {
for_each_engine(engine, i915, id) {
@@ -605,11 +600,14 @@ static int live_hwsp_alternate(void *arg)
tl = checked_intel_timeline_create(i915);
if (IS_ERR(tl)) {
+ intel_engine_pm_put(engine);
err = PTR_ERR(tl);
goto out;
}
+ intel_engine_pm_get(engine);
rq = tl_write(tl, engine, count);
+ intel_engine_pm_put(engine);
if (IS_ERR(rq)) {
intel_timeline_put(tl);
err = PTR_ERR(rq);
@@ -617,11 +615,12 @@ static int live_hwsp_alternate(void *arg)
}
timelines[count++] = tl;
+ i915_request_put(rq);
}
}
out:
- if (igt_flush_test(i915, I915_WAIT_LOCKED))
+ if (igt_flush_test(i915))
err = -EIO;
for (n = 0; n < count; n++) {
@@ -635,11 +634,7 @@ out:
intel_timeline_put(tl);
}
- intel_runtime_pm_put(&i915->runtime_pm, wakeref);
- mutex_unlock(&i915->drm.struct_mutex);
-
kvfree(timelines);
-
return err;
#undef NUM_TIMELINES
}
@@ -647,10 +642,10 @@ out:
static int live_hwsp_wrap(void *arg)
{
struct drm_i915_private *i915 = arg;
+ struct intel_gt *gt = &i915->gt;
struct intel_engine_cs *engine;
struct intel_timeline *tl;
enum intel_engine_id id;
- intel_wakeref_t wakeref;
int err = 0;
/*
@@ -658,14 +653,10 @@ static int live_hwsp_wrap(void *arg)
* foreign GPU references.
*/
- mutex_lock(&i915->drm.struct_mutex);
- wakeref = intel_runtime_pm_get(&i915->runtime_pm);
+ tl = intel_timeline_create(gt, NULL);
+ if (IS_ERR(tl))
+ return PTR_ERR(tl);
- tl = intel_timeline_create(&i915->gt, NULL);
- if (IS_ERR(tl)) {
- err = PTR_ERR(tl);
- goto out_rpm;
- }
if (!tl->has_initial_breadcrumb || !tl->hwsp_cacheline)
goto out_free;
@@ -673,7 +664,7 @@ static int live_hwsp_wrap(void *arg)
if (err)
goto out_free;
- for_each_engine(engine, i915, id) {
+ for_each_engine(engine, gt->i915, id) {
const u32 *hwsp_seqno[2];
struct i915_request *rq;
u32 seqno[2];
@@ -681,7 +672,9 @@ static int live_hwsp_wrap(void *arg)
if (!intel_engine_can_store_dword(engine))
continue;
+ intel_engine_pm_get(engine);
rq = i915_request_create(engine->kernel_context);
+ intel_engine_pm_put(engine);
if (IS_ERR(rq)) {
err = PTR_ERR(rq);
goto out;
@@ -743,20 +736,16 @@ static int live_hwsp_wrap(void *arg)
goto out;
}
- i915_retire_requests(i915); /* recycle HWSP */
+ intel_gt_retire_requests(gt); /* recycle HWSP */
}
out:
- if (igt_flush_test(i915, I915_WAIT_LOCKED))
+ if (igt_flush_test(i915))
err = -EIO;
intel_timeline_unpin(tl);
out_free:
intel_timeline_put(tl);
-out_rpm:
- intel_runtime_pm_put(&i915->runtime_pm, wakeref);
- mutex_unlock(&i915->drm.struct_mutex);
-
return err;
}
@@ -765,7 +754,6 @@ static int live_hwsp_recycle(void *arg)
struct drm_i915_private *i915 = arg;
struct intel_engine_cs *engine;
enum intel_engine_id id;
- intel_wakeref_t wakeref;
unsigned long count;
int err = 0;
@@ -775,9 +763,6 @@ static int live_hwsp_recycle(void *arg)
* want to confuse ourselves or the GPU.
*/
- mutex_lock(&i915->drm.struct_mutex);
- wakeref = intel_runtime_pm_get(&i915->runtime_pm);
-
count = 0;
for_each_engine(engine, i915, id) {
IGT_TIMEOUT(end_time);
@@ -785,6 +770,8 @@ static int live_hwsp_recycle(void *arg)
if (!intel_engine_can_store_dword(engine))
continue;
+ intel_engine_pm_get(engine);
+
do {
struct intel_timeline *tl;
struct i915_request *rq;
@@ -792,21 +779,22 @@ static int live_hwsp_recycle(void *arg)
tl = checked_intel_timeline_create(i915);
if (IS_ERR(tl)) {
err = PTR_ERR(tl);
- goto out;
+ break;
}
rq = tl_write(tl, engine, count);
if (IS_ERR(rq)) {
intel_timeline_put(tl);
err = PTR_ERR(rq);
- goto out;
+ break;
}
if (i915_request_wait(rq, 0, HZ / 5) < 0) {
pr_err("Wait for timeline writes timed out!\n");
+ i915_request_put(rq);
intel_timeline_put(tl);
err = -EIO;
- goto out;
+ break;
}
if (*tl->hwsp_seqno != count) {
@@ -815,17 +803,18 @@ static int live_hwsp_recycle(void *arg)
err = -EINVAL;
}
+ i915_request_put(rq);
intel_timeline_put(tl);
count++;
if (err)
- goto out;
+ break;
} while (!__igt_timeout(end_time, NULL));
- }
-out:
- intel_runtime_pm_put(&i915->runtime_pm, wakeref);
- mutex_unlock(&i915->drm.struct_mutex);
+ intel_engine_pm_put(engine);
+ if (err)
+ break;
+ }
return err;
}
diff --git a/drivers/gpu/drm/i915/gt/selftest_workarounds.c b/drivers/gpu/drm/i915/gt/selftest_workarounds.c
index d06d68ac2a3b..95627e80f246 100644
--- a/drivers/gpu/drm/i915/gt/selftest_workarounds.c
+++ b/drivers/gpu/drm/i915/gt/selftest_workarounds.c
@@ -260,7 +260,6 @@ switch_to_scratch_context(struct intel_engine_cs *engine,
rq = igt_spinner_create_request(spin, ce, MI_NOOP);
intel_context_put(ce);
- kernel_context_close(ctx);
if (IS_ERR(rq)) {
spin = NULL;
@@ -279,6 +278,7 @@ err:
if (err && spin)
igt_spinner_end(spin);
+ kernel_context_close(ctx);
return err;
}
@@ -355,6 +355,7 @@ out_ctx:
static struct i915_vma *create_batch(struct i915_gem_context *ctx)
{
struct drm_i915_gem_object *obj;
+ struct i915_address_space *vm;
struct i915_vma *vma;
int err;
@@ -362,7 +363,9 @@ static struct i915_vma *create_batch(struct i915_gem_context *ctx)
if (IS_ERR(obj))
return ERR_CAST(obj);
- vma = i915_vma_instance(obj, ctx->vm, NULL);
+ vm = i915_gem_context_get_vm_rcu(ctx);
+ vma = i915_vma_instance(obj, vm, NULL);
+ i915_vm_put(vm);
if (IS_ERR(vma)) {
err = PTR_ERR(vma);
goto err_obj;
@@ -463,12 +466,15 @@ static int check_dirty_whitelist(struct i915_gem_context *ctx,
0xffff00ff,
0xffffffff,
};
+ struct i915_address_space *vm;
struct i915_vma *scratch;
struct i915_vma *batch;
int err = 0, i, v;
u32 *cs, *results;
- scratch = create_scratch(ctx->vm, 2 * ARRAY_SIZE(values) + 1);
+ vm = i915_gem_context_get_vm_rcu(ctx);
+ scratch = create_scratch(vm, 2 * ARRAY_SIZE(values) + 1);
+ i915_vm_put(vm);
if (IS_ERR(scratch))
return PTR_ERR(scratch);
@@ -565,6 +571,14 @@ static int check_dirty_whitelist(struct i915_gem_context *ctx,
goto err_request;
}
+ i915_vma_lock(batch);
+ err = i915_request_await_object(rq, batch->obj, false);
+ if (err == 0)
+ err = i915_vma_move_to_active(batch, rq, 0);
+ i915_vma_unlock(batch);
+ if (err)
+ goto err_request;
+
err = engine->emit_bb_start(rq,
batch->node.start, PAGE_SIZE,
0);
@@ -668,7 +682,7 @@ out_unpin:
break;
}
- if (igt_flush_test(ctx->i915, I915_WAIT_LOCKED))
+ if (igt_flush_test(ctx->i915))
err = -EIO;
out_batch:
i915_vma_unpin_and_release(&batch, 0);
@@ -694,9 +708,7 @@ static int live_dirty_whitelist(void *arg)
wakeref = intel_runtime_pm_get(&i915->runtime_pm);
- mutex_unlock(&i915->drm.struct_mutex);
file = mock_file(i915);
- mutex_lock(&i915->drm.struct_mutex);
if (IS_ERR(file)) {
err = PTR_ERR(file);
goto out_rpm;
@@ -718,9 +730,7 @@ static int live_dirty_whitelist(void *arg)
}
out_file:
- mutex_unlock(&i915->drm.struct_mutex);
mock_file_free(i915, file);
- mutex_lock(&i915->drm.struct_mutex);
out_rpm:
intel_runtime_pm_put(&i915->runtime_pm, wakeref);
return err;
@@ -739,7 +749,7 @@ static int live_reset_whitelist(void *arg)
igt_global_reset_lock(&i915->gt);
- if (intel_has_reset_engine(i915)) {
+ if (intel_has_reset_engine(&i915->gt)) {
err = check_whitelist_across_reset(engine,
do_engine_reset,
"engine");
@@ -747,7 +757,7 @@ static int live_reset_whitelist(void *arg)
goto out;
}
- if (intel_has_gpu_reset(i915)) {
+ if (intel_has_gpu_reset(&i915->gt)) {
err = check_whitelist_across_reset(engine,
do_device_reset,
"device");
@@ -850,6 +860,14 @@ static int scrub_whitelisted_registers(struct i915_gem_context *ctx,
goto err_request;
}
+ i915_vma_lock(batch);
+ err = i915_request_await_object(rq, batch->obj, false);
+ if (err == 0)
+ err = i915_vma_move_to_active(batch, rq, 0);
+ i915_vma_unlock(batch);
+ if (err)
+ goto err_request;
+
/* Perform the writes from an unprivileged "user" batch */
err = engine->emit_bb_start(rq, batch->node.start, 0, 0);
@@ -994,6 +1012,7 @@ static int live_isolated_whitelist(void *arg)
return 0;
for (i = 0; i < ARRAY_SIZE(client); i++) {
+ struct i915_address_space *vm;
struct i915_gem_context *c;
c = kernel_context(i915);
@@ -1002,22 +1021,27 @@ static int live_isolated_whitelist(void *arg)
goto err;
}
- client[i].scratch[0] = create_scratch(c->vm, 1024);
+ vm = i915_gem_context_get_vm_rcu(c);
+
+ client[i].scratch[0] = create_scratch(vm, 1024);
if (IS_ERR(client[i].scratch[0])) {
err = PTR_ERR(client[i].scratch[0]);
+ i915_vm_put(vm);
kernel_context_close(c);
goto err;
}
- client[i].scratch[1] = create_scratch(c->vm, 1024);
+ client[i].scratch[1] = create_scratch(vm, 1024);
if (IS_ERR(client[i].scratch[1])) {
err = PTR_ERR(client[i].scratch[1]);
i915_vma_unpin_and_release(&client[i].scratch[0], 0);
+ i915_vm_put(vm);
kernel_context_close(c);
goto err;
}
client[i].ctx = c;
+ i915_vm_put(vm);
}
for_each_engine(engine, i915, id) {
@@ -1074,7 +1098,7 @@ err:
kernel_context_close(client[i].ctx);
}
- if (igt_flush_test(i915, I915_WAIT_LOCKED))
+ if (igt_flush_test(i915))
err = -EIO;
return err;
@@ -1115,7 +1139,7 @@ live_gpu_reset_workarounds(void *arg)
struct wa_lists lists;
bool ok;
- if (!intel_has_gpu_reset(i915))
+ if (!intel_has_gpu_reset(&i915->gt))
return 0;
ctx = kernel_context(i915);
@@ -1162,7 +1186,7 @@ live_engine_reset_workarounds(void *arg)
struct wa_lists lists;
int ret = 0;
- if (!intel_has_reset_engine(i915))
+ if (!intel_has_reset_engine(&i915->gt))
return 0;
ctx = kernel_context(i915);
@@ -1232,7 +1256,7 @@ err:
igt_global_reset_unlock(&i915->gt);
kernel_context_close(ctx);
- igt_flush_test(i915, I915_WAIT_LOCKED);
+ igt_flush_test(i915);
return ret;
}
@@ -1246,14 +1270,9 @@ int intel_workarounds_live_selftests(struct drm_i915_private *i915)
SUBTEST(live_gpu_reset_workarounds),
SUBTEST(live_engine_reset_workarounds),
};
- int err;
if (intel_gt_is_wedged(&i915->gt))
return 0;
- mutex_lock(&i915->drm.struct_mutex);
- err = i915_subtests(tests, i915);
- mutex_unlock(&i915->drm.struct_mutex);
-
- return err;
+ return i915_subtests(tests, i915);
}
diff --git a/drivers/gpu/drm/i915/gt/selftests/mock_timeline.c b/drivers/gpu/drm/i915/gt/selftests/mock_timeline.c
index 598170efcaf6..2a77c051f36a 100644
--- a/drivers/gpu/drm/i915/gt/selftests/mock_timeline.c
+++ b/drivers/gpu/drm/i915/gt/selftests/mock_timeline.c
@@ -15,7 +15,7 @@ void mock_timeline_init(struct intel_timeline *timeline, u64 context)
mutex_init(&timeline->mutex);
- INIT_ACTIVE_REQUEST(&timeline->last_request, &timeline->mutex);
+ INIT_ACTIVE_FENCE(&timeline->last_request, &timeline->mutex);
INIT_LIST_HEAD(&timeline->requests);
i915_syncmap_init(&timeline->sync);
diff --git a/drivers/gpu/drm/i915/gt/uc/intel_guc_reg.h b/drivers/gpu/drm/i915/gt/uc/intel_guc_reg.h
index edf194d23c6b..1949346e714e 100644
--- a/drivers/gpu/drm/i915/gt/uc/intel_guc_reg.h
+++ b/drivers/gpu/drm/i915/gt/uc/intel_guc_reg.h
@@ -83,6 +83,9 @@
#define GEN8_GTCR _MMIO(0x4274)
#define GEN8_GTCR_INVALIDATE (1<<0)
+#define GEN12_GUC_TLB_INV_CR _MMIO(0xcee8)
+#define GEN12_GUC_TLB_INV_CR_INVALIDATE (1 << 0)
+
#define GUC_ARAT_C6DIS _MMIO(0xA178)
#define GUC_SHIM_CONTROL _MMIO(0xc064)
diff --git a/drivers/gpu/drm/i915/gt/uc/intel_uc.c b/drivers/gpu/drm/i915/gt/uc/intel_uc.c
index 71ee7ab035cc..29a9eec60d2e 100644
--- a/drivers/gpu/drm/i915/gt/uc/intel_uc.c
+++ b/drivers/gpu/drm/i915/gt/uc/intel_uc.c
@@ -224,17 +224,7 @@ static int guc_enable_communication(struct intel_guc *guc)
return 0;
}
-static void guc_stop_communication(struct intel_guc *guc)
-{
- intel_guc_ct_stop(&guc->ct);
-
- guc->send = intel_guc_send_nop;
- guc->handler = intel_guc_to_host_event_handler_nop;
-
- guc_clear_mmio_msg(guc);
-}
-
-static void guc_disable_communication(struct intel_guc *guc)
+static void __guc_stop_communication(struct intel_guc *guc)
{
/*
* Events generated during or after CT disable are logged by guc in
@@ -247,6 +237,20 @@ static void guc_disable_communication(struct intel_guc *guc)
guc->send = intel_guc_send_nop;
guc->handler = intel_guc_to_host_event_handler_nop;
+}
+
+static void guc_stop_communication(struct intel_guc *guc)
+{
+ intel_guc_ct_stop(&guc->ct);
+
+ __guc_stop_communication(guc);
+
+ DRM_INFO("GuC communication stopped\n");
+}
+
+static void guc_disable_communication(struct intel_guc *guc)
+{
+ __guc_stop_communication(guc);
intel_guc_ct_disable(&guc->ct);
@@ -537,7 +541,9 @@ void intel_uc_fini_hw(struct intel_uc *uc)
if (intel_uc_supports_guc_submission(uc))
intel_guc_submission_disable(guc);
- guc_disable_communication(guc);
+ if (guc_communication_enabled(guc))
+ guc_disable_communication(guc);
+
__uc_sanitize(uc);
}
diff --git a/drivers/gpu/drm/i915/gt/uc/intel_uc_fw.c b/drivers/gpu/drm/i915/gt/uc/intel_uc_fw.c
index bd22bf11adad..bb4889d2346d 100644
--- a/drivers/gpu/drm/i915/gt/uc/intel_uc_fw.c
+++ b/drivers/gpu/drm/i915/gt/uc/intel_uc_fw.c
@@ -39,25 +39,27 @@ void intel_uc_fw_change_status(struct intel_uc_fw *uc_fw,
* Must be ordered based on platform + revid, from newer to older.
*/
#define INTEL_UC_FIRMWARE_DEFS(fw_def, guc_def, huc_def) \
- fw_def(ICELAKE, 0, guc_def(icl, 33, 0, 0), huc_def(icl, 8, 4, 3238)) \
- fw_def(COFFEELAKE, 0, guc_def(kbl, 33, 0, 0), huc_def(kbl, 02, 00, 1810)) \
- fw_def(GEMINILAKE, 0, guc_def(glk, 33, 0, 0), huc_def(glk, 03, 01, 2893)) \
- fw_def(KABYLAKE, 0, guc_def(kbl, 33, 0, 0), huc_def(kbl, 02, 00, 1810)) \
- fw_def(BROXTON, 0, guc_def(bxt, 33, 0, 0), huc_def(bxt, 01, 8, 2893)) \
- fw_def(SKYLAKE, 0, guc_def(skl, 33, 0, 0), huc_def(skl, 01, 07, 1398))
-
-#define __MAKE_UC_FW_PATH(prefix_, name_, separator_, major_, minor_, patch_) \
+ fw_def(ELKHARTLAKE, 0, guc_def(ehl, 33, 0, 4), huc_def(ehl, 9, 0, 0)) \
+ fw_def(ICELAKE, 0, guc_def(icl, 33, 0, 0), huc_def(icl, 9, 0, 0)) \
+ fw_def(COFFEELAKE, 5, guc_def(cml, 33, 0, 0), huc_def(cml, 4, 0, 0)) \
+ fw_def(COFFEELAKE, 0, guc_def(kbl, 33, 0, 0), huc_def(kbl, 4, 0, 0)) \
+ fw_def(GEMINILAKE, 0, guc_def(glk, 33, 0, 0), huc_def(glk, 4, 0, 0)) \
+ fw_def(KABYLAKE, 0, guc_def(kbl, 33, 0, 0), huc_def(kbl, 4, 0, 0)) \
+ fw_def(BROXTON, 0, guc_def(bxt, 33, 0, 0), huc_def(bxt, 2, 0, 0)) \
+ fw_def(SKYLAKE, 0, guc_def(skl, 33, 0, 0), huc_def(skl, 2, 0, 0))
+
+#define __MAKE_UC_FW_PATH(prefix_, name_, major_, minor_, patch_) \
"i915/" \
__stringify(prefix_) name_ \
- __stringify(major_) separator_ \
- __stringify(minor_) separator_ \
+ __stringify(major_) "." \
+ __stringify(minor_) "." \
__stringify(patch_) ".bin"
#define MAKE_GUC_FW_PATH(prefix_, major_, minor_, patch_) \
- __MAKE_UC_FW_PATH(prefix_, "_guc_", ".", major_, minor_, patch_)
+ __MAKE_UC_FW_PATH(prefix_, "_guc_", major_, minor_, patch_)
#define MAKE_HUC_FW_PATH(prefix_, major_, minor_, bld_num_) \
- __MAKE_UC_FW_PATH(prefix_, "_huc_ver", "_", major_, minor_, bld_num_)
+ __MAKE_UC_FW_PATH(prefix_, "_huc_", major_, minor_, bld_num_)
/* All blobs need to be declared via MODULE_FIRMWARE() */
#define INTEL_UC_MODULE_FW(platform_, revid_, guc_, huc_) \
@@ -337,25 +339,10 @@ int intel_uc_fw_fetch(struct intel_uc_fw *uc_fw, struct drm_i915_private *i915)
}
/* Get version numbers from the CSS header */
- switch (uc_fw->type) {
- case INTEL_UC_FW_TYPE_GUC:
- uc_fw->major_ver_found = FIELD_GET(CSS_SW_VERSION_GUC_MAJOR,
- css->sw_version);
- uc_fw->minor_ver_found = FIELD_GET(CSS_SW_VERSION_GUC_MINOR,
- css->sw_version);
- break;
-
- case INTEL_UC_FW_TYPE_HUC:
- uc_fw->major_ver_found = FIELD_GET(CSS_SW_VERSION_HUC_MAJOR,
- css->sw_version);
- uc_fw->minor_ver_found = FIELD_GET(CSS_SW_VERSION_HUC_MINOR,
- css->sw_version);
- break;
-
- default:
- MISSING_CASE(uc_fw->type);
- break;
- }
+ uc_fw->major_ver_found = FIELD_GET(CSS_SW_VERSION_UC_MAJOR,
+ css->sw_version);
+ uc_fw->minor_ver_found = FIELD_GET(CSS_SW_VERSION_UC_MINOR,
+ css->sw_version);
if (uc_fw->major_ver_found != uc_fw->major_ver_wanted ||
uc_fw->minor_ver_found < uc_fw->minor_ver_wanted) {
@@ -400,7 +387,7 @@ static u32 uc_fw_ggtt_offset(struct intel_uc_fw *uc_fw, struct i915_ggtt *ggtt)
{
struct drm_mm_node *node = &ggtt->uc_fw;
- GEM_BUG_ON(!node->allocated);
+ GEM_BUG_ON(!drm_mm_node_allocated(node));
GEM_BUG_ON(upper_32_bits(node->start));
GEM_BUG_ON(upper_32_bits(node->start + node->size - 1));
diff --git a/drivers/gpu/drm/i915/gt/uc/intel_uc_fw_abi.h b/drivers/gpu/drm/i915/gt/uc/intel_uc_fw_abi.h
index ae58e8a8c53b..f8f6c91a0df6 100644
--- a/drivers/gpu/drm/i915/gt/uc/intel_uc_fw_abi.h
+++ b/drivers/gpu/drm/i915/gt/uc/intel_uc_fw_abi.h
@@ -69,11 +69,9 @@ struct uc_css_header {
char username[8];
char buildnumber[12];
u32 sw_version;
-#define CSS_SW_VERSION_GUC_MAJOR (0xFF << 16)
-#define CSS_SW_VERSION_GUC_MINOR (0xFF << 8)
-#define CSS_SW_VERSION_GUC_PATCH (0xFF << 0)
-#define CSS_SW_VERSION_HUC_MAJOR (0xFFFF << 16)
-#define CSS_SW_VERSION_HUC_MINOR (0xFFFF << 0)
+#define CSS_SW_VERSION_UC_MAJOR (0xFF << 16)
+#define CSS_SW_VERSION_UC_MINOR (0xFF << 8)
+#define CSS_SW_VERSION_UC_PATCH (0xFF << 0)
u32 reserved[14];
u32 header_info;
} __packed;
diff --git a/drivers/gpu/drm/i915/gt/uc/selftest_guc.c b/drivers/gpu/drm/i915/gt/uc/selftest_guc.c
index bba0eafe1cdb..f927f851aadf 100644
--- a/drivers/gpu/drm/i915/gt/uc/selftest_guc.c
+++ b/drivers/gpu/drm/i915/gt/uc/selftest_guc.c
@@ -116,7 +116,6 @@ static int igt_guc_clients(void *args)
int err = 0;
GEM_BUG_ON(!HAS_GT_UC(dev_priv));
- mutex_lock(&dev_priv->drm.struct_mutex);
wakeref = intel_runtime_pm_get(&dev_priv->runtime_pm);
guc = &dev_priv->gt.uc.guc;
@@ -190,7 +189,6 @@ out:
guc_clients_enable(guc);
unlock:
intel_runtime_pm_put(&dev_priv->runtime_pm, wakeref);
- mutex_unlock(&dev_priv->drm.struct_mutex);
return err;
}
@@ -208,7 +206,6 @@ static int igt_guc_doorbells(void *arg)
u16 db_id;
GEM_BUG_ON(!HAS_GT_UC(dev_priv));
- mutex_lock(&dev_priv->drm.struct_mutex);
wakeref = intel_runtime_pm_get(&dev_priv->runtime_pm);
guc = &dev_priv->gt.uc.guc;
@@ -299,7 +296,6 @@ out:
}
unlock:
intel_runtime_pm_put(&dev_priv->runtime_pm, wakeref);
- mutex_unlock(&dev_priv->drm.struct_mutex);
return err;
}
OpenPOWER on IntegriCloud