From 3f6e9822308127104a7bb007ca569f2c57d03b67 Mon Sep 17 00:00:00 2001 From: Chris Wilson Date: Wed, 16 May 2018 19:33:55 +0100 Subject: drm/i915: Stop parking the signaler around reset MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit We cannot call kthread_park() from softirq context, so let's avoid it entirely during the reset. We wanted to suspend the signaler so that it would not mark a request as complete at the same time as we marked it as being in error. Instead of parking the signaling, stop the engine from advancing so that the GPU doesn't emit the breadcrumb for our chosen "guilty" request. v2: Refactor setting STOP_RING so that we don't have the same code thrice Signed-off-by: Chris Wilson Cc: Mika Kuoppala Cc: Michałt Winiarski CC: Michel Thierry Cc: Jeff McGee Cc: Tvrtko Ursulin Reviewed-by: Tvrtko Ursulin Link: https://patchwork.freedesktop.org/patch/msgid/20180516183355.10553-8-chris@chris-wilson.co.uk --- drivers/gpu/drm/i915/intel_engine_cs.c | 29 +++++++++++++++++++++++++++++ 1 file changed, 29 insertions(+) (limited to 'drivers/gpu/drm/i915/intel_engine_cs.c') diff --git a/drivers/gpu/drm/i915/intel_engine_cs.c b/drivers/gpu/drm/i915/intel_engine_cs.c index 6bfd7e3ed152..d4e159ae65a6 100644 --- a/drivers/gpu/drm/i915/intel_engine_cs.c +++ b/drivers/gpu/drm/i915/intel_engine_cs.c @@ -769,6 +769,35 @@ u64 intel_engine_get_last_batch_head(const struct intel_engine_cs *engine) return bbaddr; } +int intel_engine_stop_cs(struct intel_engine_cs *engine) +{ + struct drm_i915_private *dev_priv = engine->i915; + const u32 base = engine->mmio_base; + const i915_reg_t mode = RING_MI_MODE(base); + int err; + + if (INTEL_GEN(dev_priv) < 3) + return -ENODEV; + + GEM_TRACE("%s\n", engine->name); + + I915_WRITE_FW(mode, _MASKED_BIT_ENABLE(STOP_RING)); + + err = 0; + if (__intel_wait_for_register_fw(dev_priv, + mode, MODE_IDLE, MODE_IDLE, + 1000, 0, + NULL)) { + GEM_TRACE("%s: timed out on STOP_RING -> IDLE\n", engine->name); + err = -ETIMEDOUT; + } + + /* A final mmio read to let GPU writes be hopefully flushed to memory */ + POSTING_READ_FW(mode); + + return err; +} + const char *i915_cache_level_str(struct drm_i915_private *i915, int type) { switch (type) { -- cgit v1.2.3 From 96d4f03c20d04c80026b1ec3643c090cf4f0eb20 Mon Sep 17 00:00:00 2001 From: Chris Wilson Date: Thu, 17 May 2018 16:28:24 +0100 Subject: drm/i915: Nul-terminate legacy debug string MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Make sure that when we don't have any scheduler attributes for the request, the string is terminated. Fixes: 247870ac8ea7 ("drm/i915: Build request info on stack before printk") Signed-off-by: Chris Wilson Cc: Joonas Lahtinen Reviewed-by: Ville Syrjälä Link: https://patchwork.freedesktop.org/patch/msgid/20180517152824.11619-1-chris@chris-wilson.co.uk --- drivers/gpu/drm/i915/intel_engine_cs.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'drivers/gpu/drm/i915/intel_engine_cs.c') diff --git a/drivers/gpu/drm/i915/intel_engine_cs.c b/drivers/gpu/drm/i915/intel_engine_cs.c index d4e159ae65a6..e78c6e769e8c 100644 --- a/drivers/gpu/drm/i915/intel_engine_cs.c +++ b/drivers/gpu/drm/i915/intel_engine_cs.c @@ -1143,7 +1143,7 @@ static void print_request(struct drm_printer *m, const char *prefix) { const char *name = rq->fence.ops->get_timeline_name(&rq->fence); - char buf[80]; + char buf[80] = ""; int x = 0; x = print_sched_attr(rq->i915, &rq->sched.attr, buf, x, sizeof(buf)); -- cgit v1.2.3 From 4e0d64dba816adf18c17488d38ede67a3d0e9b40 Mon Sep 17 00:00:00 2001 From: Chris Wilson Date: Thu, 17 May 2018 22:26:30 +0100 Subject: drm/i915: Move request->ctx aside In the next patch, we want to store the intel_context pointer inside i915_request, as it is frequently access via a convoluted dance when submitting the request to hw. Having two context pointers inside i915_request leads to confusion so first rename the existing i915_gem_context pointer to i915_request.gem_context. Signed-off-by: Chris Wilson Cc: Tvrtko Ursulin Cc: Joonas Lahtinen Reviewed-by: Tvrtko Ursulin Link: https://patchwork.freedesktop.org/patch/msgid/20180517212633.24934-1-chris@chris-wilson.co.uk --- drivers/gpu/drm/i915/gvt/scheduler.c | 4 +-- drivers/gpu/drm/i915/i915_debugfs.c | 4 +-- drivers/gpu/drm/i915/i915_gem.c | 10 ++++---- drivers/gpu/drm/i915/i915_gpu_error.c | 18 ++++++++------ drivers/gpu/drm/i915/i915_request.c | 12 ++++----- drivers/gpu/drm/i915/i915_request.h | 2 +- drivers/gpu/drm/i915/i915_trace.h | 10 ++++---- drivers/gpu/drm/i915/intel_engine_cs.c | 2 +- drivers/gpu/drm/i915/intel_guc_submission.c | 7 +++--- drivers/gpu/drm/i915/intel_lrc.c | 31 +++++++++++++----------- drivers/gpu/drm/i915/intel_ringbuffer.c | 12 ++++----- drivers/gpu/drm/i915/selftests/intel_hangcheck.c | 5 +++- drivers/gpu/drm/i915/selftests/intel_lrc.c | 2 +- 13 files changed, 64 insertions(+), 55 deletions(-) (limited to 'drivers/gpu/drm/i915/intel_engine_cs.c') diff --git a/drivers/gpu/drm/i915/gvt/scheduler.c b/drivers/gpu/drm/i915/gvt/scheduler.c index c2d183b91500..17f9f8d7e148 100644 --- a/drivers/gpu/drm/i915/gvt/scheduler.c +++ b/drivers/gpu/drm/i915/gvt/scheduler.c @@ -205,7 +205,7 @@ static int populate_shadow_context(struct intel_vgpu_workload *workload) static inline bool is_gvt_request(struct i915_request *req) { - return i915_gem_context_force_single_submission(req->ctx); + return i915_gem_context_force_single_submission(req->gem_context); } static void save_ring_hw_state(struct intel_vgpu *vgpu, int ring_id) @@ -305,7 +305,7 @@ static int copy_workload_to_ring_buffer(struct intel_vgpu_workload *workload) struct i915_request *req = workload->req; if (IS_KABYLAKE(req->i915) && - is_inhibit_context(req->ctx, req->engine->id)) + is_inhibit_context(req->gem_context, req->engine->id)) intel_vgpu_restore_inhibit_context(vgpu, req); /* allocate shadow ring buffer */ diff --git a/drivers/gpu/drm/i915/i915_debugfs.c b/drivers/gpu/drm/i915/i915_debugfs.c index d78beaabc051..52515445ac40 100644 --- a/drivers/gpu/drm/i915/i915_debugfs.c +++ b/drivers/gpu/drm/i915/i915_debugfs.c @@ -542,8 +542,8 @@ static int i915_gem_object_info(struct seq_file *m, void *data) struct i915_request, client_link); rcu_read_lock(); - task = pid_task(request && request->ctx->pid ? - request->ctx->pid : file->pid, + task = pid_task(request && request->gem_context->pid ? + request->gem_context->pid : file->pid, PIDTYPE_PID); print_file_stats(m, task ? task->comm : "", stats); rcu_read_unlock(); diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c index b0fe452ce17c..a20f8db5729d 100644 --- a/drivers/gpu/drm/i915/i915_gem.c +++ b/drivers/gpu/drm/i915/i915_gem.c @@ -3067,7 +3067,7 @@ static void skip_request(struct i915_request *request) static void engine_skip_context(struct i915_request *request) { struct intel_engine_cs *engine = request->engine; - struct i915_gem_context *hung_ctx = request->ctx; + struct i915_gem_context *hung_ctx = request->gem_context; struct i915_timeline *timeline = request->timeline; unsigned long flags; @@ -3077,7 +3077,7 @@ static void engine_skip_context(struct i915_request *request) spin_lock_nested(&timeline->lock, SINGLE_DEPTH_NESTING); list_for_each_entry_continue(request, &engine->timeline.requests, link) - if (request->ctx == hung_ctx) + if (request->gem_context == hung_ctx) skip_request(request); list_for_each_entry(request, &timeline->requests, link) @@ -3123,11 +3123,11 @@ i915_gem_reset_request(struct intel_engine_cs *engine, } if (stalled) { - i915_gem_context_mark_guilty(request->ctx); + i915_gem_context_mark_guilty(request->gem_context); skip_request(request); /* If this context is now banned, skip all pending requests. */ - if (i915_gem_context_is_banned(request->ctx)) + if (i915_gem_context_is_banned(request->gem_context)) engine_skip_context(request); } else { /* @@ -3137,7 +3137,7 @@ i915_gem_reset_request(struct intel_engine_cs *engine, */ request = i915_gem_find_active_request(engine); if (request) { - i915_gem_context_mark_innocent(request->ctx); + i915_gem_context_mark_innocent(request->gem_context); dma_fence_set_error(&request->fence, -EAGAIN); /* Rewind the engine to replay the incomplete rq */ diff --git a/drivers/gpu/drm/i915/i915_gpu_error.c b/drivers/gpu/drm/i915/i915_gpu_error.c index efb808272460..37c9a42654ba 100644 --- a/drivers/gpu/drm/i915/i915_gpu_error.c +++ b/drivers/gpu/drm/i915/i915_gpu_error.c @@ -1287,9 +1287,11 @@ static void error_record_engine_registers(struct i915_gpu_state *error, static void record_request(struct i915_request *request, struct drm_i915_error_request *erq) { - erq->context = request->ctx->hw_id; + struct i915_gem_context *ctx = request->gem_context; + + erq->context = ctx->hw_id; erq->sched_attr = request->sched.attr; - erq->ban_score = atomic_read(&request->ctx->ban_score); + erq->ban_score = atomic_read(&ctx->ban_score); erq->seqno = request->global_seqno; erq->jiffies = request->emitted_jiffies; erq->start = i915_ggtt_offset(request->ring->vma); @@ -1297,7 +1299,7 @@ static void record_request(struct i915_request *request, erq->tail = request->tail; rcu_read_lock(); - erq->pid = request->ctx->pid ? pid_nr(request->ctx->pid) : 0; + erq->pid = ctx->pid ? pid_nr(ctx->pid) : 0; rcu_read_unlock(); } @@ -1461,12 +1463,12 @@ static void gem_record_rings(struct i915_gpu_state *error) request = i915_gem_find_active_request(engine); if (request) { + struct i915_gem_context *ctx = request->gem_context; struct intel_ring *ring; - ee->vm = request->ctx->ppgtt ? - &request->ctx->ppgtt->base : &ggtt->base; + ee->vm = ctx->ppgtt ? &ctx->ppgtt->base : &ggtt->base; - record_context(&ee->context, request->ctx); + record_context(&ee->context, ctx); /* We need to copy these to an anonymous buffer * as the simplest method to avoid being overwritten @@ -1483,11 +1485,11 @@ static void gem_record_rings(struct i915_gpu_state *error) ee->ctx = i915_error_object_create(i915, - to_intel_context(request->ctx, + to_intel_context(ctx, engine)->state); error->simulated |= - i915_gem_context_no_error_capture(request->ctx); + i915_gem_context_no_error_capture(ctx); ee->rq_head = request->head; ee->rq_post = request->postfix; diff --git a/drivers/gpu/drm/i915/i915_request.c b/drivers/gpu/drm/i915/i915_request.c index 8928894dd9c7..fe8810a6a339 100644 --- a/drivers/gpu/drm/i915/i915_request.c +++ b/drivers/gpu/drm/i915/i915_request.c @@ -384,7 +384,7 @@ static void __retire_engine_request(struct intel_engine_cs *engine, */ if (engine->last_retired_context) intel_context_unpin(engine->last_retired_context, engine); - engine->last_retired_context = rq->ctx; + engine->last_retired_context = rq->gem_context; } static void __retire_engine_upto(struct intel_engine_cs *engine, @@ -455,8 +455,8 @@ static void i915_request_retire(struct i915_request *request) i915_request_remove_from_client(request); /* Retirement decays the ban score as it is a sign of ctx progress */ - atomic_dec_if_positive(&request->ctx->ban_score); - intel_context_unpin(request->ctx, request->engine); + atomic_dec_if_positive(&request->gem_context->ban_score); + intel_context_unpin(request->gem_context, request->engine); __retire_engine_upto(request->engine, request); @@ -760,7 +760,7 @@ i915_request_alloc(struct intel_engine_cs *engine, struct i915_gem_context *ctx) INIT_LIST_HEAD(&rq->active_list); rq->i915 = i915; rq->engine = engine; - rq->ctx = ctx; + rq->gem_context = ctx; rq->ring = ring; rq->timeline = ring->timeline; GEM_BUG_ON(rq->timeline == &engine->timeline); @@ -814,7 +814,7 @@ i915_request_alloc(struct intel_engine_cs *engine, struct i915_gem_context *ctx) goto err_unwind; /* Keep a second pin for the dual retirement along engine and ring */ - __intel_context_pin(rq->ctx, engine); + __intel_context_pin(rq->gem_context, engine); /* Check that we didn't interrupt ourselves with a new request */ GEM_BUG_ON(rq->timeline->seqno != rq->fence.seqno); @@ -1113,7 +1113,7 @@ void __i915_request_add(struct i915_request *request, bool flush_caches) local_bh_disable(); rcu_read_lock(); /* RCU serialisation for set-wedged protection */ if (engine->schedule) - engine->schedule(request, &request->ctx->sched); + engine->schedule(request, &request->gem_context->sched); rcu_read_unlock(); i915_sw_fence_commit(&request->submit); local_bh_enable(); /* Kick the execlists tasklet if just scheduled */ diff --git a/drivers/gpu/drm/i915/i915_request.h b/drivers/gpu/drm/i915/i915_request.h index eddbd4245cb3..dddecd9ffd0c 100644 --- a/drivers/gpu/drm/i915/i915_request.h +++ b/drivers/gpu/drm/i915/i915_request.h @@ -93,7 +93,7 @@ struct i915_request { * i915_request_free() will then decrement the refcount on the * context. */ - struct i915_gem_context *ctx; + struct i915_gem_context *gem_context; struct intel_engine_cs *engine; struct intel_ring *ring; struct i915_timeline *timeline; diff --git a/drivers/gpu/drm/i915/i915_trace.h b/drivers/gpu/drm/i915/i915_trace.h index 8cc3a256f29d..5d4f78765083 100644 --- a/drivers/gpu/drm/i915/i915_trace.h +++ b/drivers/gpu/drm/i915/i915_trace.h @@ -624,7 +624,7 @@ TRACE_EVENT(i915_request_queue, TP_fast_assign( __entry->dev = rq->i915->drm.primary->index; - __entry->hw_id = rq->ctx->hw_id; + __entry->hw_id = rq->gem_context->hw_id; __entry->ring = rq->engine->id; __entry->ctx = rq->fence.context; __entry->seqno = rq->fence.seqno; @@ -651,7 +651,7 @@ DECLARE_EVENT_CLASS(i915_request, TP_fast_assign( __entry->dev = rq->i915->drm.primary->index; - __entry->hw_id = rq->ctx->hw_id; + __entry->hw_id = rq->gem_context->hw_id; __entry->ring = rq->engine->id; __entry->ctx = rq->fence.context; __entry->seqno = rq->fence.seqno; @@ -696,7 +696,7 @@ TRACE_EVENT(i915_request_in, TP_fast_assign( __entry->dev = rq->i915->drm.primary->index; - __entry->hw_id = rq->ctx->hw_id; + __entry->hw_id = rq->gem_context->hw_id; __entry->ring = rq->engine->id; __entry->ctx = rq->fence.context; __entry->seqno = rq->fence.seqno; @@ -727,7 +727,7 @@ TRACE_EVENT(i915_request_out, TP_fast_assign( __entry->dev = rq->i915->drm.primary->index; - __entry->hw_id = rq->ctx->hw_id; + __entry->hw_id = rq->gem_context->hw_id; __entry->ring = rq->engine->id; __entry->ctx = rq->fence.context; __entry->seqno = rq->fence.seqno; @@ -815,7 +815,7 @@ TRACE_EVENT(i915_request_wait_begin, */ TP_fast_assign( __entry->dev = rq->i915->drm.primary->index; - __entry->hw_id = rq->ctx->hw_id; + __entry->hw_id = rq->gem_context->hw_id; __entry->ring = rq->engine->id; __entry->ctx = rq->fence.context; __entry->seqno = rq->fence.seqno; diff --git a/drivers/gpu/drm/i915/intel_engine_cs.c b/drivers/gpu/drm/i915/intel_engine_cs.c index e78c6e769e8c..7983b8a1ad44 100644 --- a/drivers/gpu/drm/i915/intel_engine_cs.c +++ b/drivers/gpu/drm/i915/intel_engine_cs.c @@ -1020,7 +1020,7 @@ bool intel_engine_has_kernel_context(const struct intel_engine_cs *engine) */ rq = __i915_gem_active_peek(&engine->timeline.last_request); if (rq) - return rq->ctx == kernel_context; + return rq->gem_context == kernel_context; else return engine->last_retired_context == kernel_context; } diff --git a/drivers/gpu/drm/i915/intel_guc_submission.c b/drivers/gpu/drm/i915/intel_guc_submission.c index 637e852888ec..a432a193f3c4 100644 --- a/drivers/gpu/drm/i915/intel_guc_submission.c +++ b/drivers/gpu/drm/i915/intel_guc_submission.c @@ -513,8 +513,9 @@ static void guc_add_request(struct intel_guc *guc, struct i915_request *rq) { struct intel_guc_client *client = guc->execbuf_client; struct intel_engine_cs *engine = rq->engine; - u32 ctx_desc = lower_32_bits(intel_lr_context_descriptor(rq->ctx, - engine)); + u32 ctx_desc = + lower_32_bits(intel_lr_context_descriptor(rq->gem_context, + engine)); u32 ring_tail = intel_ring_set_tail(rq->ring, rq->tail) / sizeof(u64); spin_lock(&client->wq_lock); @@ -725,7 +726,7 @@ static bool __guc_dequeue(struct intel_engine_cs *engine) struct i915_request *rq, *rn; list_for_each_entry_safe(rq, rn, &p->requests, sched.link) { - if (last && rq->ctx != last->ctx) { + if (last && rq->gem_context != last->gem_context) { if (port == last_port) { __list_del_many(&p->requests, &rq->sched.link); diff --git a/drivers/gpu/drm/i915/intel_lrc.c b/drivers/gpu/drm/i915/intel_lrc.c index ae5adad7cc63..1e9cc55d785c 100644 --- a/drivers/gpu/drm/i915/intel_lrc.c +++ b/drivers/gpu/drm/i915/intel_lrc.c @@ -418,9 +418,10 @@ execlists_update_context_pdps(struct i915_hw_ppgtt *ppgtt, u32 *reg_state) static u64 execlists_update_context(struct i915_request *rq) { - struct intel_context *ce = to_intel_context(rq->ctx, rq->engine); + struct intel_context *ce = + to_intel_context(rq->gem_context, rq->engine); struct i915_hw_ppgtt *ppgtt = - rq->ctx->ppgtt ?: rq->i915->mm.aliasing_ppgtt; + rq->gem_context->ppgtt ?: rq->i915->mm.aliasing_ppgtt; u32 *reg_state = ce->lrc_reg_state; reg_state[CTX_RING_TAIL+1] = intel_ring_set_tail(rq->ring, rq->tail); @@ -679,7 +680,8 @@ static bool __execlists_dequeue(struct intel_engine_cs *engine) * second request, and so we never need to tell the * hardware about the first. */ - if (last && !can_merge_ctx(rq->ctx, last->ctx)) { + if (last && !can_merge_ctx(rq->gem_context, + last->gem_context)) { /* * If we are on the second port and cannot * combine this request with the last, then we @@ -698,14 +700,14 @@ static bool __execlists_dequeue(struct intel_engine_cs *engine) * the same context (even though a different * request) to the second port. */ - if (ctx_single_port_submission(last->ctx) || - ctx_single_port_submission(rq->ctx)) { + if (ctx_single_port_submission(last->gem_context) || + ctx_single_port_submission(rq->gem_context)) { __list_del_many(&p->requests, &rq->sched.link); goto done; } - GEM_BUG_ON(last->ctx == rq->ctx); + GEM_BUG_ON(last->gem_context == rq->gem_context); if (submit) port_assign(port, last); @@ -1437,7 +1439,7 @@ static void execlists_context_unpin(struct intel_engine_cs *engine, static int execlists_request_alloc(struct i915_request *request) { struct intel_context *ce = - to_intel_context(request->ctx, request->engine); + to_intel_context(request->gem_context, request->engine); int ret; GEM_BUG_ON(!ce->pin_count); @@ -1954,7 +1956,7 @@ static void execlists_reset(struct intel_engine_cs *engine, * future request will be after userspace has had the opportunity * to recreate its own state. */ - regs = to_intel_context(request->ctx, engine)->lrc_reg_state; + regs = to_intel_context(request->gem_context, engine)->lrc_reg_state; if (engine->default_state) { void *defaults; @@ -1967,7 +1969,8 @@ static void execlists_reset(struct intel_engine_cs *engine, i915_gem_object_unpin_map(engine->default_state); } } - execlists_init_reg_state(regs, request->ctx, engine, request->ring); + execlists_init_reg_state(regs, + request->gem_context, engine, request->ring); /* Move the RING_HEAD onto the breadcrumb, past the hanging batch */ regs[CTX_RING_BUFFER_START + 1] = i915_ggtt_offset(request->ring->vma); @@ -1989,7 +1992,7 @@ static void execlists_reset_finish(struct intel_engine_cs *engine) static int intel_logical_ring_emit_pdps(struct i915_request *rq) { - struct i915_hw_ppgtt *ppgtt = rq->ctx->ppgtt; + struct i915_hw_ppgtt *ppgtt = rq->gem_context->ppgtt; struct intel_engine_cs *engine = rq->engine; const int num_lri_cmds = GEN8_3LVL_PDPES * 2; u32 *cs; @@ -2028,15 +2031,15 @@ static int gen8_emit_bb_start(struct i915_request *rq, * it is unsafe in case of lite-restore (because the ctx is * not idle). PML4 is allocated during ppgtt init so this is * not needed in 48-bit.*/ - if (rq->ctx->ppgtt && - (intel_engine_flag(rq->engine) & rq->ctx->ppgtt->pd_dirty_rings) && - !i915_vm_is_48bit(&rq->ctx->ppgtt->base) && + if (rq->gem_context->ppgtt && + (intel_engine_flag(rq->engine) & rq->gem_context->ppgtt->pd_dirty_rings) && + !i915_vm_is_48bit(&rq->gem_context->ppgtt->base) && !intel_vgpu_active(rq->i915)) { ret = intel_logical_ring_emit_pdps(rq); if (ret) return ret; - rq->ctx->ppgtt->pd_dirty_rings &= ~intel_engine_flag(rq->engine); + rq->gem_context->ppgtt->pd_dirty_rings &= ~intel_engine_flag(rq->engine); } cs = intel_ring_begin(rq, 6); diff --git a/drivers/gpu/drm/i915/intel_ringbuffer.c b/drivers/gpu/drm/i915/intel_ringbuffer.c index 6f200a747176..53703012ec75 100644 --- a/drivers/gpu/drm/i915/intel_ringbuffer.c +++ b/drivers/gpu/drm/i915/intel_ringbuffer.c @@ -571,8 +571,8 @@ static void reset_ring(struct intel_engine_cs *engine, */ if (request) { struct drm_i915_private *dev_priv = request->i915; - struct intel_context *ce = to_intel_context(request->ctx, - engine); + struct intel_context *ce = + to_intel_context(request->gem_context, engine); struct i915_hw_ppgtt *ppgtt; if (ce->state) { @@ -584,7 +584,7 @@ static void reset_ring(struct intel_engine_cs *engine, CCID_EN); } - ppgtt = request->ctx->ppgtt ?: engine->i915->mm.aliasing_ppgtt; + ppgtt = request->gem_context->ppgtt ?: engine->i915->mm.aliasing_ppgtt; if (ppgtt) { u32 pd_offset = ppgtt->pd.base.ggtt_offset << 10; @@ -1458,7 +1458,7 @@ static inline int mi_set_context(struct i915_request *rq, u32 flags) *cs++ = MI_NOOP; *cs++ = MI_SET_CONTEXT; - *cs++ = i915_ggtt_offset(to_intel_context(rq->ctx, engine)->state) | flags; + *cs++ = i915_ggtt_offset(to_intel_context(rq->gem_context, engine)->state) | flags; /* * w/a: MI_SET_CONTEXT must always be followed by MI_NOOP * WaMiSetContext_Hang:snb,ivb,vlv @@ -1526,7 +1526,7 @@ static int remap_l3(struct i915_request *rq, int slice) static int switch_context(struct i915_request *rq) { struct intel_engine_cs *engine = rq->engine; - struct i915_gem_context *to_ctx = rq->ctx; + struct i915_gem_context *to_ctx = rq->gem_context; struct i915_hw_ppgtt *to_mm = to_ctx->ppgtt ?: rq->i915->mm.aliasing_ppgtt; struct i915_gem_context *from_ctx = engine->legacy_active_context; @@ -1597,7 +1597,7 @@ static int ring_request_alloc(struct i915_request *request) { int ret; - GEM_BUG_ON(!to_intel_context(request->ctx, request->engine)->pin_count); + GEM_BUG_ON(!to_intel_context(request->gem_context, request->engine)->pin_count); /* Flush enough space to reduce the likelihood of waiting after * we start building the request - in which case we will just diff --git a/drivers/gpu/drm/i915/selftests/intel_hangcheck.c b/drivers/gpu/drm/i915/selftests/intel_hangcheck.c index 438e0b045a2c..2c4e77c050dc 100644 --- a/drivers/gpu/drm/i915/selftests/intel_hangcheck.c +++ b/drivers/gpu/drm/i915/selftests/intel_hangcheck.c @@ -105,7 +105,10 @@ static int emit_recurse_batch(struct hang *h, struct i915_request *rq) { struct drm_i915_private *i915 = h->i915; - struct i915_address_space *vm = rq->ctx->ppgtt ? &rq->ctx->ppgtt->base : &i915->ggtt.base; + struct i915_address_space *vm = + rq->gem_context->ppgtt ? + &rq->gem_context->ppgtt->base : + &i915->ggtt.base; struct i915_vma *hws, *vma; unsigned int flags; u32 *batch; diff --git a/drivers/gpu/drm/i915/selftests/intel_lrc.c b/drivers/gpu/drm/i915/selftests/intel_lrc.c index 1b8a07125150..68cb9126b3e1 100644 --- a/drivers/gpu/drm/i915/selftests/intel_lrc.c +++ b/drivers/gpu/drm/i915/selftests/intel_lrc.c @@ -83,7 +83,7 @@ static int emit_recurse_batch(struct spinner *spin, struct i915_request *rq, u32 arbitration_command) { - struct i915_address_space *vm = &rq->ctx->ppgtt->base; + struct i915_address_space *vm = &rq->gem_context->ppgtt->base; struct i915_vma *hws, *vma; u32 *batch; int err; -- cgit v1.2.3 From 01278cb143955f4b592568d1756f1baf506245c2 Mon Sep 17 00:00:00 2001 From: Chris Wilson Date: Thu, 17 May 2018 22:26:31 +0100 Subject: drm/i915: Move fiddling with engine->last_retired_context Move the knowledge about resetting the current context tracking on the engine from inside i915_gem_context.c into intel_engine_cs.c Signed-off-by: Chris Wilson Reviewed-by: Tvrtko Ursulin Link: https://patchwork.freedesktop.org/patch/msgid/20180517212633.24934-2-chris@chris-wilson.co.uk --- drivers/gpu/drm/i915/i915_gem_context.c | 12 ++---------- drivers/gpu/drm/i915/intel_engine_cs.c | 23 +++++++++++++++++++++++ drivers/gpu/drm/i915/intel_ringbuffer.h | 1 + 3 files changed, 26 insertions(+), 10 deletions(-) (limited to 'drivers/gpu/drm/i915/intel_engine_cs.c') diff --git a/drivers/gpu/drm/i915/i915_gem_context.c b/drivers/gpu/drm/i915/i915_gem_context.c index 4bf18b5c6f1d..9e70f4dfa703 100644 --- a/drivers/gpu/drm/i915/i915_gem_context.c +++ b/drivers/gpu/drm/i915/i915_gem_context.c @@ -514,16 +514,8 @@ void i915_gem_contexts_lost(struct drm_i915_private *dev_priv) lockdep_assert_held(&dev_priv->drm.struct_mutex); - for_each_engine(engine, dev_priv, id) { - engine->legacy_active_context = NULL; - engine->legacy_active_ppgtt = NULL; - - if (!engine->last_retired_context) - continue; - - intel_context_unpin(engine->last_retired_context, engine); - engine->last_retired_context = NULL; - } + for_each_engine(engine, dev_priv, id) + intel_engine_lost_context(engine); } void i915_gem_contexts_fini(struct drm_i915_private *i915) diff --git a/drivers/gpu/drm/i915/intel_engine_cs.c b/drivers/gpu/drm/i915/intel_engine_cs.c index 7983b8a1ad44..9e618aab6568 100644 --- a/drivers/gpu/drm/i915/intel_engine_cs.c +++ b/drivers/gpu/drm/i915/intel_engine_cs.c @@ -1096,6 +1096,29 @@ void intel_engines_unpark(struct drm_i915_private *i915) } } +/** + * intel_engine_lost_context: called when the GPU is reset into unknown state + * @engine: the engine + * + * We have either reset the GPU or otherwise about to lose state tracking of + * the current GPU logical state (e.g. suspend). On next use, it is therefore + * imperative that we make no presumptions about the current state and load + * from scratch. + */ +void intel_engine_lost_context(struct intel_engine_cs *engine) +{ + struct i915_gem_context *ctx; + + lockdep_assert_held(&engine->i915->drm.struct_mutex); + + engine->legacy_active_context = NULL; + engine->legacy_active_ppgtt = NULL; + + ctx = fetch_and_zero(&engine->last_retired_context); + if (ctx) + intel_context_unpin(ctx, engine); +} + bool intel_engine_can_store_dword(struct intel_engine_cs *engine) { switch (INTEL_GEN(engine->i915)) { diff --git a/drivers/gpu/drm/i915/intel_ringbuffer.h b/drivers/gpu/drm/i915/intel_ringbuffer.h index 61f385a92484..2b16185e36c4 100644 --- a/drivers/gpu/drm/i915/intel_ringbuffer.h +++ b/drivers/gpu/drm/i915/intel_ringbuffer.h @@ -1053,6 +1053,7 @@ bool intel_engine_is_idle(struct intel_engine_cs *engine); bool intel_engines_are_idle(struct drm_i915_private *dev_priv); bool intel_engine_has_kernel_context(const struct intel_engine_cs *engine); +void intel_engine_lost_context(struct intel_engine_cs *engine); void intel_engines_park(struct drm_i915_private *i915); void intel_engines_unpark(struct drm_i915_private *i915); -- cgit v1.2.3 From 1fc44d9b1afb0afe46acd99bdfdf793805a850e1 Mon Sep 17 00:00:00 2001 From: Chris Wilson Date: Thu, 17 May 2018 22:26:32 +0100 Subject: drm/i915: Store a pointer to intel_context in i915_request To ease the frequent and ugly pointer dance of &request->gem_context->engine[request->engine->id] during request submission, store that pointer as request->hw_context. One major advantage that we will exploit later is that this decouples the logical context state from the engine itself. v2: Set mock_context->ops so we don't crash and burn in selftests. Cleanups from Tvrtko. Signed-off-by: Chris Wilson Cc: Tvrtko Ursulin Acked-by: Zhenyu Wang Reviewed-by: Tvrtko Ursulin Link: https://patchwork.freedesktop.org/patch/msgid/20180517212633.24934-3-chris@chris-wilson.co.uk --- drivers/gpu/drm/i915/gvt/mmio_context.c | 6 +- drivers/gpu/drm/i915/gvt/mmio_context.h | 2 +- drivers/gpu/drm/i915/gvt/scheduler.c | 141 ++++++++++---------------- drivers/gpu/drm/i915/gvt/scheduler.h | 1 - drivers/gpu/drm/i915/i915_drv.h | 1 + drivers/gpu/drm/i915/i915_gem.c | 12 +-- drivers/gpu/drm/i915/i915_gem_context.c | 17 ++-- drivers/gpu/drm/i915/i915_gem_context.h | 21 ++-- drivers/gpu/drm/i915/i915_gpu_error.c | 3 +- drivers/gpu/drm/i915/i915_perf.c | 25 ++--- drivers/gpu/drm/i915/i915_request.c | 34 +++---- drivers/gpu/drm/i915/i915_request.h | 1 + drivers/gpu/drm/i915/intel_engine_cs.c | 54 ++++++---- drivers/gpu/drm/i915/intel_guc_submission.c | 10 +- drivers/gpu/drm/i915/intel_lrc.c | 125 +++++++++++++---------- drivers/gpu/drm/i915/intel_lrc.h | 7 -- drivers/gpu/drm/i915/intel_ringbuffer.c | 100 +++++++++++------- drivers/gpu/drm/i915/intel_ringbuffer.h | 9 +- drivers/gpu/drm/i915/selftests/mock_context.c | 7 ++ drivers/gpu/drm/i915/selftests/mock_engine.c | 41 +++++--- 20 files changed, 321 insertions(+), 296 deletions(-) (limited to 'drivers/gpu/drm/i915/intel_engine_cs.c') diff --git a/drivers/gpu/drm/i915/gvt/mmio_context.c b/drivers/gpu/drm/i915/gvt/mmio_context.c index 0f949554d118..708170e61625 100644 --- a/drivers/gpu/drm/i915/gvt/mmio_context.c +++ b/drivers/gpu/drm/i915/gvt/mmio_context.c @@ -446,9 +446,9 @@ static void switch_mocs(struct intel_vgpu *pre, struct intel_vgpu *next, #define CTX_CONTEXT_CONTROL_VAL 0x03 -bool is_inhibit_context(struct i915_gem_context *ctx, int ring_id) +bool is_inhibit_context(struct intel_context *ce) { - u32 *reg_state = ctx->__engine[ring_id].lrc_reg_state; + const u32 *reg_state = ce->lrc_reg_state; u32 inhibit_mask = _MASKED_BIT_ENABLE(CTX_CTRL_ENGINE_CTX_RESTORE_INHIBIT); @@ -501,7 +501,7 @@ static void switch_mmio(struct intel_vgpu *pre, * itself. */ if (mmio->in_context && - !is_inhibit_context(s->shadow_ctx, ring_id)) + !is_inhibit_context(&s->shadow_ctx->__engine[ring_id])) continue; if (mmio->mask) diff --git a/drivers/gpu/drm/i915/gvt/mmio_context.h b/drivers/gpu/drm/i915/gvt/mmio_context.h index 0439eb8057a8..5c3b9ff9f96a 100644 --- a/drivers/gpu/drm/i915/gvt/mmio_context.h +++ b/drivers/gpu/drm/i915/gvt/mmio_context.h @@ -49,7 +49,7 @@ void intel_gvt_switch_mmio(struct intel_vgpu *pre, void intel_gvt_init_engine_mmio_context(struct intel_gvt *gvt); -bool is_inhibit_context(struct i915_gem_context *ctx, int ring_id); +bool is_inhibit_context(struct intel_context *ce); int intel_vgpu_restore_inhibit_context(struct intel_vgpu *vgpu, struct i915_request *req); diff --git a/drivers/gpu/drm/i915/gvt/scheduler.c b/drivers/gpu/drm/i915/gvt/scheduler.c index 17f9f8d7e148..e1760030dda1 100644 --- a/drivers/gpu/drm/i915/gvt/scheduler.c +++ b/drivers/gpu/drm/i915/gvt/scheduler.c @@ -54,11 +54,8 @@ static void set_context_pdp_root_pointer( static void update_shadow_pdps(struct intel_vgpu_workload *workload) { - struct intel_vgpu *vgpu = workload->vgpu; - int ring_id = workload->ring_id; - struct i915_gem_context *shadow_ctx = vgpu->submission.shadow_ctx; struct drm_i915_gem_object *ctx_obj = - shadow_ctx->__engine[ring_id].state->obj; + workload->req->hw_context->state->obj; struct execlist_ring_context *shadow_ring_context; struct page *page; @@ -128,9 +125,8 @@ static int populate_shadow_context(struct intel_vgpu_workload *workload) struct intel_vgpu *vgpu = workload->vgpu; struct intel_gvt *gvt = vgpu->gvt; int ring_id = workload->ring_id; - struct i915_gem_context *shadow_ctx = vgpu->submission.shadow_ctx; struct drm_i915_gem_object *ctx_obj = - shadow_ctx->__engine[ring_id].state->obj; + workload->req->hw_context->state->obj; struct execlist_ring_context *shadow_ring_context; struct page *page; void *dst; @@ -280,10 +276,8 @@ static int shadow_context_status_change(struct notifier_block *nb, return NOTIFY_OK; } -static void shadow_context_descriptor_update(struct i915_gem_context *ctx, - struct intel_engine_cs *engine) +static void shadow_context_descriptor_update(struct intel_context *ce) { - struct intel_context *ce = to_intel_context(ctx, engine); u64 desc = 0; desc = ce->lrc_desc; @@ -292,7 +286,7 @@ static void shadow_context_descriptor_update(struct i915_gem_context *ctx, * like GEN8_CTX_* cached in desc_template */ desc &= U64_MAX << 12; - desc |= ctx->desc_template & ((1ULL << 12) - 1); + desc |= ce->gem_context->desc_template & ((1ULL << 12) - 1); ce->lrc_desc = desc; } @@ -300,12 +294,11 @@ static void shadow_context_descriptor_update(struct i915_gem_context *ctx, static int copy_workload_to_ring_buffer(struct intel_vgpu_workload *workload) { struct intel_vgpu *vgpu = workload->vgpu; + struct i915_request *req = workload->req; void *shadow_ring_buffer_va; u32 *cs; - struct i915_request *req = workload->req; - if (IS_KABYLAKE(req->i915) && - is_inhibit_context(req->gem_context, req->engine->id)) + if (IS_KABYLAKE(req->i915) && is_inhibit_context(req->hw_context)) intel_vgpu_restore_inhibit_context(vgpu, req); /* allocate shadow ring buffer */ @@ -353,60 +346,56 @@ int intel_gvt_scan_and_shadow_workload(struct intel_vgpu_workload *workload) struct intel_vgpu_submission *s = &vgpu->submission; struct i915_gem_context *shadow_ctx = s->shadow_ctx; struct drm_i915_private *dev_priv = vgpu->gvt->dev_priv; - int ring_id = workload->ring_id; - struct intel_engine_cs *engine = dev_priv->engine[ring_id]; - struct intel_ring *ring; + struct intel_engine_cs *engine = dev_priv->engine[workload->ring_id]; + struct intel_context *ce; int ret; lockdep_assert_held(&dev_priv->drm.struct_mutex); - if (workload->shadowed) + if (workload->req) return 0; + /* pin shadow context by gvt even the shadow context will be pinned + * when i915 alloc request. That is because gvt will update the guest + * context from shadow context when workload is completed, and at that + * moment, i915 may already unpined the shadow context to make the + * shadow_ctx pages invalid. So gvt need to pin itself. After update + * the guest context, gvt can unpin the shadow_ctx safely. + */ + ce = intel_context_pin(shadow_ctx, engine); + if (IS_ERR(ce)) { + gvt_vgpu_err("fail to pin shadow context\n"); + return PTR_ERR(ce); + } + shadow_ctx->desc_template &= ~(0x3 << GEN8_CTX_ADDRESSING_MODE_SHIFT); shadow_ctx->desc_template |= workload->ctx_desc.addressing_mode << GEN8_CTX_ADDRESSING_MODE_SHIFT; - if (!test_and_set_bit(ring_id, s->shadow_ctx_desc_updated)) - shadow_context_descriptor_update(shadow_ctx, - dev_priv->engine[ring_id]); + if (!test_and_set_bit(workload->ring_id, s->shadow_ctx_desc_updated)) + shadow_context_descriptor_update(ce); ret = intel_gvt_scan_and_shadow_ringbuffer(workload); if (ret) - goto err_scan; + goto err_unpin; if ((workload->ring_id == RCS) && (workload->wa_ctx.indirect_ctx.size != 0)) { ret = intel_gvt_scan_and_shadow_wa_ctx(&workload->wa_ctx); if (ret) - goto err_scan; - } - - /* pin shadow context by gvt even the shadow context will be pinned - * when i915 alloc request. That is because gvt will update the guest - * context from shadow context when workload is completed, and at that - * moment, i915 may already unpined the shadow context to make the - * shadow_ctx pages invalid. So gvt need to pin itself. After update - * the guest context, gvt can unpin the shadow_ctx safely. - */ - ring = intel_context_pin(shadow_ctx, engine); - if (IS_ERR(ring)) { - ret = PTR_ERR(ring); - gvt_vgpu_err("fail to pin shadow context\n"); - goto err_shadow; + goto err_shadow; } ret = populate_shadow_context(workload); if (ret) - goto err_unpin; - workload->shadowed = true; + goto err_shadow; + return 0; -err_unpin: - intel_context_unpin(shadow_ctx, engine); err_shadow: release_shadow_wa_ctx(&workload->wa_ctx); -err_scan: +err_unpin: + intel_context_unpin(ce); return ret; } @@ -414,7 +403,6 @@ static int intel_gvt_generate_request(struct intel_vgpu_workload *workload) { int ring_id = workload->ring_id; struct drm_i915_private *dev_priv = workload->vgpu->gvt->dev_priv; - struct intel_engine_cs *engine = dev_priv->engine[ring_id]; struct i915_request *rq; struct intel_vgpu *vgpu = workload->vgpu; struct intel_vgpu_submission *s = &vgpu->submission; @@ -437,7 +425,6 @@ static int intel_gvt_generate_request(struct intel_vgpu_workload *workload) return 0; err_unpin: - intel_context_unpin(shadow_ctx, engine); release_shadow_wa_ctx(&workload->wa_ctx); return ret; } @@ -517,21 +504,13 @@ err: return ret; } -static int update_wa_ctx_2_shadow_ctx(struct intel_shadow_wa_ctx *wa_ctx) +static void update_wa_ctx_2_shadow_ctx(struct intel_shadow_wa_ctx *wa_ctx) { - struct intel_vgpu_workload *workload = container_of(wa_ctx, - struct intel_vgpu_workload, - wa_ctx); - int ring_id = workload->ring_id; - struct intel_vgpu_submission *s = &workload->vgpu->submission; - struct i915_gem_context *shadow_ctx = s->shadow_ctx; - struct drm_i915_gem_object *ctx_obj = - shadow_ctx->__engine[ring_id].state->obj; - struct execlist_ring_context *shadow_ring_context; - struct page *page; - - page = i915_gem_object_get_page(ctx_obj, LRC_STATE_PN); - shadow_ring_context = kmap_atomic(page); + struct intel_vgpu_workload *workload = + container_of(wa_ctx, struct intel_vgpu_workload, wa_ctx); + struct i915_request *rq = workload->req; + struct execlist_ring_context *shadow_ring_context = + (struct execlist_ring_context *)rq->hw_context->lrc_reg_state; shadow_ring_context->bb_per_ctx_ptr.val = (shadow_ring_context->bb_per_ctx_ptr.val & @@ -539,9 +518,6 @@ static int update_wa_ctx_2_shadow_ctx(struct intel_shadow_wa_ctx *wa_ctx) shadow_ring_context->rcs_indirect_ctx.val = (shadow_ring_context->rcs_indirect_ctx.val & (~INDIRECT_CTX_ADDR_MASK)) | wa_ctx->indirect_ctx.shadow_gma; - - kunmap_atomic(shadow_ring_context); - return 0; } static int prepare_shadow_wa_ctx(struct intel_shadow_wa_ctx *wa_ctx) @@ -670,12 +646,9 @@ err_unpin_mm: static int dispatch_workload(struct intel_vgpu_workload *workload) { struct intel_vgpu *vgpu = workload->vgpu; - struct intel_vgpu_submission *s = &vgpu->submission; - struct i915_gem_context *shadow_ctx = s->shadow_ctx; struct drm_i915_private *dev_priv = vgpu->gvt->dev_priv; int ring_id = workload->ring_id; - struct intel_engine_cs *engine = dev_priv->engine[ring_id]; - int ret = 0; + int ret; gvt_dbg_sched("ring id %d prepare to dispatch workload %p\n", ring_id, workload); @@ -687,10 +660,6 @@ static int dispatch_workload(struct intel_vgpu_workload *workload) goto out; ret = prepare_workload(workload); - if (ret) { - intel_context_unpin(shadow_ctx, engine); - goto out; - } out: if (ret) @@ -765,27 +734,23 @@ out: static void update_guest_context(struct intel_vgpu_workload *workload) { + struct i915_request *rq = workload->req; struct intel_vgpu *vgpu = workload->vgpu; struct intel_gvt *gvt = vgpu->gvt; - struct intel_vgpu_submission *s = &vgpu->submission; - struct i915_gem_context *shadow_ctx = s->shadow_ctx; - int ring_id = workload->ring_id; - struct drm_i915_gem_object *ctx_obj = - shadow_ctx->__engine[ring_id].state->obj; + struct drm_i915_gem_object *ctx_obj = rq->hw_context->state->obj; struct execlist_ring_context *shadow_ring_context; struct page *page; void *src; unsigned long context_gpa, context_page_num; int i; - gvt_dbg_sched("ring id %d workload lrca %x\n", ring_id, - workload->ctx_desc.lrca); - - context_page_num = gvt->dev_priv->engine[ring_id]->context_size; + gvt_dbg_sched("ring id %d workload lrca %x\n", rq->engine->id, + workload->ctx_desc.lrca); + context_page_num = rq->engine->context_size; context_page_num = context_page_num >> PAGE_SHIFT; - if (IS_BROADWELL(gvt->dev_priv) && ring_id == RCS) + if (IS_BROADWELL(gvt->dev_priv) && rq->engine->id == RCS) context_page_num = 19; i = 2; @@ -858,6 +823,7 @@ static void complete_current_workload(struct intel_gvt *gvt, int ring_id) scheduler->current_workload[ring_id]; struct intel_vgpu *vgpu = workload->vgpu; struct intel_vgpu_submission *s = &vgpu->submission; + struct i915_request *rq; int event; mutex_lock(&gvt->lock); @@ -866,11 +832,8 @@ static void complete_current_workload(struct intel_gvt *gvt, int ring_id) * switch to make sure request is completed. * For the workload w/o request, directly complete the workload. */ - if (workload->req) { - struct drm_i915_private *dev_priv = - workload->vgpu->gvt->dev_priv; - struct intel_engine_cs *engine = - dev_priv->engine[workload->ring_id]; + rq = fetch_and_zero(&workload->req); + if (rq) { wait_event(workload->shadow_ctx_status_wq, !atomic_read(&workload->shadow_ctx_active)); @@ -886,8 +849,6 @@ static void complete_current_workload(struct intel_gvt *gvt, int ring_id) workload->status = 0; } - i915_request_put(fetch_and_zero(&workload->req)); - if (!workload->status && !(vgpu->resetting_eng & ENGINE_MASK(ring_id))) { update_guest_context(workload); @@ -896,10 +857,13 @@ static void complete_current_workload(struct intel_gvt *gvt, int ring_id) INTEL_GVT_EVENT_MAX) intel_vgpu_trigger_virtual_event(vgpu, event); } - mutex_lock(&dev_priv->drm.struct_mutex); + /* unpin shadow ctx as the shadow_ctx update is done */ - intel_context_unpin(s->shadow_ctx, engine); - mutex_unlock(&dev_priv->drm.struct_mutex); + mutex_lock(&rq->i915->drm.struct_mutex); + intel_context_unpin(rq->hw_context); + mutex_unlock(&rq->i915->drm.struct_mutex); + + i915_request_put(rq); } gvt_dbg_sched("ring id %d complete workload %p status %d\n", @@ -1270,7 +1234,6 @@ alloc_workload(struct intel_vgpu *vgpu) atomic_set(&workload->shadow_ctx_active, 0); workload->status = -EINPROGRESS; - workload->shadowed = false; workload->vgpu = vgpu; return workload; diff --git a/drivers/gpu/drm/i915/gvt/scheduler.h b/drivers/gpu/drm/i915/gvt/scheduler.h index 6c644782193e..21eddab4a9cd 100644 --- a/drivers/gpu/drm/i915/gvt/scheduler.h +++ b/drivers/gpu/drm/i915/gvt/scheduler.h @@ -83,7 +83,6 @@ struct intel_vgpu_workload { struct i915_request *req; /* if this workload has been dispatched to i915? */ bool dispatched; - bool shadowed; int status; struct intel_vgpu_mm *shadow_mm; diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h index 34c125e2d90c..e33c380b43e3 100644 --- a/drivers/gpu/drm/i915/i915_drv.h +++ b/drivers/gpu/drm/i915/i915_drv.h @@ -1950,6 +1950,7 @@ struct drm_i915_private { */ struct i915_perf_stream *exclusive_stream; + struct intel_context *pinned_ctx; u32 specific_ctx_id; struct hrtimer poll_check_timer; diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c index a20f8db5729d..03874b50ada9 100644 --- a/drivers/gpu/drm/i915/i915_gem.c +++ b/drivers/gpu/drm/i915/i915_gem.c @@ -3181,14 +3181,14 @@ void i915_gem_reset(struct drm_i915_private *dev_priv, i915_retire_requests(dev_priv); for_each_engine(engine, dev_priv, id) { - struct i915_gem_context *ctx; + struct intel_context *ce; i915_gem_reset_engine(engine, engine->hangcheck.active_request, stalled_mask & ENGINE_MASK(id)); - ctx = fetch_and_zero(&engine->last_retired_context); - if (ctx) - intel_context_unpin(ctx, engine); + ce = fetch_and_zero(&engine->last_retired_context); + if (ce) + intel_context_unpin(ce); /* * Ostensibily, we always want a context loaded for powersaving, @@ -4897,13 +4897,13 @@ void __i915_gem_object_release_unless_active(struct drm_i915_gem_object *obj) static void assert_kernel_context_is_current(struct drm_i915_private *i915) { - struct i915_gem_context *kernel_context = i915->kernel_context; + struct i915_gem_context *kctx = i915->kernel_context; struct intel_engine_cs *engine; enum intel_engine_id id; for_each_engine(engine, i915, id) { GEM_BUG_ON(__i915_gem_active_peek(&engine->timeline.last_request)); - GEM_BUG_ON(engine->last_retired_context != kernel_context); + GEM_BUG_ON(engine->last_retired_context->gem_context != kctx); } } diff --git a/drivers/gpu/drm/i915/i915_gem_context.c b/drivers/gpu/drm/i915/i915_gem_context.c index 9e70f4dfa703..b69b18ef8120 100644 --- a/drivers/gpu/drm/i915/i915_gem_context.c +++ b/drivers/gpu/drm/i915/i915_gem_context.c @@ -127,14 +127,8 @@ static void i915_gem_context_free(struct i915_gem_context *ctx) for (n = 0; n < ARRAY_SIZE(ctx->__engine); n++) { struct intel_context *ce = &ctx->__engine[n]; - if (!ce->state) - continue; - - WARN_ON(ce->pin_count); - if (ce->ring) - intel_ring_free(ce->ring); - - __i915_gem_object_release_unless_active(ce->state->obj); + if (ce->ops) + ce->ops->destroy(ce); } kfree(ctx->name); @@ -266,6 +260,7 @@ __create_hw_context(struct drm_i915_private *dev_priv, struct drm_i915_file_private *file_priv) { struct i915_gem_context *ctx; + unsigned int n; int ret; ctx = kzalloc(sizeof(*ctx), GFP_KERNEL); @@ -283,6 +278,12 @@ __create_hw_context(struct drm_i915_private *dev_priv, ctx->i915 = dev_priv; ctx->sched.priority = I915_PRIORITY_NORMAL; + for (n = 0; n < ARRAY_SIZE(ctx->__engine); n++) { + struct intel_context *ce = &ctx->__engine[n]; + + ce->gem_context = ctx; + } + INIT_RADIX_TREE(&ctx->handles_vma, GFP_KERNEL); INIT_LIST_HEAD(&ctx->handles_list); diff --git a/drivers/gpu/drm/i915/i915_gem_context.h b/drivers/gpu/drm/i915/i915_gem_context.h index ace3b129c189..749a4ff566f5 100644 --- a/drivers/gpu/drm/i915/i915_gem_context.h +++ b/drivers/gpu/drm/i915/i915_gem_context.h @@ -45,6 +45,11 @@ struct intel_ring; #define DEFAULT_CONTEXT_HANDLE 0 +struct intel_context_ops { + void (*unpin)(struct intel_context *ce); + void (*destroy)(struct intel_context *ce); +}; + /** * struct i915_gem_context - client state * @@ -144,11 +149,14 @@ struct i915_gem_context { /** engine: per-engine logical HW state */ struct intel_context { + struct i915_gem_context *gem_context; struct i915_vma *state; struct intel_ring *ring; u32 *lrc_reg_state; u64 lrc_desc; int pin_count; + + const struct intel_context_ops *ops; } __engine[I915_NUM_ENGINES]; /** ring_size: size for allocating the per-engine ring buffer */ @@ -263,25 +271,22 @@ to_intel_context(struct i915_gem_context *ctx, return &ctx->__engine[engine->id]; } -static inline struct intel_ring * +static inline struct intel_context * intel_context_pin(struct i915_gem_context *ctx, struct intel_engine_cs *engine) { return engine->context_pin(engine, ctx); } -static inline void __intel_context_pin(struct i915_gem_context *ctx, - const struct intel_engine_cs *engine) +static inline void __intel_context_pin(struct intel_context *ce) { - struct intel_context *ce = to_intel_context(ctx, engine); - GEM_BUG_ON(!ce->pin_count); ce->pin_count++; } -static inline void intel_context_unpin(struct i915_gem_context *ctx, - struct intel_engine_cs *engine) +static inline void intel_context_unpin(struct intel_context *ce) { - engine->context_unpin(engine, ctx); + GEM_BUG_ON(!ce->ops); + ce->ops->unpin(ce); } /* i915_gem_context.c */ diff --git a/drivers/gpu/drm/i915/i915_gpu_error.c b/drivers/gpu/drm/i915/i915_gpu_error.c index 37c9a42654ba..47721437a4c5 100644 --- a/drivers/gpu/drm/i915/i915_gpu_error.c +++ b/drivers/gpu/drm/i915/i915_gpu_error.c @@ -1485,8 +1485,7 @@ static void gem_record_rings(struct i915_gpu_state *error) ee->ctx = i915_error_object_create(i915, - to_intel_context(ctx, - engine)->state); + request->hw_context->state); error->simulated |= i915_gem_context_no_error_capture(ctx); diff --git a/drivers/gpu/drm/i915/i915_perf.c b/drivers/gpu/drm/i915/i915_perf.c index 019bd2d073ad..4f0eb84b3c00 100644 --- a/drivers/gpu/drm/i915/i915_perf.c +++ b/drivers/gpu/drm/i915/i915_perf.c @@ -1221,7 +1221,7 @@ static int oa_get_render_ctx_id(struct i915_perf_stream *stream) dev_priv->perf.oa.specific_ctx_id = stream->ctx->hw_id; } else { struct intel_engine_cs *engine = dev_priv->engine[RCS]; - struct intel_ring *ring; + struct intel_context *ce; int ret; ret = i915_mutex_lock_interruptible(&dev_priv->drm); @@ -1234,19 +1234,19 @@ static int oa_get_render_ctx_id(struct i915_perf_stream *stream) * * NB: implied RCS engine... */ - ring = intel_context_pin(stream->ctx, engine); + ce = intel_context_pin(stream->ctx, engine); mutex_unlock(&dev_priv->drm.struct_mutex); - if (IS_ERR(ring)) - return PTR_ERR(ring); + if (IS_ERR(ce)) + return PTR_ERR(ce); + dev_priv->perf.oa.pinned_ctx = ce; /* * Explicitly track the ID (instead of calling * i915_ggtt_offset() on the fly) considering the difference * with gen8+ and execlists */ - dev_priv->perf.oa.specific_ctx_id = - i915_ggtt_offset(to_intel_context(stream->ctx, engine)->state); + dev_priv->perf.oa.specific_ctx_id = i915_ggtt_offset(ce->state); } return 0; @@ -1262,17 +1262,14 @@ static int oa_get_render_ctx_id(struct i915_perf_stream *stream) static void oa_put_render_ctx_id(struct i915_perf_stream *stream) { struct drm_i915_private *dev_priv = stream->dev_priv; + struct intel_context *ce; - if (HAS_LOGICAL_RING_CONTEXTS(dev_priv)) { - dev_priv->perf.oa.specific_ctx_id = INVALID_CTX_ID; - } else { - struct intel_engine_cs *engine = dev_priv->engine[RCS]; + dev_priv->perf.oa.specific_ctx_id = INVALID_CTX_ID; + ce = fetch_and_zero(&dev_priv->perf.oa.pinned_ctx); + if (ce) { mutex_lock(&dev_priv->drm.struct_mutex); - - dev_priv->perf.oa.specific_ctx_id = INVALID_CTX_ID; - intel_context_unpin(stream->ctx, engine); - + intel_context_unpin(ce); mutex_unlock(&dev_priv->drm.struct_mutex); } } diff --git a/drivers/gpu/drm/i915/i915_request.c b/drivers/gpu/drm/i915/i915_request.c index fe8810a6a339..fc499bcbd105 100644 --- a/drivers/gpu/drm/i915/i915_request.c +++ b/drivers/gpu/drm/i915/i915_request.c @@ -383,8 +383,8 @@ static void __retire_engine_request(struct intel_engine_cs *engine, * the subsequent request. */ if (engine->last_retired_context) - intel_context_unpin(engine->last_retired_context, engine); - engine->last_retired_context = rq->gem_context; + intel_context_unpin(engine->last_retired_context); + engine->last_retired_context = rq->hw_context; } static void __retire_engine_upto(struct intel_engine_cs *engine, @@ -456,7 +456,7 @@ static void i915_request_retire(struct i915_request *request) /* Retirement decays the ban score as it is a sign of ctx progress */ atomic_dec_if_positive(&request->gem_context->ban_score); - intel_context_unpin(request->gem_context, request->engine); + intel_context_unpin(request->hw_context); __retire_engine_upto(request->engine, request); @@ -657,7 +657,7 @@ i915_request_alloc(struct intel_engine_cs *engine, struct i915_gem_context *ctx) { struct drm_i915_private *i915 = engine->i915; struct i915_request *rq; - struct intel_ring *ring; + struct intel_context *ce; int ret; lockdep_assert_held(&i915->drm.struct_mutex); @@ -681,22 +681,21 @@ i915_request_alloc(struct intel_engine_cs *engine, struct i915_gem_context *ctx) * GGTT space, so do this first before we reserve a seqno for * ourselves. */ - ring = intel_context_pin(ctx, engine); - if (IS_ERR(ring)) - return ERR_CAST(ring); - GEM_BUG_ON(!ring); + ce = intel_context_pin(ctx, engine); + if (IS_ERR(ce)) + return ERR_CAST(ce); ret = reserve_gt(i915); if (ret) goto err_unpin; - ret = intel_ring_wait_for_space(ring, MIN_SPACE_FOR_ADD_REQUEST); + ret = intel_ring_wait_for_space(ce->ring, MIN_SPACE_FOR_ADD_REQUEST); if (ret) goto err_unreserve; /* Move our oldest request to the slab-cache (if not in use!) */ - rq = list_first_entry(&ring->request_list, typeof(*rq), ring_link); - if (!list_is_last(&rq->ring_link, &ring->request_list) && + rq = list_first_entry(&ce->ring->request_list, typeof(*rq), ring_link); + if (!list_is_last(&rq->ring_link, &ce->ring->request_list) && i915_request_completed(rq)) i915_request_retire(rq); @@ -761,8 +760,9 @@ i915_request_alloc(struct intel_engine_cs *engine, struct i915_gem_context *ctx) rq->i915 = i915; rq->engine = engine; rq->gem_context = ctx; - rq->ring = ring; - rq->timeline = ring->timeline; + rq->hw_context = ce; + rq->ring = ce->ring; + rq->timeline = ce->ring->timeline; GEM_BUG_ON(rq->timeline == &engine->timeline); spin_lock_init(&rq->lock); @@ -814,14 +814,14 @@ i915_request_alloc(struct intel_engine_cs *engine, struct i915_gem_context *ctx) goto err_unwind; /* Keep a second pin for the dual retirement along engine and ring */ - __intel_context_pin(rq->gem_context, engine); + __intel_context_pin(ce); /* Check that we didn't interrupt ourselves with a new request */ GEM_BUG_ON(rq->timeline->seqno != rq->fence.seqno); return rq; err_unwind: - rq->ring->emit = rq->head; + ce->ring->emit = rq->head; /* Make sure we didn't add ourselves to external state before freeing */ GEM_BUG_ON(!list_empty(&rq->active_list)); @@ -832,7 +832,7 @@ err_unwind: err_unreserve: unreserve_gt(i915); err_unpin: - intel_context_unpin(ctx, engine); + intel_context_unpin(ce); return ERR_PTR(ret); } @@ -1018,8 +1018,8 @@ i915_request_await_object(struct i915_request *to, void __i915_request_add(struct i915_request *request, bool flush_caches) { struct intel_engine_cs *engine = request->engine; - struct intel_ring *ring = request->ring; struct i915_timeline *timeline = request->timeline; + struct intel_ring *ring = request->ring; struct i915_request *prev; u32 *cs; int err; diff --git a/drivers/gpu/drm/i915/i915_request.h b/drivers/gpu/drm/i915/i915_request.h index dddecd9ffd0c..1bbbb7a9fa03 100644 --- a/drivers/gpu/drm/i915/i915_request.h +++ b/drivers/gpu/drm/i915/i915_request.h @@ -95,6 +95,7 @@ struct i915_request { */ struct i915_gem_context *gem_context; struct intel_engine_cs *engine; + struct intel_context *hw_context; struct intel_ring *ring; struct i915_timeline *timeline; struct intel_signal_node signaling; diff --git a/drivers/gpu/drm/i915/intel_engine_cs.c b/drivers/gpu/drm/i915/intel_engine_cs.c index 9e618aab6568..26f9f8aab949 100644 --- a/drivers/gpu/drm/i915/intel_engine_cs.c +++ b/drivers/gpu/drm/i915/intel_engine_cs.c @@ -645,6 +645,12 @@ static int init_phys_status_page(struct intel_engine_cs *engine) return 0; } +static void __intel_context_unpin(struct i915_gem_context *ctx, + struct intel_engine_cs *engine) +{ + intel_context_unpin(to_intel_context(ctx, engine)); +} + /** * intel_engines_init_common - initialize cengine state which might require hw access * @engine: Engine to initialize. @@ -658,7 +664,8 @@ static int init_phys_status_page(struct intel_engine_cs *engine) */ int intel_engine_init_common(struct intel_engine_cs *engine) { - struct intel_ring *ring; + struct drm_i915_private *i915 = engine->i915; + struct intel_context *ce; int ret; engine->set_default_submission(engine); @@ -670,18 +677,18 @@ int intel_engine_init_common(struct intel_engine_cs *engine) * be available. To avoid this we always pin the default * context. */ - ring = intel_context_pin(engine->i915->kernel_context, engine); - if (IS_ERR(ring)) - return PTR_ERR(ring); + ce = intel_context_pin(i915->kernel_context, engine); + if (IS_ERR(ce)) + return PTR_ERR(ce); /* * Similarly the preempt context must always be available so that * we can interrupt the engine at any time. */ - if (engine->i915->preempt_context) { - ring = intel_context_pin(engine->i915->preempt_context, engine); - if (IS_ERR(ring)) { - ret = PTR_ERR(ring); + if (i915->preempt_context) { + ce = intel_context_pin(i915->preempt_context, engine); + if (IS_ERR(ce)) { + ret = PTR_ERR(ce); goto err_unpin_kernel; } } @@ -690,7 +697,7 @@ int intel_engine_init_common(struct intel_engine_cs *engine) if (ret) goto err_unpin_preempt; - if (HWS_NEEDS_PHYSICAL(engine->i915)) + if (HWS_NEEDS_PHYSICAL(i915)) ret = init_phys_status_page(engine); else ret = init_status_page(engine); @@ -702,10 +709,11 @@ int intel_engine_init_common(struct intel_engine_cs *engine) err_breadcrumbs: intel_engine_fini_breadcrumbs(engine); err_unpin_preempt: - if (engine->i915->preempt_context) - intel_context_unpin(engine->i915->preempt_context, engine); + if (i915->preempt_context) + __intel_context_unpin(i915->preempt_context, engine); + err_unpin_kernel: - intel_context_unpin(engine->i915->kernel_context, engine); + __intel_context_unpin(i915->kernel_context, engine); return ret; } @@ -718,6 +726,8 @@ err_unpin_kernel: */ void intel_engine_cleanup_common(struct intel_engine_cs *engine) { + struct drm_i915_private *i915 = engine->i915; + intel_engine_cleanup_scratch(engine); if (HWS_NEEDS_PHYSICAL(engine->i915)) @@ -732,9 +742,9 @@ void intel_engine_cleanup_common(struct intel_engine_cs *engine) if (engine->default_state) i915_gem_object_put(engine->default_state); - if (engine->i915->preempt_context) - intel_context_unpin(engine->i915->preempt_context, engine); - intel_context_unpin(engine->i915->kernel_context, engine); + if (i915->preempt_context) + __intel_context_unpin(i915->preempt_context, engine); + __intel_context_unpin(i915->kernel_context, engine); i915_timeline_fini(&engine->timeline); } @@ -1007,8 +1017,8 @@ bool intel_engines_are_idle(struct drm_i915_private *dev_priv) */ bool intel_engine_has_kernel_context(const struct intel_engine_cs *engine) { - const struct i915_gem_context * const kernel_context = - engine->i915->kernel_context; + const struct intel_context *kernel_context = + to_intel_context(engine->i915->kernel_context, engine); struct i915_request *rq; lockdep_assert_held(&engine->i915->drm.struct_mutex); @@ -1020,7 +1030,7 @@ bool intel_engine_has_kernel_context(const struct intel_engine_cs *engine) */ rq = __i915_gem_active_peek(&engine->timeline.last_request); if (rq) - return rq->gem_context == kernel_context; + return rq->hw_context == kernel_context; else return engine->last_retired_context == kernel_context; } @@ -1107,16 +1117,16 @@ void intel_engines_unpark(struct drm_i915_private *i915) */ void intel_engine_lost_context(struct intel_engine_cs *engine) { - struct i915_gem_context *ctx; + struct intel_context *ce; lockdep_assert_held(&engine->i915->drm.struct_mutex); engine->legacy_active_context = NULL; engine->legacy_active_ppgtt = NULL; - ctx = fetch_and_zero(&engine->last_retired_context); - if (ctx) - intel_context_unpin(ctx, engine); + ce = fetch_and_zero(&engine->last_retired_context); + if (ce) + intel_context_unpin(ce); } bool intel_engine_can_store_dword(struct intel_engine_cs *engine) diff --git a/drivers/gpu/drm/i915/intel_guc_submission.c b/drivers/gpu/drm/i915/intel_guc_submission.c index a432a193f3c4..133367a17863 100644 --- a/drivers/gpu/drm/i915/intel_guc_submission.c +++ b/drivers/gpu/drm/i915/intel_guc_submission.c @@ -513,9 +513,7 @@ static void guc_add_request(struct intel_guc *guc, struct i915_request *rq) { struct intel_guc_client *client = guc->execbuf_client; struct intel_engine_cs *engine = rq->engine; - u32 ctx_desc = - lower_32_bits(intel_lr_context_descriptor(rq->gem_context, - engine)); + u32 ctx_desc = lower_32_bits(rq->hw_context->lrc_desc); u32 ring_tail = intel_ring_set_tail(rq->ring, rq->tail) / sizeof(u64); spin_lock(&client->wq_lock); @@ -553,8 +551,8 @@ static void inject_preempt_context(struct work_struct *work) preempt_work[engine->id]); struct intel_guc_client *client = guc->preempt_client; struct guc_stage_desc *stage_desc = __get_stage_desc(client); - u32 ctx_desc = lower_32_bits(intel_lr_context_descriptor(client->owner, - engine)); + u32 ctx_desc = lower_32_bits(to_intel_context(client->owner, + engine)->lrc_desc); u32 data[7]; /* @@ -726,7 +724,7 @@ static bool __guc_dequeue(struct intel_engine_cs *engine) struct i915_request *rq, *rn; list_for_each_entry_safe(rq, rn, &p->requests, sched.link) { - if (last && rq->gem_context != last->gem_context) { + if (last && rq->hw_context != last->hw_context) { if (port == last_port) { __list_del_many(&p->requests, &rq->sched.link); diff --git a/drivers/gpu/drm/i915/intel_lrc.c b/drivers/gpu/drm/i915/intel_lrc.c index 1e9cc55d785c..b97c5d4c7877 100644 --- a/drivers/gpu/drm/i915/intel_lrc.c +++ b/drivers/gpu/drm/i915/intel_lrc.c @@ -164,7 +164,8 @@ #define WA_TAIL_BYTES (sizeof(u32) * WA_TAIL_DWORDS) static int execlists_context_deferred_alloc(struct i915_gem_context *ctx, - struct intel_engine_cs *engine); + struct intel_engine_cs *engine, + struct intel_context *ce); static void execlists_init_reg_state(u32 *reg_state, struct i915_gem_context *ctx, struct intel_engine_cs *engine, @@ -189,12 +190,7 @@ static inline bool need_preempt(const struct intel_engine_cs *engine, !i915_request_completed(last)); } -/** - * intel_lr_context_descriptor_update() - calculate & cache the descriptor - * descriptor for a pinned context - * @ctx: Context to work on - * @engine: Engine the descriptor will be used with - * +/* * The context descriptor encodes various attributes of a context, * including its GTT address and some flags. Because it's fairly * expensive to calculate, we'll just do it once and cache the result, @@ -222,9 +218,9 @@ static inline bool need_preempt(const struct intel_engine_cs *engine, */ static void intel_lr_context_descriptor_update(struct i915_gem_context *ctx, - struct intel_engine_cs *engine) + struct intel_engine_cs *engine, + struct intel_context *ce) { - struct intel_context *ce = to_intel_context(ctx, engine); u64 desc; BUILD_BUG_ON(MAX_CONTEXT_HW_ID > (BIT(GEN8_CTX_ID_WIDTH))); @@ -418,8 +414,7 @@ execlists_update_context_pdps(struct i915_hw_ppgtt *ppgtt, u32 *reg_state) static u64 execlists_update_context(struct i915_request *rq) { - struct intel_context *ce = - to_intel_context(rq->gem_context, rq->engine); + struct intel_context *ce = rq->hw_context; struct i915_hw_ppgtt *ppgtt = rq->gem_context->ppgtt ?: rq->i915->mm.aliasing_ppgtt; u32 *reg_state = ce->lrc_reg_state; @@ -496,14 +491,14 @@ static void execlists_submit_ports(struct intel_engine_cs *engine) execlists_clear_active(execlists, EXECLISTS_ACTIVE_HWACK); } -static bool ctx_single_port_submission(const struct i915_gem_context *ctx) +static bool ctx_single_port_submission(const struct intel_context *ce) { return (IS_ENABLED(CONFIG_DRM_I915_GVT) && - i915_gem_context_force_single_submission(ctx)); + i915_gem_context_force_single_submission(ce->gem_context)); } -static bool can_merge_ctx(const struct i915_gem_context *prev, - const struct i915_gem_context *next) +static bool can_merge_ctx(const struct intel_context *prev, + const struct intel_context *next) { if (prev != next) return false; @@ -680,8 +675,8 @@ static bool __execlists_dequeue(struct intel_engine_cs *engine) * second request, and so we never need to tell the * hardware about the first. */ - if (last && !can_merge_ctx(rq->gem_context, - last->gem_context)) { + if (last && + !can_merge_ctx(rq->hw_context, last->hw_context)) { /* * If we are on the second port and cannot * combine this request with the last, then we @@ -700,14 +695,14 @@ static bool __execlists_dequeue(struct intel_engine_cs *engine) * the same context (even though a different * request) to the second port. */ - if (ctx_single_port_submission(last->gem_context) || - ctx_single_port_submission(rq->gem_context)) { + if (ctx_single_port_submission(last->hw_context) || + ctx_single_port_submission(rq->hw_context)) { __list_del_many(&p->requests, &rq->sched.link); goto done; } - GEM_BUG_ON(last->gem_context == rq->gem_context); + GEM_BUG_ON(last->hw_context == rq->hw_context); if (submit) port_assign(port, last); @@ -1339,6 +1334,37 @@ static void execlists_schedule(struct i915_request *request, spin_unlock_irq(&engine->timeline.lock); } +static void execlists_context_destroy(struct intel_context *ce) +{ + GEM_BUG_ON(!ce->state); + GEM_BUG_ON(ce->pin_count); + + intel_ring_free(ce->ring); + __i915_gem_object_release_unless_active(ce->state->obj); +} + +static void __execlists_context_unpin(struct intel_context *ce) +{ + intel_ring_unpin(ce->ring); + + ce->state->obj->pin_global--; + i915_gem_object_unpin_map(ce->state->obj); + i915_vma_unpin(ce->state); + + i915_gem_context_put(ce->gem_context); +} + +static void execlists_context_unpin(struct intel_context *ce) +{ + lockdep_assert_held(&ce->gem_context->i915->drm.struct_mutex); + GEM_BUG_ON(ce->pin_count == 0); + + if (--ce->pin_count) + return; + + __execlists_context_unpin(ce); +} + static int __context_pin(struct i915_gem_context *ctx, struct i915_vma *vma) { unsigned int flags; @@ -1362,21 +1388,15 @@ static int __context_pin(struct i915_gem_context *ctx, struct i915_vma *vma) return i915_vma_pin(vma, 0, GEN8_LR_CONTEXT_ALIGN, flags); } -static struct intel_ring * -execlists_context_pin(struct intel_engine_cs *engine, - struct i915_gem_context *ctx) +static struct intel_context * +__execlists_context_pin(struct intel_engine_cs *engine, + struct i915_gem_context *ctx, + struct intel_context *ce) { - struct intel_context *ce = to_intel_context(ctx, engine); void *vaddr; int ret; - lockdep_assert_held(&ctx->i915->drm.struct_mutex); - - if (likely(ce->pin_count++)) - goto out; - GEM_BUG_ON(!ce->pin_count); /* no overflow please! */ - - ret = execlists_context_deferred_alloc(ctx, engine); + ret = execlists_context_deferred_alloc(ctx, engine, ce); if (ret) goto err; GEM_BUG_ON(!ce->state); @@ -1395,7 +1415,7 @@ execlists_context_pin(struct intel_engine_cs *engine, if (ret) goto unpin_map; - intel_lr_context_descriptor_update(ctx, engine); + intel_lr_context_descriptor_update(ctx, engine, ce); ce->lrc_reg_state = vaddr + LRC_STATE_PN * PAGE_SIZE; ce->lrc_reg_state[CTX_RING_BUFFER_START+1] = @@ -1404,8 +1424,7 @@ execlists_context_pin(struct intel_engine_cs *engine, ce->state->obj->pin_global++; i915_gem_context_get(ctx); -out: - return ce->ring; + return ce; unpin_map: i915_gem_object_unpin_map(ce->state->obj); @@ -1416,33 +1435,33 @@ err: return ERR_PTR(ret); } -static void execlists_context_unpin(struct intel_engine_cs *engine, - struct i915_gem_context *ctx) +static const struct intel_context_ops execlists_context_ops = { + .unpin = execlists_context_unpin, + .destroy = execlists_context_destroy, +}; + +static struct intel_context * +execlists_context_pin(struct intel_engine_cs *engine, + struct i915_gem_context *ctx) { struct intel_context *ce = to_intel_context(ctx, engine); lockdep_assert_held(&ctx->i915->drm.struct_mutex); - GEM_BUG_ON(ce->pin_count == 0); - if (--ce->pin_count) - return; - - intel_ring_unpin(ce->ring); + if (likely(ce->pin_count++)) + return ce; + GEM_BUG_ON(!ce->pin_count); /* no overflow please! */ - ce->state->obj->pin_global--; - i915_gem_object_unpin_map(ce->state->obj); - i915_vma_unpin(ce->state); + ce->ops = &execlists_context_ops; - i915_gem_context_put(ctx); + return __execlists_context_pin(engine, ctx, ce); } static int execlists_request_alloc(struct i915_request *request) { - struct intel_context *ce = - to_intel_context(request->gem_context, request->engine); int ret; - GEM_BUG_ON(!ce->pin_count); + GEM_BUG_ON(!request->hw_context->pin_count); /* Flush enough space to reduce the likelihood of waiting after * we start building the request - in which case we will just @@ -1956,7 +1975,7 @@ static void execlists_reset(struct intel_engine_cs *engine, * future request will be after userspace has had the opportunity * to recreate its own state. */ - regs = to_intel_context(request->gem_context, engine)->lrc_reg_state; + regs = request->hw_context->lrc_reg_state; if (engine->default_state) { void *defaults; @@ -2327,8 +2346,6 @@ logical_ring_default_vfuncs(struct intel_engine_cs *engine) engine->reset.finish = execlists_reset_finish; engine->context_pin = execlists_context_pin; - engine->context_unpin = execlists_context_unpin; - engine->request_alloc = execlists_request_alloc; engine->emit_flush = gen8_emit_flush; @@ -2563,7 +2580,7 @@ static void execlists_init_reg_state(u32 *regs, struct drm_i915_private *dev_priv = engine->i915; struct i915_hw_ppgtt *ppgtt = ctx->ppgtt ?: dev_priv->mm.aliasing_ppgtt; u32 base = engine->mmio_base; - bool rcs = engine->id == RCS; + bool rcs = engine->class == RENDER_CLASS; /* A context is actually a big batch buffer with several * MI_LOAD_REGISTER_IMM commands followed by (reg, value) pairs. The @@ -2710,10 +2727,10 @@ err_unpin_ctx: } static int execlists_context_deferred_alloc(struct i915_gem_context *ctx, - struct intel_engine_cs *engine) + struct intel_engine_cs *engine, + struct intel_context *ce) { struct drm_i915_gem_object *ctx_obj; - struct intel_context *ce = to_intel_context(ctx, engine); struct i915_vma *vma; uint32_t context_size; struct intel_ring *ring; diff --git a/drivers/gpu/drm/i915/intel_lrc.h b/drivers/gpu/drm/i915/intel_lrc.h index 4ec7d8dd13c8..1593194e930c 100644 --- a/drivers/gpu/drm/i915/intel_lrc.h +++ b/drivers/gpu/drm/i915/intel_lrc.h @@ -104,11 +104,4 @@ struct i915_gem_context; void intel_lr_context_resume(struct drm_i915_private *dev_priv); -static inline uint64_t -intel_lr_context_descriptor(struct i915_gem_context *ctx, - struct intel_engine_cs *engine) -{ - return to_intel_context(ctx, engine)->lrc_desc; -} - #endif /* _INTEL_LRC_H_ */ diff --git a/drivers/gpu/drm/i915/intel_ringbuffer.c b/drivers/gpu/drm/i915/intel_ringbuffer.c index 53703012ec75..0c0c9f531e4e 100644 --- a/drivers/gpu/drm/i915/intel_ringbuffer.c +++ b/drivers/gpu/drm/i915/intel_ringbuffer.c @@ -571,8 +571,7 @@ static void reset_ring(struct intel_engine_cs *engine, */ if (request) { struct drm_i915_private *dev_priv = request->i915; - struct intel_context *ce = - to_intel_context(request->gem_context, engine); + struct intel_context *ce = request->hw_context; struct i915_hw_ppgtt *ppgtt; if (ce->state) { @@ -1186,7 +1185,31 @@ intel_ring_free(struct intel_ring *ring) kfree(ring); } -static int context_pin(struct intel_context *ce) +static void intel_ring_context_destroy(struct intel_context *ce) +{ + GEM_BUG_ON(ce->pin_count); + + if (ce->state) + __i915_gem_object_release_unless_active(ce->state->obj); +} + +static void intel_ring_context_unpin(struct intel_context *ce) +{ + lockdep_assert_held(&ce->gem_context->i915->drm.struct_mutex); + GEM_BUG_ON(ce->pin_count == 0); + + if (--ce->pin_count) + return; + + if (ce->state) { + ce->state->obj->pin_global--; + i915_vma_unpin(ce->state); + } + + i915_gem_context_put(ce->gem_context); +} + +static int __context_pin(struct intel_context *ce) { struct i915_vma *vma = ce->state; int ret; @@ -1275,25 +1298,19 @@ err_obj: return ERR_PTR(err); } -static struct intel_ring * -intel_ring_context_pin(struct intel_engine_cs *engine, - struct i915_gem_context *ctx) +static struct intel_context * +__ring_context_pin(struct intel_engine_cs *engine, + struct i915_gem_context *ctx, + struct intel_context *ce) { - struct intel_context *ce = to_intel_context(ctx, engine); - int ret; - - lockdep_assert_held(&ctx->i915->drm.struct_mutex); - - if (likely(ce->pin_count++)) - goto out; - GEM_BUG_ON(!ce->pin_count); /* no overflow please! */ + int err; if (!ce->state && engine->context_size) { struct i915_vma *vma; vma = alloc_context_vma(engine); if (IS_ERR(vma)) { - ret = PTR_ERR(vma); + err = PTR_ERR(vma); goto err; } @@ -1301,8 +1318,8 @@ intel_ring_context_pin(struct intel_engine_cs *engine, } if (ce->state) { - ret = context_pin(ce); - if (ret) + err = __context_pin(ce); + if (err) goto err; ce->state->obj->pin_global++; @@ -1310,32 +1327,37 @@ intel_ring_context_pin(struct intel_engine_cs *engine, i915_gem_context_get(ctx); -out: /* One ringbuffer to rule them all */ - return engine->buffer; + GEM_BUG_ON(!engine->buffer); + ce->ring = engine->buffer; + + return ce; err: ce->pin_count = 0; - return ERR_PTR(ret); + return ERR_PTR(err); } -static void intel_ring_context_unpin(struct intel_engine_cs *engine, - struct i915_gem_context *ctx) +static const struct intel_context_ops ring_context_ops = { + .unpin = intel_ring_context_unpin, + .destroy = intel_ring_context_destroy, +}; + +static struct intel_context * +intel_ring_context_pin(struct intel_engine_cs *engine, + struct i915_gem_context *ctx) { struct intel_context *ce = to_intel_context(ctx, engine); lockdep_assert_held(&ctx->i915->drm.struct_mutex); - GEM_BUG_ON(ce->pin_count == 0); - if (--ce->pin_count) - return; + if (likely(ce->pin_count++)) + return ce; + GEM_BUG_ON(!ce->pin_count); /* no overflow please! */ - if (ce->state) { - ce->state->obj->pin_global--; - i915_vma_unpin(ce->state); - } + ce->ops = &ring_context_ops; - i915_gem_context_put(ctx); + return __ring_context_pin(engine, ctx, ce); } static int intel_init_ring_buffer(struct intel_engine_cs *engine) @@ -1346,10 +1368,6 @@ static int intel_init_ring_buffer(struct intel_engine_cs *engine) intel_engine_setup_common(engine); - err = intel_engine_init_common(engine); - if (err) - goto err; - timeline = i915_timeline_create(engine->i915, engine->name); if (IS_ERR(timeline)) { err = PTR_ERR(timeline); @@ -1371,8 +1389,14 @@ static int intel_init_ring_buffer(struct intel_engine_cs *engine) GEM_BUG_ON(engine->buffer); engine->buffer = ring; + err = intel_engine_init_common(engine); + if (err) + goto err_unpin; + return 0; +err_unpin: + intel_ring_unpin(ring); err_ring: intel_ring_free(ring); err: @@ -1458,7 +1482,7 @@ static inline int mi_set_context(struct i915_request *rq, u32 flags) *cs++ = MI_NOOP; *cs++ = MI_SET_CONTEXT; - *cs++ = i915_ggtt_offset(to_intel_context(rq->gem_context, engine)->state) | flags; + *cs++ = i915_ggtt_offset(rq->hw_context->state) | flags; /* * w/a: MI_SET_CONTEXT must always be followed by MI_NOOP * WaMiSetContext_Hang:snb,ivb,vlv @@ -1549,7 +1573,7 @@ static int switch_context(struct i915_request *rq) hw_flags = MI_FORCE_RESTORE; } - if (to_intel_context(to_ctx, engine)->state && + if (rq->hw_context->state && (to_ctx != from_ctx || hw_flags & MI_FORCE_RESTORE)) { GEM_BUG_ON(engine->id != RCS); @@ -1597,7 +1621,7 @@ static int ring_request_alloc(struct i915_request *request) { int ret; - GEM_BUG_ON(!to_intel_context(request->gem_context, request->engine)->pin_count); + GEM_BUG_ON(!request->hw_context->pin_count); /* Flush enough space to reduce the likelihood of waiting after * we start building the request - in which case we will just @@ -2028,8 +2052,6 @@ static void intel_ring_default_vfuncs(struct drm_i915_private *dev_priv, engine->reset.finish = reset_finish; engine->context_pin = intel_ring_context_pin; - engine->context_unpin = intel_ring_context_unpin; - engine->request_alloc = ring_request_alloc; engine->emit_breadcrumb = i9xx_emit_breadcrumb; diff --git a/drivers/gpu/drm/i915/intel_ringbuffer.h b/drivers/gpu/drm/i915/intel_ringbuffer.h index 2b16185e36c4..20c4e13efc0d 100644 --- a/drivers/gpu/drm/i915/intel_ringbuffer.h +++ b/drivers/gpu/drm/i915/intel_ringbuffer.h @@ -436,10 +436,9 @@ struct intel_engine_cs { void (*set_default_submission)(struct intel_engine_cs *engine); - struct intel_ring *(*context_pin)(struct intel_engine_cs *engine, - struct i915_gem_context *ctx); - void (*context_unpin)(struct intel_engine_cs *engine, - struct i915_gem_context *ctx); + struct intel_context *(*context_pin)(struct intel_engine_cs *engine, + struct i915_gem_context *ctx); + int (*request_alloc)(struct i915_request *rq); int (*init_context)(struct i915_request *rq); @@ -555,7 +554,7 @@ struct intel_engine_cs { * to the kernel context and trash it as the save may not happen * before the hardware is powered down. */ - struct i915_gem_context *last_retired_context; + struct intel_context *last_retired_context; /* We track the current MI_SET_CONTEXT in order to eliminate * redudant context switches. This presumes that requests are not diff --git a/drivers/gpu/drm/i915/selftests/mock_context.c b/drivers/gpu/drm/i915/selftests/mock_context.c index 501becc47c0c..8904f1ce64e3 100644 --- a/drivers/gpu/drm/i915/selftests/mock_context.c +++ b/drivers/gpu/drm/i915/selftests/mock_context.c @@ -30,6 +30,7 @@ mock_context(struct drm_i915_private *i915, const char *name) { struct i915_gem_context *ctx; + unsigned int n; int ret; ctx = kzalloc(sizeof(*ctx), GFP_KERNEL); @@ -43,6 +44,12 @@ mock_context(struct drm_i915_private *i915, INIT_RADIX_TREE(&ctx->handles_vma, GFP_KERNEL); INIT_LIST_HEAD(&ctx->handles_list); + for (n = 0; n < ARRAY_SIZE(ctx->__engine); n++) { + struct intel_context *ce = &ctx->__engine[n]; + + ce->gem_context = ctx; + } + ret = ida_simple_get(&i915->contexts.hw_ida, 0, MAX_CONTEXT_HW_ID, GFP_KERNEL); if (ret < 0) diff --git a/drivers/gpu/drm/i915/selftests/mock_engine.c b/drivers/gpu/drm/i915/selftests/mock_engine.c index 26bf29d97007..33eddfc1f8ce 100644 --- a/drivers/gpu/drm/i915/selftests/mock_engine.c +++ b/drivers/gpu/drm/i915/selftests/mock_engine.c @@ -72,25 +72,37 @@ static void hw_delay_complete(struct timer_list *t) spin_unlock(&engine->hw_lock); } -static struct intel_ring * -mock_context_pin(struct intel_engine_cs *engine, - struct i915_gem_context *ctx) +static void mock_context_unpin(struct intel_context *ce) { - struct intel_context *ce = to_intel_context(ctx, engine); + if (--ce->pin_count) + return; - if (!ce->pin_count++) - i915_gem_context_get(ctx); + i915_gem_context_put(ce->gem_context); +} - return engine->buffer; +static void mock_context_destroy(struct intel_context *ce) +{ + GEM_BUG_ON(ce->pin_count); } -static void mock_context_unpin(struct intel_engine_cs *engine, - struct i915_gem_context *ctx) +static const struct intel_context_ops mock_context_ops = { + .unpin = mock_context_unpin, + .destroy = mock_context_destroy, +}; + +static struct intel_context * +mock_context_pin(struct intel_engine_cs *engine, + struct i915_gem_context *ctx) { struct intel_context *ce = to_intel_context(ctx, engine); - if (!--ce->pin_count) - i915_gem_context_put(ctx); + if (!ce->pin_count++) { + i915_gem_context_get(ctx); + ce->ring = engine->buffer; + ce->ops = &mock_context_ops; + } + + return ce; } static int mock_request_alloc(struct i915_request *request) @@ -185,7 +197,6 @@ struct intel_engine_cs *mock_engine(struct drm_i915_private *i915, engine->base.status_page.page_addr = (void *)(engine + 1); engine->base.context_pin = mock_context_pin; - engine->base.context_unpin = mock_context_unpin; engine->base.request_alloc = mock_request_alloc; engine->base.emit_flush = mock_emit_flush; engine->base.emit_breadcrumb = mock_emit_breadcrumb; @@ -238,11 +249,13 @@ void mock_engine_free(struct intel_engine_cs *engine) { struct mock_engine *mock = container_of(engine, typeof(*mock), base); + struct intel_context *ce; GEM_BUG_ON(timer_pending(&mock->hw_delay)); - if (engine->last_retired_context) - intel_context_unpin(engine->last_retired_context, engine); + ce = fetch_and_zero(&engine->last_retired_context); + if (ce) + intel_context_unpin(ce); mock_ring_free(engine->buffer); -- cgit v1.2.3 From dd0cf235d81f24c1ba80c4a000bafc9f2dce3840 Mon Sep 17 00:00:00 2001 From: Chris Wilson Date: Sun, 6 May 2018 18:13:28 +0100 Subject: drm/i915: Speed up idle detection by kicking the tasklets We rely on ksoftirqd to run in a timely fashion in order to drain the execlists queue. Quite frequently, it does not. In some cases we may see latencies of over 200ms triggering our idle timeouts and forcing us to declare the driver wedged! Thus we can speed up idle detection by bypassing ksoftirqd in these cases and flush our tasklet to confirm if we are indeed still waiting for the ELSP to drain. v2: Put the execlists.first check back; it is required for handling reset! References: https://bugs.freedesktop.org/show_bug.cgi?id=106373 Signed-off-by: Chris Wilson Cc: Tvrtko Ursulin Cc: Mika Kuoppala Reviewed-by: Mika Kuoppala Link: https://patchwork.freedesktop.org/patch/msgid/20180506171328.30034-1-chris@chris-wilson.co.uk --- drivers/gpu/drm/i915/intel_engine_cs.c | 15 ++++++++++++--- 1 file changed, 12 insertions(+), 3 deletions(-) (limited to 'drivers/gpu/drm/i915/intel_engine_cs.c') diff --git a/drivers/gpu/drm/i915/intel_engine_cs.c b/drivers/gpu/drm/i915/intel_engine_cs.c index 26f9f8aab949..a2173e66a63b 100644 --- a/drivers/gpu/drm/i915/intel_engine_cs.c +++ b/drivers/gpu/drm/i915/intel_engine_cs.c @@ -973,10 +973,19 @@ bool intel_engine_is_idle(struct intel_engine_cs *engine) return true; /* Waiting to drain ELSP? */ - if (READ_ONCE(engine->execlists.active)) - return false; + if (READ_ONCE(engine->execlists.active)) { + struct intel_engine_execlists *execlists = &engine->execlists; + + if (tasklet_trylock(&execlists->tasklet)) { + execlists->tasklet.func(execlists->tasklet.data); + tasklet_unlock(&execlists->tasklet); + } + + if (READ_ONCE(execlists->active)) + return false; + } - /* ELSP is empty, but there are ready requests? */ + /* ELSP is empty, but there are ready requests? E.g. after reset */ if (READ_ONCE(engine->execlists.first)) return false; -- cgit v1.2.3 From d6d12ec081776bdea7e0ad58f2a7e7f92e414e7f Mon Sep 17 00:00:00 2001 From: Chris Wilson Date: Fri, 18 May 2018 10:02:10 +0100 Subject: drm/i915: Make intel_engine_dump irqsafe To be useful later, enable intel_engine_dump() to be called from irq context (i.e. using saving and restoring irq start rather than assuming we enter with irqs enabled). Signed-off-by: Chris Wilson Reviewed-by: Tvrtko Ursulin Link: https://patchwork.freedesktop.org/patch/msgid/20180518090212.5349-1-chris@chris-wilson.co.uk --- drivers/gpu/drm/i915/intel_engine_cs.c | 11 +++++++---- 1 file changed, 7 insertions(+), 4 deletions(-) (limited to 'drivers/gpu/drm/i915/intel_engine_cs.c') diff --git a/drivers/gpu/drm/i915/intel_engine_cs.c b/drivers/gpu/drm/i915/intel_engine_cs.c index a2173e66a63b..847797215a73 100644 --- a/drivers/gpu/drm/i915/intel_engine_cs.c +++ b/drivers/gpu/drm/i915/intel_engine_cs.c @@ -1367,6 +1367,7 @@ void intel_engine_dump(struct intel_engine_cs *engine, const struct intel_engine_execlists * const execlists = &engine->execlists; struct i915_gpu_error * const error = &engine->i915->gpu_error; struct i915_request *rq, *last; + unsigned long flags; struct rb_node *rb; int count; @@ -1433,7 +1434,8 @@ void intel_engine_dump(struct intel_engine_cs *engine, drm_printf(m, "\tDevice is asleep; skipping register dump\n"); } - spin_lock_irq(&engine->timeline.lock); + local_irq_save(flags); + spin_lock(&engine->timeline.lock); last = NULL; count = 0; @@ -1475,16 +1477,17 @@ void intel_engine_dump(struct intel_engine_cs *engine, print_request(m, last, "\t\tQ "); } - spin_unlock_irq(&engine->timeline.lock); + spin_unlock(&engine->timeline.lock); - spin_lock_irq(&b->rb_lock); + spin_lock(&b->rb_lock); for (rb = rb_first(&b->waiters); rb; rb = rb_next(rb)) { struct intel_wait *w = rb_entry(rb, typeof(*w), node); drm_printf(m, "\t%s [%d] waiting for %x\n", w->tsk->comm, w->tsk->pid, w->seqno); } - spin_unlock_irq(&b->rb_lock); + spin_unlock(&b->rb_lock); + local_irq_restore(flags); drm_printf(m, "IRQ? 0x%lx (breadcrumbs? %s) (execlists? %s)\n", engine->irq_posted, -- cgit v1.2.3 From fe0c493538f42ee1cbc4701dc6f9eb2b82d856cf Mon Sep 17 00:00:00 2001 From: Chris Wilson Date: Fri, 18 May 2018 10:02:11 +0100 Subject: drm/i915/execlists: Handle copying default context state for atomic reset We want to be able to reset the GPU from inside a timer callback (hardirq context). One step requires us to copy the default context state over to the guilty context, which means we need to plan in advance to have that object accessible from within an atomic context. The atomic context prevents us from pinning the object or from peeking into the shmemfs backing store (all may sleep), so we choose to pin the default_state into memory when the engine becomes active. This compromise allows us to swap out the default state when idle, when required. References: 5692251c254a ("drm/i915/lrc: Scrub the GPU state of the guilty hanging request") Signed-off-by: Chris Wilson Reviewed-by: Tvrtko Ursulin Link: https://patchwork.freedesktop.org/patch/msgid/20180518090212.5349-2-chris@chris-wilson.co.uk --- drivers/gpu/drm/i915/intel_engine_cs.c | 15 +++++++++++++++ drivers/gpu/drm/i915/intel_lrc.c | 15 ++++----------- drivers/gpu/drm/i915/intel_ringbuffer.h | 1 + 3 files changed, 20 insertions(+), 11 deletions(-) (limited to 'drivers/gpu/drm/i915/intel_engine_cs.c') diff --git a/drivers/gpu/drm/i915/intel_engine_cs.c b/drivers/gpu/drm/i915/intel_engine_cs.c index 847797215a73..0c1f084ee593 100644 --- a/drivers/gpu/drm/i915/intel_engine_cs.c +++ b/drivers/gpu/drm/i915/intel_engine_cs.c @@ -1091,6 +1091,11 @@ void intel_engines_park(struct drm_i915_private *i915) if (engine->park) engine->park(engine); + if (engine->pinned_default_state) { + i915_gem_object_unpin_map(engine->default_state); + engine->pinned_default_state = NULL; + } + i915_gem_batch_pool_fini(&engine->batch_pool); engine->execlists.no_priolist = false; } @@ -1108,6 +1113,16 @@ void intel_engines_unpark(struct drm_i915_private *i915) enum intel_engine_id id; for_each_engine(engine, i915, id) { + void *map; + + /* Pin the default state for fast resets from atomic context. */ + map = NULL; + if (engine->default_state) + map = i915_gem_object_pin_map(engine->default_state, + I915_MAP_WB); + if (!IS_ERR_OR_NULL(map)) + engine->pinned_default_state = map; + if (engine->unpark) engine->unpark(engine); diff --git a/drivers/gpu/drm/i915/intel_lrc.c b/drivers/gpu/drm/i915/intel_lrc.c index 9abd26f22b47..46e7bf4a30a9 100644 --- a/drivers/gpu/drm/i915/intel_lrc.c +++ b/drivers/gpu/drm/i915/intel_lrc.c @@ -1965,17 +1965,10 @@ static void execlists_reset(struct intel_engine_cs *engine, * to recreate its own state. */ regs = request->hw_context->lrc_reg_state; - if (engine->default_state) { - void *defaults; - - defaults = i915_gem_object_pin_map(engine->default_state, - I915_MAP_WB); - if (!IS_ERR(defaults)) { - memcpy(regs, /* skip restoring the vanilla PPHWSP */ - defaults + LRC_STATE_PN * PAGE_SIZE, - engine->context_size - PAGE_SIZE); - i915_gem_object_unpin_map(engine->default_state); - } + if (engine->pinned_default_state) { + memcpy(regs, /* skip restoring the vanilla PPHWSP */ + engine->pinned_default_state + LRC_STATE_PN * PAGE_SIZE, + engine->context_size - PAGE_SIZE); } execlists_init_reg_state(regs, request->gem_context, engine, request->ring); diff --git a/drivers/gpu/drm/i915/intel_ringbuffer.h b/drivers/gpu/drm/i915/intel_ringbuffer.h index 20c4e13efc0d..acef385c4c80 100644 --- a/drivers/gpu/drm/i915/intel_ringbuffer.h +++ b/drivers/gpu/drm/i915/intel_ringbuffer.h @@ -342,6 +342,7 @@ struct intel_engine_cs { struct i915_timeline timeline; struct drm_i915_gem_object *default_state; + void *pinned_default_state; atomic_t irq_count; unsigned long irq_posted; -- cgit v1.2.3 From 1e40d4aea57bbbd277777dd1fe18599dd77c55ab Mon Sep 17 00:00:00 2001 From: Yunwei Zhang Date: Fri, 18 May 2018 15:39:57 -0700 Subject: drm/i915/cnl: Implement WaProgramMgsrForCorrectSliceSpecificMmioReads WaProgramMgsrForCorrectSliceSpecificMmioReads dictate that before any MMIO read into Slice/Subslice specific registers, MCR packet control register(0xFDC) needs to be programmed to point to any enabled slice/subslice pair. Otherwise, incorrect value will be returned. However, that means each subsequent MMIO read will be forwarded to a specific slice/subslice combination as read is unicast. This is OK since slice/subslice specific register values are consistent in almost all cases across slice/subslice. There are rare occasions such as INSTDONE that this value will be dependent on slice/subslice combo, in such cases, we need to program 0xFDC and recover this after. This is already covered by read_subslice_reg. Also, 0xFDC will lose its information after TDR/engine reset/power state change. References: HSD#1405586840, BSID#0575 v2: - use fls() instead of find_last_bit() (Chris) - added INTEL_SSEU to extract sseu from device info. (Chris) v3: - rebase on latest tip v5: - Added references (Mika) - Change the ordered of passing arguments and etc. (Ursulin) v7: - Moved WA explanation Comments(Oscar) - Rebased. v8: - Renamed sanitize_mcr to calculate_s_ss_select. (Oscar) - calculate s/ss selector instead of whole mcr. (Oscar) v9: - Updated function name (Oscar) - Remove redundant variables (Oscar) v10: - Separate pre-GEN10 and GEN11 mask. (Oscar) Cc: Oscar Mateo Cc: Michel Thierry Cc: Joonas Lahtinen Cc: Chris Wilson Cc: Mika Kuoppala Cc: Tvrtko Ursulin Signed-off-by: Yunwei Zhang Reviewed-by: Oscar Mateo Signed-off-by: Mika Kuoppala Link: https://patchwork.freedesktop.org/patch/msgid/1526683197-24656-1-git-send-email-yunwei.zhang@intel.com --- drivers/gpu/drm/i915/i915_drv.h | 2 ++ drivers/gpu/drm/i915/intel_engine_cs.c | 30 +++++++++++++++++++++++++----- drivers/gpu/drm/i915/intel_workarounds.c | 27 +++++++++++++++++++++++++++ 3 files changed, 54 insertions(+), 5 deletions(-) (limited to 'drivers/gpu/drm/i915/intel_engine_cs.c') diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h index b86ed6401120..1b8bd84f0799 100644 --- a/drivers/gpu/drm/i915/i915_drv.h +++ b/drivers/gpu/drm/i915/i915_drv.h @@ -2743,6 +2743,8 @@ int vlv_force_gfx_clock(struct drm_i915_private *dev_priv, bool on); int intel_engines_init_mmio(struct drm_i915_private *dev_priv); int intel_engines_init(struct drm_i915_private *dev_priv); +u32 intel_calculate_mcr_s_ss_select(struct drm_i915_private *dev_priv); + /* intel_hotplug.c */ void intel_hpd_irq_handler(struct drm_i915_private *dev_priv, u32 pin_mask, u32 long_mask); diff --git a/drivers/gpu/drm/i915/intel_engine_cs.c b/drivers/gpu/drm/i915/intel_engine_cs.c index 0c1f084ee593..ad28680390b8 100644 --- a/drivers/gpu/drm/i915/intel_engine_cs.c +++ b/drivers/gpu/drm/i915/intel_engine_cs.c @@ -819,12 +819,29 @@ const char *i915_cache_level_str(struct drm_i915_private *i915, int type) } } +u32 intel_calculate_mcr_s_ss_select(struct drm_i915_private *dev_priv) +{ + const struct sseu_dev_info *sseu = &(INTEL_INFO(dev_priv)->sseu); + u32 mcr_s_ss_select; + u32 slice = fls(sseu->slice_mask); + u32 subslice = fls(sseu->subslice_mask[slice]); + + if (INTEL_GEN(dev_priv) == 10) + mcr_s_ss_select = GEN8_MCR_SLICE(slice) | + GEN8_MCR_SUBSLICE(subslice); + else + mcr_s_ss_select = 0; + + return mcr_s_ss_select; +} + static inline uint32_t read_subslice_reg(struct drm_i915_private *dev_priv, int slice, int subslice, i915_reg_t reg) { uint32_t mcr_slice_subslice_mask; uint32_t mcr_slice_subslice_select; + uint32_t default_mcr_s_ss_select; uint32_t mcr; uint32_t ret; enum forcewake_domains fw_domains; @@ -841,6 +858,8 @@ read_subslice_reg(struct drm_i915_private *dev_priv, int slice, GEN8_MCR_SUBSLICE(subslice); } + default_mcr_s_ss_select = intel_calculate_mcr_s_ss_select(dev_priv); + fw_domains = intel_uncore_forcewake_for_reg(dev_priv, reg, FW_REG_READ); fw_domains |= intel_uncore_forcewake_for_reg(dev_priv, @@ -851,11 +870,10 @@ read_subslice_reg(struct drm_i915_private *dev_priv, int slice, intel_uncore_forcewake_get__locked(dev_priv, fw_domains); mcr = I915_READ_FW(GEN8_MCR_SELECTOR); - /* - * The HW expects the slice and sublice selectors to be reset to 0 - * after reading out the registers. - */ - WARN_ON_ONCE(mcr & mcr_slice_subslice_mask); + + WARN_ON_ONCE((mcr & mcr_slice_subslice_mask) != + default_mcr_s_ss_select); + mcr &= ~mcr_slice_subslice_mask; mcr |= mcr_slice_subslice_select; I915_WRITE_FW(GEN8_MCR_SELECTOR, mcr); @@ -863,6 +881,8 @@ read_subslice_reg(struct drm_i915_private *dev_priv, int slice, ret = I915_READ_FW(reg); mcr &= ~mcr_slice_subslice_mask; + mcr |= default_mcr_s_ss_select; + I915_WRITE_FW(GEN8_MCR_SELECTOR, mcr); intel_uncore_forcewake_put__locked(dev_priv, fw_domains); diff --git a/drivers/gpu/drm/i915/intel_workarounds.c b/drivers/gpu/drm/i915/intel_workarounds.c index 2df3538ceba5..720d8635c2cf 100644 --- a/drivers/gpu/drm/i915/intel_workarounds.c +++ b/drivers/gpu/drm/i915/intel_workarounds.c @@ -672,8 +672,35 @@ static void cfl_gt_workarounds_apply(struct drm_i915_private *dev_priv) GAMT_ECO_ENABLE_IN_PLACE_DECOMPRESS); } +static void wa_init_mcr(struct drm_i915_private *dev_priv) +{ + u32 mcr; + u32 mcr_slice_subslice_mask; + + mcr = I915_READ(GEN8_MCR_SELECTOR); + + mcr_slice_subslice_mask = GEN8_MCR_SLICE_MASK | + GEN8_MCR_SUBSLICE_MASK; + /* + * WaProgramMgsrForCorrectSliceSpecificMmioReads:cnl + * Before any MMIO read into slice/subslice specific registers, MCR + * packet control register needs to be programmed to point to any + * enabled s/ss pair. Otherwise, incorrect values will be returned. + * This means each subsequent MMIO read will be forwarded to an + * specific s/ss combination, but this is OK since these registers + * are consistent across s/ss in almost all cases. In the rare + * occasions, such as INSTDONE, where this value is dependent + * on s/ss combo, the read should be done with read_subslice_reg. + */ + mcr &= ~mcr_slice_subslice_mask; + mcr |= intel_calculate_mcr_s_ss_select(dev_priv); + I915_WRITE(GEN8_MCR_SELECTOR, mcr); +} + static void cnl_gt_workarounds_apply(struct drm_i915_private *dev_priv) { + wa_init_mcr(dev_priv); + /* WaDisableI2mCycleOnWRPort:cnl (pre-prod) */ if (IS_CNL_REVID(dev_priv, CNL_REVID_B0, CNL_REVID_B0)) I915_WRITE(GAMT_CHKN_BIT_REG, -- cgit v1.2.3 From d78fa508f08d7d91d8ce9e2cec1f04bd2d63f0a1 Mon Sep 17 00:00:00 2001 From: Yunwei Zhang Date: Fri, 18 May 2018 15:40:32 -0700 Subject: drm/i915/icl: Enable WaProgramMgsrForCorrectSliceSpecificMmioReads WaProgramMgsrForCorrectSliceSpecificMmioReads applies for Icelake as well. References: HSD#1405586840, BSID#0575 v2: - GEN11 mask is different from its predecessors. (Oscar) - Better separate GEN10 and GEN11. (Oscar) Cc: Oscar Mateo Cc: Michel Thierry Cc: Joonas Lahtinen Cc: Chris Wilson Cc: Mika Kuoppala Cc: Tvrtko Ursulin Signed-off-by: Yunwei Zhang Reviewed-by: Oscar Mateo Signed-off-by: Mika Kuoppala Link: https://patchwork.freedesktop.org/patch/msgid/1526683232-24753-1-git-send-email-yunwei.zhang@intel.com --- drivers/gpu/drm/i915/intel_engine_cs.c | 3 +++ drivers/gpu/drm/i915/intel_workarounds.c | 12 +++++++++--- 2 files changed, 12 insertions(+), 3 deletions(-) (limited to 'drivers/gpu/drm/i915/intel_engine_cs.c') diff --git a/drivers/gpu/drm/i915/intel_engine_cs.c b/drivers/gpu/drm/i915/intel_engine_cs.c index ad28680390b8..13448ea76f57 100644 --- a/drivers/gpu/drm/i915/intel_engine_cs.c +++ b/drivers/gpu/drm/i915/intel_engine_cs.c @@ -829,6 +829,9 @@ u32 intel_calculate_mcr_s_ss_select(struct drm_i915_private *dev_priv) if (INTEL_GEN(dev_priv) == 10) mcr_s_ss_select = GEN8_MCR_SLICE(slice) | GEN8_MCR_SUBSLICE(subslice); + else if (INTEL_GEN(dev_priv) >= 11) + mcr_s_ss_select = GEN11_MCR_SLICE(slice) | + GEN11_MCR_SUBSLICE(subslice); else mcr_s_ss_select = 0; diff --git a/drivers/gpu/drm/i915/intel_workarounds.c b/drivers/gpu/drm/i915/intel_workarounds.c index 720d8635c2cf..2deec58e97dd 100644 --- a/drivers/gpu/drm/i915/intel_workarounds.c +++ b/drivers/gpu/drm/i915/intel_workarounds.c @@ -679,10 +679,14 @@ static void wa_init_mcr(struct drm_i915_private *dev_priv) mcr = I915_READ(GEN8_MCR_SELECTOR); - mcr_slice_subslice_mask = GEN8_MCR_SLICE_MASK | - GEN8_MCR_SUBSLICE_MASK; + if (INTEL_GEN(dev_priv) >= 11) + mcr_slice_subslice_mask = GEN11_MCR_SLICE_MASK | + GEN11_MCR_SUBSLICE_MASK; + else + mcr_slice_subslice_mask = GEN8_MCR_SLICE_MASK | + GEN8_MCR_SUBSLICE_MASK; /* - * WaProgramMgsrForCorrectSliceSpecificMmioReads:cnl + * WaProgramMgsrForCorrectSliceSpecificMmioReads:cnl,icl * Before any MMIO read into slice/subslice specific registers, MCR * packet control register needs to be programmed to point to any * enabled s/ss pair. Otherwise, incorrect values will be returned. @@ -719,6 +723,8 @@ static void cnl_gt_workarounds_apply(struct drm_i915_private *dev_priv) static void icl_gt_workarounds_apply(struct drm_i915_private *dev_priv) { + wa_init_mcr(dev_priv); + /* This is not an Wa. Enable for better image quality */ I915_WRITE(_3D_CHICKEN3, _MASKED_BIT_ENABLE(_3D_CHICKEN3_AA_LINE_QUALITY_FIX_ENABLE)); -- cgit v1.2.3 From 82ad6443a55ea274ab2f0e24ada71f0529f3238b Mon Sep 17 00:00:00 2001 From: Chris Wilson Date: Tue, 5 Jun 2018 16:37:58 +0100 Subject: drm/i915/gtt: Rename i915_hw_ppgtt base member In the near future, I want to subclass gen6_hw_ppgtt as it contains a few specialised members and I wish to add more. To avoid the ugliness of using ppgtt->base.base, rename the i915_hw_ppgtt base member (i915_address_space) as vm, which is our common shorthand for an i915_address_space local. Signed-off-by: Chris Wilson Cc: Joonas Lahtinen Cc: Mika Kuoppala Cc: Matthew Auld Reviewed-by: Mika Kuoppala Link: https://patchwork.freedesktop.org/patch/msgid/20180605153758.18422-1-chris@chris-wilson.co.uk --- drivers/gpu/drm/i915/gvt/aperture_gm.c | 2 +- drivers/gpu/drm/i915/gvt/gvt.h | 4 +- drivers/gpu/drm/i915/i915_debugfs.c | 4 +- drivers/gpu/drm/i915/i915_drv.h | 2 +- drivers/gpu/drm/i915/i915_gem.c | 30 +- drivers/gpu/drm/i915/i915_gem_context.c | 10 +- drivers/gpu/drm/i915/i915_gem_execbuffer.c | 16 +- drivers/gpu/drm/i915/i915_gem_gtt.c | 333 ++++++++++----------- drivers/gpu/drm/i915/i915_gem_gtt.h | 8 +- drivers/gpu/drm/i915/i915_gem_render_state.c | 2 +- drivers/gpu/drm/i915/i915_gem_shrinker.c | 2 +- drivers/gpu/drm/i915/i915_gem_stolen.c | 6 +- drivers/gpu/drm/i915/i915_gpu_error.c | 9 +- drivers/gpu/drm/i915/i915_trace.h | 4 +- drivers/gpu/drm/i915/i915_vgpu.c | 8 +- drivers/gpu/drm/i915/i915_vma.c | 2 +- drivers/gpu/drm/i915/intel_engine_cs.c | 4 +- drivers/gpu/drm/i915/intel_guc.c | 2 +- drivers/gpu/drm/i915/intel_guc_submission.c | 2 +- drivers/gpu/drm/i915/intel_lrc.c | 10 +- drivers/gpu/drm/i915/intel_ringbuffer.c | 4 +- drivers/gpu/drm/i915/selftests/huge_pages.c | 50 ++-- drivers/gpu/drm/i915/selftests/i915_gem_context.c | 6 +- drivers/gpu/drm/i915/selftests/i915_gem_evict.c | 34 +-- drivers/gpu/drm/i915/selftests/i915_gem_gtt.c | 110 +++---- drivers/gpu/drm/i915/selftests/i915_gem_object.c | 6 +- drivers/gpu/drm/i915/selftests/i915_request.c | 5 +- drivers/gpu/drm/i915/selftests/i915_vma.c | 31 +- drivers/gpu/drm/i915/selftests/intel_hangcheck.c | 4 +- drivers/gpu/drm/i915/selftests/intel_lrc.c | 2 +- drivers/gpu/drm/i915/selftests/intel_workarounds.c | 2 +- drivers/gpu/drm/i915/selftests/mock_gtt.c | 66 ++-- 32 files changed, 390 insertions(+), 390 deletions(-) (limited to 'drivers/gpu/drm/i915/intel_engine_cs.c') diff --git a/drivers/gpu/drm/i915/gvt/aperture_gm.c b/drivers/gpu/drm/i915/gvt/aperture_gm.c index 7c9ec4f4f36c..380eeb2a0e83 100644 --- a/drivers/gpu/drm/i915/gvt/aperture_gm.c +++ b/drivers/gpu/drm/i915/gvt/aperture_gm.c @@ -61,7 +61,7 @@ static int alloc_gm(struct intel_vgpu *vgpu, bool high_gm) } mutex_lock(&dev_priv->drm.struct_mutex); - ret = i915_gem_gtt_insert(&dev_priv->ggtt.base, node, + ret = i915_gem_gtt_insert(&dev_priv->ggtt.vm, node, size, I915_GTT_PAGE_SIZE, I915_COLOR_UNEVICTABLE, start, end, flags); diff --git a/drivers/gpu/drm/i915/gvt/gvt.h b/drivers/gpu/drm/i915/gvt/gvt.h index 05d15a095310..2ff0d40281a9 100644 --- a/drivers/gpu/drm/i915/gvt/gvt.h +++ b/drivers/gpu/drm/i915/gvt/gvt.h @@ -361,9 +361,9 @@ int intel_gvt_load_firmware(struct intel_gvt *gvt); #define gvt_aperture_sz(gvt) (gvt->dev_priv->ggtt.mappable_end) #define gvt_aperture_pa_base(gvt) (gvt->dev_priv->ggtt.gmadr.start) -#define gvt_ggtt_gm_sz(gvt) (gvt->dev_priv->ggtt.base.total) +#define gvt_ggtt_gm_sz(gvt) (gvt->dev_priv->ggtt.vm.total) #define gvt_ggtt_sz(gvt) \ - ((gvt->dev_priv->ggtt.base.total >> PAGE_SHIFT) << 3) + ((gvt->dev_priv->ggtt.vm.total >> PAGE_SHIFT) << 3) #define gvt_hidden_sz(gvt) (gvt_ggtt_gm_sz(gvt) - gvt_aperture_sz(gvt)) #define gvt_aperture_gmadr_base(gvt) (0) diff --git a/drivers/gpu/drm/i915/i915_debugfs.c b/drivers/gpu/drm/i915/i915_debugfs.c index 15e86d34a81c..698af45e229c 100644 --- a/drivers/gpu/drm/i915/i915_debugfs.c +++ b/drivers/gpu/drm/i915/i915_debugfs.c @@ -328,7 +328,7 @@ static int per_file_stats(int id, void *ptr, void *data) } else { struct i915_hw_ppgtt *ppgtt = i915_vm_to_ppgtt(vma->vm); - if (ppgtt->base.file != stats->file_priv) + if (ppgtt->vm.file != stats->file_priv) continue; } @@ -508,7 +508,7 @@ static int i915_gem_object_info(struct seq_file *m, void *data) dpy_count, dpy_size); seq_printf(m, "%llu [%pa] gtt total\n", - ggtt->base.total, &ggtt->mappable_end); + ggtt->vm.total, &ggtt->mappable_end); seq_printf(m, "Supported page sizes: %s\n", stringify_page_sizes(INTEL_INFO(dev_priv)->page_sizes, buf, sizeof(buf))); diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h index 06ecac4c3253..a4bb30c32a52 100644 --- a/drivers/gpu/drm/i915/i915_drv.h +++ b/drivers/gpu/drm/i915/i915_drv.h @@ -3213,7 +3213,7 @@ struct dma_buf *i915_gem_prime_export(struct drm_device *dev, static inline struct i915_hw_ppgtt * i915_vm_to_ppgtt(struct i915_address_space *vm) { - return container_of(vm, struct i915_hw_ppgtt, base); + return container_of(vm, struct i915_hw_ppgtt, vm); } /* i915_gem_fence_reg.c */ diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c index 23374f3dd79d..86f1f9aaa119 100644 --- a/drivers/gpu/drm/i915/i915_gem.c +++ b/drivers/gpu/drm/i915/i915_gem.c @@ -65,7 +65,7 @@ insert_mappable_node(struct i915_ggtt *ggtt, struct drm_mm_node *node, u32 size) { memset(node, 0, sizeof(*node)); - return drm_mm_insert_node_in_range(&ggtt->base.mm, node, + return drm_mm_insert_node_in_range(&ggtt->vm.mm, node, size, 0, I915_COLOR_UNEVICTABLE, 0, ggtt->mappable_end, DRM_MM_INSERT_LOW); @@ -249,17 +249,17 @@ i915_gem_get_aperture_ioctl(struct drm_device *dev, void *data, struct i915_vma *vma; u64 pinned; - pinned = ggtt->base.reserved; + pinned = ggtt->vm.reserved; mutex_lock(&dev->struct_mutex); - list_for_each_entry(vma, &ggtt->base.active_list, vm_link) + list_for_each_entry(vma, &ggtt->vm.active_list, vm_link) if (i915_vma_is_pinned(vma)) pinned += vma->node.size; - list_for_each_entry(vma, &ggtt->base.inactive_list, vm_link) + list_for_each_entry(vma, &ggtt->vm.inactive_list, vm_link) if (i915_vma_is_pinned(vma)) pinned += vma->node.size; mutex_unlock(&dev->struct_mutex); - args->aper_size = ggtt->base.total; + args->aper_size = ggtt->vm.total; args->aper_available_size = args->aper_size - pinned; return 0; @@ -1223,9 +1223,9 @@ i915_gem_gtt_pread(struct drm_i915_gem_object *obj, page_length = remain < page_length ? remain : page_length; if (node.allocated) { wmb(); - ggtt->base.insert_page(&ggtt->base, - i915_gem_object_get_dma_address(obj, offset >> PAGE_SHIFT), - node.start, I915_CACHE_NONE, 0); + ggtt->vm.insert_page(&ggtt->vm, + i915_gem_object_get_dma_address(obj, offset >> PAGE_SHIFT), + node.start, I915_CACHE_NONE, 0); wmb(); } else { page_base += offset & PAGE_MASK; @@ -1246,8 +1246,7 @@ i915_gem_gtt_pread(struct drm_i915_gem_object *obj, out_unpin: if (node.allocated) { wmb(); - ggtt->base.clear_range(&ggtt->base, - node.start, node.size); + ggtt->vm.clear_range(&ggtt->vm, node.start, node.size); remove_mappable_node(&node); } else { i915_vma_unpin(vma); @@ -1426,9 +1425,9 @@ i915_gem_gtt_pwrite_fast(struct drm_i915_gem_object *obj, page_length = remain < page_length ? remain : page_length; if (node.allocated) { wmb(); /* flush the write before we modify the GGTT */ - ggtt->base.insert_page(&ggtt->base, - i915_gem_object_get_dma_address(obj, offset >> PAGE_SHIFT), - node.start, I915_CACHE_NONE, 0); + ggtt->vm.insert_page(&ggtt->vm, + i915_gem_object_get_dma_address(obj, offset >> PAGE_SHIFT), + node.start, I915_CACHE_NONE, 0); wmb(); /* flush modifications to the GGTT (insert_page) */ } else { page_base += offset & PAGE_MASK; @@ -1455,8 +1454,7 @@ i915_gem_gtt_pwrite_fast(struct drm_i915_gem_object *obj, out_unpin: if (node.allocated) { wmb(); - ggtt->base.clear_range(&ggtt->base, - node.start, node.size); + ggtt->vm.clear_range(&ggtt->vm, node.start, node.size); remove_mappable_node(&node); } else { i915_vma_unpin(vma); @@ -4374,7 +4372,7 @@ i915_gem_object_ggtt_pin(struct drm_i915_gem_object *obj, u64 flags) { struct drm_i915_private *dev_priv = to_i915(obj->base.dev); - struct i915_address_space *vm = &dev_priv->ggtt.base; + struct i915_address_space *vm = &dev_priv->ggtt.vm; struct i915_vma *vma; int ret; diff --git a/drivers/gpu/drm/i915/i915_gem_context.c b/drivers/gpu/drm/i915/i915_gem_context.c index 38c6e9e4e91b..b2c7ac1b074d 100644 --- a/drivers/gpu/drm/i915/i915_gem_context.c +++ b/drivers/gpu/drm/i915/i915_gem_context.c @@ -197,7 +197,7 @@ static void context_close(struct i915_gem_context *ctx) */ lut_close(ctx); if (ctx->ppgtt) - i915_ppgtt_close(&ctx->ppgtt->base); + i915_ppgtt_close(&ctx->ppgtt->vm); ctx->file_priv = ERR_PTR(-EBADF); i915_gem_context_put(ctx); @@ -249,7 +249,7 @@ static u32 default_desc_template(const struct drm_i915_private *i915, desc = GEN8_CTX_VALID | GEN8_CTX_PRIVILEGE; address_mode = INTEL_LEGACY_32B_CONTEXT; - if (ppgtt && i915_vm_is_48bit(&ppgtt->base)) + if (ppgtt && i915_vm_is_48bit(&ppgtt->vm)) address_mode = INTEL_LEGACY_64B_CONTEXT; desc |= address_mode << GEN8_CTX_ADDRESSING_MODE_SHIFT; @@ -810,11 +810,11 @@ int i915_gem_context_getparam_ioctl(struct drm_device *dev, void *data, break; case I915_CONTEXT_PARAM_GTT_SIZE: if (ctx->ppgtt) - args->value = ctx->ppgtt->base.total; + args->value = ctx->ppgtt->vm.total; else if (to_i915(dev)->mm.aliasing_ppgtt) - args->value = to_i915(dev)->mm.aliasing_ppgtt->base.total; + args->value = to_i915(dev)->mm.aliasing_ppgtt->vm.total; else - args->value = to_i915(dev)->ggtt.base.total; + args->value = to_i915(dev)->ggtt.vm.total; break; case I915_CONTEXT_PARAM_NO_ERROR_CAPTURE: args->value = i915_gem_context_no_error_capture(ctx); diff --git a/drivers/gpu/drm/i915/i915_gem_execbuffer.c b/drivers/gpu/drm/i915/i915_gem_execbuffer.c index f627a8c47c58..eefd449502e2 100644 --- a/drivers/gpu/drm/i915/i915_gem_execbuffer.c +++ b/drivers/gpu/drm/i915/i915_gem_execbuffer.c @@ -703,7 +703,7 @@ static int eb_select_context(struct i915_execbuffer *eb) return -ENOENT; eb->ctx = ctx; - eb->vm = ctx->ppgtt ? &ctx->ppgtt->base : &eb->i915->ggtt.base; + eb->vm = ctx->ppgtt ? &ctx->ppgtt->vm : &eb->i915->ggtt.vm; eb->context_flags = 0; if (ctx->flags & CONTEXT_NO_ZEROMAP) @@ -943,9 +943,9 @@ static void reloc_cache_reset(struct reloc_cache *cache) if (cache->node.allocated) { struct i915_ggtt *ggtt = cache_to_ggtt(cache); - ggtt->base.clear_range(&ggtt->base, - cache->node.start, - cache->node.size); + ggtt->vm.clear_range(&ggtt->vm, + cache->node.start, + cache->node.size); drm_mm_remove_node(&cache->node); } else { i915_vma_unpin((struct i915_vma *)cache->node.mm); @@ -1016,7 +1016,7 @@ static void *reloc_iomap(struct drm_i915_gem_object *obj, if (IS_ERR(vma)) { memset(&cache->node, 0, sizeof(cache->node)); err = drm_mm_insert_node_in_range - (&ggtt->base.mm, &cache->node, + (&ggtt->vm.mm, &cache->node, PAGE_SIZE, 0, I915_COLOR_UNEVICTABLE, 0, ggtt->mappable_end, DRM_MM_INSERT_LOW); @@ -1037,9 +1037,9 @@ static void *reloc_iomap(struct drm_i915_gem_object *obj, offset = cache->node.start; if (cache->node.allocated) { wmb(); - ggtt->base.insert_page(&ggtt->base, - i915_gem_object_get_dma_address(obj, page), - offset, I915_CACHE_NONE, 0); + ggtt->vm.insert_page(&ggtt->vm, + i915_gem_object_get_dma_address(obj, page), + offset, I915_CACHE_NONE, 0); } else { offset += page << PAGE_SHIFT; } diff --git a/drivers/gpu/drm/i915/i915_gem_gtt.c b/drivers/gpu/drm/i915/i915_gem_gtt.c index 9302ee6e717d..029a5f4fbd92 100644 --- a/drivers/gpu/drm/i915/i915_gem_gtt.c +++ b/drivers/gpu/drm/i915/i915_gem_gtt.c @@ -780,7 +780,7 @@ static void gen8_initialize_pml4(struct i915_address_space *vm, */ static void mark_tlbs_dirty(struct i915_hw_ppgtt *ppgtt) { - ppgtt->pd_dirty_rings = INTEL_INFO(ppgtt->base.i915)->ring_mask; + ppgtt->pd_dirty_rings = INTEL_INFO(ppgtt->vm.i915)->ring_mask; } /* Removes entries from a single page table, releasing it if it's empty. @@ -973,7 +973,7 @@ gen8_ppgtt_insert_pte_entries(struct i915_hw_ppgtt *ppgtt, gen8_pte_t *vaddr; bool ret; - GEM_BUG_ON(idx->pdpe >= i915_pdpes_per_pdp(&ppgtt->base)); + GEM_BUG_ON(idx->pdpe >= i915_pdpes_per_pdp(&ppgtt->vm)); pd = pdp->page_directory[idx->pdpe]; vaddr = kmap_atomic_px(pd->page_table[idx->pde]); do { @@ -1004,7 +1004,7 @@ gen8_ppgtt_insert_pte_entries(struct i915_hw_ppgtt *ppgtt, break; } - GEM_BUG_ON(idx->pdpe >= i915_pdpes_per_pdp(&ppgtt->base)); + GEM_BUG_ON(idx->pdpe >= i915_pdpes_per_pdp(&ppgtt->vm)); pd = pdp->page_directory[idx->pdpe]; } @@ -1233,7 +1233,7 @@ free_scratch_page: static int gen8_ppgtt_notify_vgt(struct i915_hw_ppgtt *ppgtt, bool create) { - struct i915_address_space *vm = &ppgtt->base; + struct i915_address_space *vm = &ppgtt->vm; struct drm_i915_private *dev_priv = vm->i915; enum vgt_g2v_type msg; int i; @@ -1294,13 +1294,13 @@ static void gen8_ppgtt_cleanup_4lvl(struct i915_hw_ppgtt *ppgtt) int i; for (i = 0; i < GEN8_PML4ES_PER_PML4; i++) { - if (ppgtt->pml4.pdps[i] == ppgtt->base.scratch_pdp) + if (ppgtt->pml4.pdps[i] == ppgtt->vm.scratch_pdp) continue; - gen8_ppgtt_cleanup_3lvl(&ppgtt->base, ppgtt->pml4.pdps[i]); + gen8_ppgtt_cleanup_3lvl(&ppgtt->vm, ppgtt->pml4.pdps[i]); } - cleanup_px(&ppgtt->base, &ppgtt->pml4); + cleanup_px(&ppgtt->vm, &ppgtt->pml4); } static void gen8_ppgtt_cleanup(struct i915_address_space *vm) @@ -1314,7 +1314,7 @@ static void gen8_ppgtt_cleanup(struct i915_address_space *vm) if (use_4lvl(vm)) gen8_ppgtt_cleanup_4lvl(ppgtt); else - gen8_ppgtt_cleanup_3lvl(&ppgtt->base, &ppgtt->pdp); + gen8_ppgtt_cleanup_3lvl(&ppgtt->vm, &ppgtt->pdp); gen8_free_scratch(vm); } @@ -1450,7 +1450,7 @@ static void gen8_dump_pdp(struct i915_hw_ppgtt *ppgtt, gen8_pte_t scratch_pte, struct seq_file *m) { - struct i915_address_space *vm = &ppgtt->base; + struct i915_address_space *vm = &ppgtt->vm; struct i915_page_directory *pd; u32 pdpe; @@ -1460,7 +1460,7 @@ static void gen8_dump_pdp(struct i915_hw_ppgtt *ppgtt, u64 pd_start = start; u32 pde; - if (pdp->page_directory[pdpe] == ppgtt->base.scratch_pd) + if (pdp->page_directory[pdpe] == ppgtt->vm.scratch_pd) continue; seq_printf(m, "\tPDPE #%d\n", pdpe); @@ -1468,7 +1468,7 @@ static void gen8_dump_pdp(struct i915_hw_ppgtt *ppgtt, u32 pte; gen8_pte_t *pt_vaddr; - if (pd->page_table[pde] == ppgtt->base.scratch_pt) + if (pd->page_table[pde] == ppgtt->vm.scratch_pt) continue; pt_vaddr = kmap_atomic_px(pt); @@ -1501,10 +1501,10 @@ static void gen8_dump_pdp(struct i915_hw_ppgtt *ppgtt, static void gen8_dump_ppgtt(struct i915_hw_ppgtt *ppgtt, struct seq_file *m) { - struct i915_address_space *vm = &ppgtt->base; + struct i915_address_space *vm = &ppgtt->vm; const gen8_pte_t scratch_pte = gen8_pte_encode(vm->scratch_page.daddr, I915_CACHE_LLC); - u64 start = 0, length = ppgtt->base.total; + u64 start = 0, length = ppgtt->vm.total; if (use_4lvl(vm)) { u64 pml4e; @@ -1512,7 +1512,7 @@ static void gen8_dump_ppgtt(struct i915_hw_ppgtt *ppgtt, struct seq_file *m) struct i915_page_directory_pointer *pdp; gen8_for_each_pml4e(pdp, pml4, start, length, pml4e) { - if (pml4->pdps[pml4e] == ppgtt->base.scratch_pdp) + if (pml4->pdps[pml4e] == ppgtt->vm.scratch_pdp) continue; seq_printf(m, " PML4E #%llu\n", pml4e); @@ -1525,10 +1525,10 @@ static void gen8_dump_ppgtt(struct i915_hw_ppgtt *ppgtt, struct seq_file *m) static int gen8_preallocate_top_level_pdp(struct i915_hw_ppgtt *ppgtt) { - struct i915_address_space *vm = &ppgtt->base; + struct i915_address_space *vm = &ppgtt->vm; struct i915_page_directory_pointer *pdp = &ppgtt->pdp; struct i915_page_directory *pd; - u64 start = 0, length = ppgtt->base.total; + u64 start = 0, length = ppgtt->vm.total; u64 from = start; unsigned int pdpe; @@ -1564,11 +1564,11 @@ unwind: */ static int gen8_ppgtt_init(struct i915_hw_ppgtt *ppgtt) { - struct i915_address_space *vm = &ppgtt->base; + struct i915_address_space *vm = &ppgtt->vm; struct drm_i915_private *dev_priv = vm->i915; int ret; - ppgtt->base.total = USES_FULL_48BIT_PPGTT(dev_priv) ? + ppgtt->vm.total = USES_FULL_48BIT_PPGTT(dev_priv) ? 1ULL << 48 : 1ULL << 32; @@ -1576,26 +1576,26 @@ static int gen8_ppgtt_init(struct i915_hw_ppgtt *ppgtt) * And we are not sure about the latter so play safe for now. */ if (IS_CHERRYVIEW(dev_priv) || IS_BROXTON(dev_priv)) - ppgtt->base.pt_kmap_wc = true; + ppgtt->vm.pt_kmap_wc = true; - ret = gen8_init_scratch(&ppgtt->base); + ret = gen8_init_scratch(&ppgtt->vm); if (ret) { - ppgtt->base.total = 0; + ppgtt->vm.total = 0; return ret; } if (use_4lvl(vm)) { - ret = setup_px(&ppgtt->base, &ppgtt->pml4); + ret = setup_px(&ppgtt->vm, &ppgtt->pml4); if (ret) goto free_scratch; - gen8_initialize_pml4(&ppgtt->base, &ppgtt->pml4); + gen8_initialize_pml4(&ppgtt->vm, &ppgtt->pml4); - ppgtt->base.allocate_va_range = gen8_ppgtt_alloc_4lvl; - ppgtt->base.insert_entries = gen8_ppgtt_insert_4lvl; - ppgtt->base.clear_range = gen8_ppgtt_clear_4lvl; + ppgtt->vm.allocate_va_range = gen8_ppgtt_alloc_4lvl; + ppgtt->vm.insert_entries = gen8_ppgtt_insert_4lvl; + ppgtt->vm.clear_range = gen8_ppgtt_clear_4lvl; } else { - ret = __pdp_init(&ppgtt->base, &ppgtt->pdp); + ret = __pdp_init(&ppgtt->vm, &ppgtt->pdp); if (ret) goto free_scratch; @@ -1607,35 +1607,35 @@ static int gen8_ppgtt_init(struct i915_hw_ppgtt *ppgtt) } } - ppgtt->base.allocate_va_range = gen8_ppgtt_alloc_3lvl; - ppgtt->base.insert_entries = gen8_ppgtt_insert_3lvl; - ppgtt->base.clear_range = gen8_ppgtt_clear_3lvl; + ppgtt->vm.allocate_va_range = gen8_ppgtt_alloc_3lvl; + ppgtt->vm.insert_entries = gen8_ppgtt_insert_3lvl; + ppgtt->vm.clear_range = gen8_ppgtt_clear_3lvl; } if (intel_vgpu_active(dev_priv)) gen8_ppgtt_notify_vgt(ppgtt, true); - ppgtt->base.cleanup = gen8_ppgtt_cleanup; - ppgtt->base.bind_vma = gen8_ppgtt_bind_vma; - ppgtt->base.unbind_vma = ppgtt_unbind_vma; - ppgtt->base.set_pages = ppgtt_set_pages; - ppgtt->base.clear_pages = clear_pages; + ppgtt->vm.cleanup = gen8_ppgtt_cleanup; + ppgtt->vm.bind_vma = gen8_ppgtt_bind_vma; + ppgtt->vm.unbind_vma = ppgtt_unbind_vma; + ppgtt->vm.set_pages = ppgtt_set_pages; + ppgtt->vm.clear_pages = clear_pages; ppgtt->debug_dump = gen8_dump_ppgtt; return 0; free_scratch: - gen8_free_scratch(&ppgtt->base); + gen8_free_scratch(&ppgtt->vm); return ret; } static void gen6_dump_ppgtt(struct i915_hw_ppgtt *ppgtt, struct seq_file *m) { - struct i915_address_space *vm = &ppgtt->base; + struct i915_address_space *vm = &ppgtt->vm; struct i915_page_table *unused; gen6_pte_t scratch_pte; u32 pd_entry, pte, pde; - u32 start = 0, length = ppgtt->base.total; + u32 start = 0, length = ppgtt->vm.total; scratch_pte = vm->pte_encode(vm->scratch_page.daddr, I915_CACHE_LLC, 0); @@ -1972,8 +1972,8 @@ static void gen6_ppgtt_cleanup(struct i915_address_space *vm) static int gen6_ppgtt_allocate_page_directories(struct i915_hw_ppgtt *ppgtt) { - struct i915_address_space *vm = &ppgtt->base; - struct drm_i915_private *dev_priv = ppgtt->base.i915; + struct i915_address_space *vm = &ppgtt->vm; + struct drm_i915_private *dev_priv = ppgtt->vm.i915; struct i915_ggtt *ggtt = &dev_priv->ggtt; int ret; @@ -1981,16 +1981,16 @@ static int gen6_ppgtt_allocate_page_directories(struct i915_hw_ppgtt *ppgtt) * allocator works in address space sizes, so it's multiplied by page * size. We allocate at the top of the GTT to avoid fragmentation. */ - BUG_ON(!drm_mm_initialized(&ggtt->base.mm)); + BUG_ON(!drm_mm_initialized(&ggtt->vm.mm)); ret = gen6_init_scratch(vm); if (ret) return ret; - ret = i915_gem_gtt_insert(&ggtt->base, &ppgtt->node, + ret = i915_gem_gtt_insert(&ggtt->vm, &ppgtt->node, GEN6_PD_SIZE, GEN6_PD_ALIGN, I915_COLOR_UNEVICTABLE, - 0, ggtt->base.total, + 0, ggtt->vm.total, PIN_HIGH); if (ret) goto err_out; @@ -2023,16 +2023,16 @@ static void gen6_scratch_va_range(struct i915_hw_ppgtt *ppgtt, u32 pde; gen6_for_each_pde(unused, &ppgtt->pd, start, length, pde) - ppgtt->pd.page_table[pde] = ppgtt->base.scratch_pt; + ppgtt->pd.page_table[pde] = ppgtt->vm.scratch_pt; } static int gen6_ppgtt_init(struct i915_hw_ppgtt *ppgtt) { - struct drm_i915_private *dev_priv = ppgtt->base.i915; + struct drm_i915_private *dev_priv = ppgtt->vm.i915; struct i915_ggtt *ggtt = &dev_priv->ggtt; int ret; - ppgtt->base.pte_encode = ggtt->base.pte_encode; + ppgtt->vm.pte_encode = ggtt->vm.pte_encode; if (intel_vgpu_active(dev_priv) || IS_GEN6(dev_priv)) ppgtt->switch_mm = gen6_mm_switch; else if (IS_HASWELL(dev_priv)) @@ -2046,24 +2046,24 @@ static int gen6_ppgtt_init(struct i915_hw_ppgtt *ppgtt) if (ret) return ret; - ppgtt->base.total = I915_PDES * GEN6_PTES * PAGE_SIZE; + ppgtt->vm.total = I915_PDES * GEN6_PTES * PAGE_SIZE; - gen6_scratch_va_range(ppgtt, 0, ppgtt->base.total); - gen6_write_page_range(ppgtt, 0, ppgtt->base.total); + gen6_scratch_va_range(ppgtt, 0, ppgtt->vm.total); + gen6_write_page_range(ppgtt, 0, ppgtt->vm.total); - ret = gen6_alloc_va_range(&ppgtt->base, 0, ppgtt->base.total); + ret = gen6_alloc_va_range(&ppgtt->vm, 0, ppgtt->vm.total); if (ret) { - gen6_ppgtt_cleanup(&ppgtt->base); + gen6_ppgtt_cleanup(&ppgtt->vm); return ret; } - ppgtt->base.clear_range = gen6_ppgtt_clear_range; - ppgtt->base.insert_entries = gen6_ppgtt_insert_entries; - ppgtt->base.bind_vma = gen6_ppgtt_bind_vma; - ppgtt->base.unbind_vma = ppgtt_unbind_vma; - ppgtt->base.set_pages = ppgtt_set_pages; - ppgtt->base.clear_pages = clear_pages; - ppgtt->base.cleanup = gen6_ppgtt_cleanup; + ppgtt->vm.clear_range = gen6_ppgtt_clear_range; + ppgtt->vm.insert_entries = gen6_ppgtt_insert_entries; + ppgtt->vm.bind_vma = gen6_ppgtt_bind_vma; + ppgtt->vm.unbind_vma = ppgtt_unbind_vma; + ppgtt->vm.set_pages = ppgtt_set_pages; + ppgtt->vm.clear_pages = clear_pages; + ppgtt->vm.cleanup = gen6_ppgtt_cleanup; ppgtt->debug_dump = gen6_dump_ppgtt; DRM_DEBUG_DRIVER("Allocated pde space (%lldM) at GTT entry: %llx\n", @@ -2079,8 +2079,8 @@ static int gen6_ppgtt_init(struct i915_hw_ppgtt *ppgtt) static int __hw_ppgtt_init(struct i915_hw_ppgtt *ppgtt, struct drm_i915_private *dev_priv) { - ppgtt->base.i915 = dev_priv; - ppgtt->base.dma = &dev_priv->drm.pdev->dev; + ppgtt->vm.i915 = dev_priv; + ppgtt->vm.dma = &dev_priv->drm.pdev->dev; if (INTEL_GEN(dev_priv) < 8) return gen6_ppgtt_init(ppgtt); @@ -2190,10 +2190,10 @@ i915_ppgtt_create(struct drm_i915_private *dev_priv, } kref_init(&ppgtt->ref); - i915_address_space_init(&ppgtt->base, dev_priv, name); - ppgtt->base.file = fpriv; + i915_address_space_init(&ppgtt->vm, dev_priv, name); + ppgtt->vm.file = fpriv; - trace_i915_ppgtt_create(&ppgtt->base); + trace_i915_ppgtt_create(&ppgtt->vm); return ppgtt; } @@ -2227,16 +2227,16 @@ void i915_ppgtt_release(struct kref *kref) struct i915_hw_ppgtt *ppgtt = container_of(kref, struct i915_hw_ppgtt, ref); - trace_i915_ppgtt_release(&ppgtt->base); + trace_i915_ppgtt_release(&ppgtt->vm); - ppgtt_destroy_vma(&ppgtt->base); + ppgtt_destroy_vma(&ppgtt->vm); - GEM_BUG_ON(!list_empty(&ppgtt->base.active_list)); - GEM_BUG_ON(!list_empty(&ppgtt->base.inactive_list)); - GEM_BUG_ON(!list_empty(&ppgtt->base.unbound_list)); + GEM_BUG_ON(!list_empty(&ppgtt->vm.active_list)); + GEM_BUG_ON(!list_empty(&ppgtt->vm.inactive_list)); + GEM_BUG_ON(!list_empty(&ppgtt->vm.unbound_list)); - ppgtt->base.cleanup(&ppgtt->base); - i915_address_space_fini(&ppgtt->base); + ppgtt->vm.cleanup(&ppgtt->vm); + i915_address_space_fini(&ppgtt->vm); kfree(ppgtt); } @@ -2332,7 +2332,7 @@ void i915_gem_suspend_gtt_mappings(struct drm_i915_private *dev_priv) i915_check_and_clear_faults(dev_priv); - ggtt->base.clear_range(&ggtt->base, 0, ggtt->base.total); + ggtt->vm.clear_range(&ggtt->vm, 0, ggtt->vm.total); i915_ggtt_invalidate(dev_priv); } @@ -2675,16 +2675,16 @@ static int aliasing_gtt_bind_vma(struct i915_vma *vma, struct i915_hw_ppgtt *appgtt = i915->mm.aliasing_ppgtt; if (!(vma->flags & I915_VMA_LOCAL_BIND) && - appgtt->base.allocate_va_range) { - ret = appgtt->base.allocate_va_range(&appgtt->base, - vma->node.start, - vma->size); + appgtt->vm.allocate_va_range) { + ret = appgtt->vm.allocate_va_range(&appgtt->vm, + vma->node.start, + vma->size); if (ret) return ret; } - appgtt->base.insert_entries(&appgtt->base, vma, cache_level, - pte_flags); + appgtt->vm.insert_entries(&appgtt->vm, vma, cache_level, + pte_flags); } if (flags & I915_VMA_GLOBAL_BIND) { @@ -2707,7 +2707,7 @@ static void aliasing_gtt_unbind_vma(struct i915_vma *vma) } if (vma->flags & I915_VMA_LOCAL_BIND) { - struct i915_address_space *vm = &i915->mm.aliasing_ppgtt->base; + struct i915_address_space *vm = &i915->mm.aliasing_ppgtt->vm; vm->clear_range(vm, vma->node.start, vma->size); } @@ -2774,30 +2774,30 @@ int i915_gem_init_aliasing_ppgtt(struct drm_i915_private *i915) if (IS_ERR(ppgtt)) return PTR_ERR(ppgtt); - if (WARN_ON(ppgtt->base.total < ggtt->base.total)) { + if (WARN_ON(ppgtt->vm.total < ggtt->vm.total)) { err = -ENODEV; goto err_ppgtt; } - if (ppgtt->base.allocate_va_range) { + if (ppgtt->vm.allocate_va_range) { /* Note we only pre-allocate as far as the end of the global * GTT. On 48b / 4-level page-tables, the difference is very, * very significant! We have to preallocate as GVT/vgpu does * not like the page directory disappearing. */ - err = ppgtt->base.allocate_va_range(&ppgtt->base, - 0, ggtt->base.total); + err = ppgtt->vm.allocate_va_range(&ppgtt->vm, + 0, ggtt->vm.total); if (err) goto err_ppgtt; } i915->mm.aliasing_ppgtt = ppgtt; - GEM_BUG_ON(ggtt->base.bind_vma != ggtt_bind_vma); - ggtt->base.bind_vma = aliasing_gtt_bind_vma; + GEM_BUG_ON(ggtt->vm.bind_vma != ggtt_bind_vma); + ggtt->vm.bind_vma = aliasing_gtt_bind_vma; - GEM_BUG_ON(ggtt->base.unbind_vma != ggtt_unbind_vma); - ggtt->base.unbind_vma = aliasing_gtt_unbind_vma; + GEM_BUG_ON(ggtt->vm.unbind_vma != ggtt_unbind_vma); + ggtt->vm.unbind_vma = aliasing_gtt_unbind_vma; return 0; @@ -2817,8 +2817,8 @@ void i915_gem_fini_aliasing_ppgtt(struct drm_i915_private *i915) i915_ppgtt_put(ppgtt); - ggtt->base.bind_vma = ggtt_bind_vma; - ggtt->base.unbind_vma = ggtt_unbind_vma; + ggtt->vm.bind_vma = ggtt_bind_vma; + ggtt->vm.unbind_vma = ggtt_unbind_vma; } int i915_gem_init_ggtt(struct drm_i915_private *dev_priv) @@ -2842,7 +2842,7 @@ int i915_gem_init_ggtt(struct drm_i915_private *dev_priv) return ret; /* Reserve a mappable slot for our lockless error capture */ - ret = drm_mm_insert_node_in_range(&ggtt->base.mm, &ggtt->error_capture, + ret = drm_mm_insert_node_in_range(&ggtt->vm.mm, &ggtt->error_capture, PAGE_SIZE, 0, I915_COLOR_UNEVICTABLE, 0, ggtt->mappable_end, DRM_MM_INSERT_LOW); @@ -2850,16 +2850,15 @@ int i915_gem_init_ggtt(struct drm_i915_private *dev_priv) return ret; /* Clear any non-preallocated blocks */ - drm_mm_for_each_hole(entry, &ggtt->base.mm, hole_start, hole_end) { + drm_mm_for_each_hole(entry, &ggtt->vm.mm, hole_start, hole_end) { DRM_DEBUG_KMS("clearing unused GTT space: [%lx, %lx]\n", hole_start, hole_end); - ggtt->base.clear_range(&ggtt->base, hole_start, - hole_end - hole_start); + ggtt->vm.clear_range(&ggtt->vm, hole_start, + hole_end - hole_start); } /* And finally clear the reserved guard page */ - ggtt->base.clear_range(&ggtt->base, - ggtt->base.total - PAGE_SIZE, PAGE_SIZE); + ggtt->vm.clear_range(&ggtt->vm, ggtt->vm.total - PAGE_SIZE, PAGE_SIZE); if (USES_PPGTT(dev_priv) && !USES_FULL_PPGTT(dev_priv)) { ret = i915_gem_init_aliasing_ppgtt(dev_priv); @@ -2884,11 +2883,11 @@ void i915_ggtt_cleanup_hw(struct drm_i915_private *dev_priv) struct i915_vma *vma, *vn; struct pagevec *pvec; - ggtt->base.closed = true; + ggtt->vm.closed = true; mutex_lock(&dev_priv->drm.struct_mutex); - GEM_BUG_ON(!list_empty(&ggtt->base.active_list)); - list_for_each_entry_safe(vma, vn, &ggtt->base.inactive_list, vm_link) + GEM_BUG_ON(!list_empty(&ggtt->vm.active_list)); + list_for_each_entry_safe(vma, vn, &ggtt->vm.inactive_list, vm_link) WARN_ON(i915_vma_unbind(vma)); mutex_unlock(&dev_priv->drm.struct_mutex); @@ -2900,12 +2899,12 @@ void i915_ggtt_cleanup_hw(struct drm_i915_private *dev_priv) if (drm_mm_node_allocated(&ggtt->error_capture)) drm_mm_remove_node(&ggtt->error_capture); - if (drm_mm_initialized(&ggtt->base.mm)) { + if (drm_mm_initialized(&ggtt->vm.mm)) { intel_vgt_deballoon(dev_priv); - i915_address_space_fini(&ggtt->base); + i915_address_space_fini(&ggtt->vm); } - ggtt->base.cleanup(&ggtt->base); + ggtt->vm.cleanup(&ggtt->vm); pvec = &dev_priv->mm.wc_stash; if (pvec->nr) { @@ -2955,7 +2954,7 @@ static unsigned int chv_get_total_gtt_size(u16 gmch_ctrl) static int ggtt_probe_common(struct i915_ggtt *ggtt, u64 size) { - struct drm_i915_private *dev_priv = ggtt->base.i915; + struct drm_i915_private *dev_priv = ggtt->vm.i915; struct pci_dev *pdev = dev_priv->drm.pdev; phys_addr_t phys_addr; int ret; @@ -2979,7 +2978,7 @@ static int ggtt_probe_common(struct i915_ggtt *ggtt, u64 size) return -ENOMEM; } - ret = setup_scratch_page(&ggtt->base, GFP_DMA32); + ret = setup_scratch_page(&ggtt->vm, GFP_DMA32); if (ret) { DRM_ERROR("Scratch setup failed\n"); /* iounmap will also get called at remove, but meh */ @@ -3285,7 +3284,7 @@ static void setup_private_pat(struct drm_i915_private *dev_priv) static int gen8_gmch_probe(struct i915_ggtt *ggtt) { - struct drm_i915_private *dev_priv = ggtt->base.i915; + struct drm_i915_private *dev_priv = ggtt->vm.i915; struct pci_dev *pdev = dev_priv->drm.pdev; unsigned int size; u16 snb_gmch_ctl; @@ -3309,25 +3308,25 @@ static int gen8_gmch_probe(struct i915_ggtt *ggtt) else size = gen8_get_total_gtt_size(snb_gmch_ctl); - ggtt->base.total = (size / sizeof(gen8_pte_t)) << PAGE_SHIFT; - ggtt->base.cleanup = gen6_gmch_remove; - ggtt->base.bind_vma = ggtt_bind_vma; - ggtt->base.unbind_vma = ggtt_unbind_vma; - ggtt->base.set_pages = ggtt_set_pages; - ggtt->base.clear_pages = clear_pages; - ggtt->base.insert_page = gen8_ggtt_insert_page; - ggtt->base.clear_range = nop_clear_range; + ggtt->vm.total = (size / sizeof(gen8_pte_t)) << PAGE_SHIFT; + ggtt->vm.cleanup = gen6_gmch_remove; + ggtt->vm.bind_vma = ggtt_bind_vma; + ggtt->vm.unbind_vma = ggtt_unbind_vma; + ggtt->vm.set_pages = ggtt_set_pages; + ggtt->vm.clear_pages = clear_pages; + ggtt->vm.insert_page = gen8_ggtt_insert_page; + ggtt->vm.clear_range = nop_clear_range; if (!USES_FULL_PPGTT(dev_priv) || intel_scanout_needs_vtd_wa(dev_priv)) - ggtt->base.clear_range = gen8_ggtt_clear_range; + ggtt->vm.clear_range = gen8_ggtt_clear_range; - ggtt->base.insert_entries = gen8_ggtt_insert_entries; + ggtt->vm.insert_entries = gen8_ggtt_insert_entries; /* Serialize GTT updates with aperture access on BXT if VT-d is on. */ if (intel_ggtt_update_needs_vtd_wa(dev_priv)) { - ggtt->base.insert_entries = bxt_vtd_ggtt_insert_entries__BKL; - ggtt->base.insert_page = bxt_vtd_ggtt_insert_page__BKL; - if (ggtt->base.clear_range != nop_clear_range) - ggtt->base.clear_range = bxt_vtd_ggtt_clear_range__BKL; + ggtt->vm.insert_entries = bxt_vtd_ggtt_insert_entries__BKL; + ggtt->vm.insert_page = bxt_vtd_ggtt_insert_page__BKL; + if (ggtt->vm.clear_range != nop_clear_range) + ggtt->vm.clear_range = bxt_vtd_ggtt_clear_range__BKL; } ggtt->invalidate = gen6_ggtt_invalidate; @@ -3339,7 +3338,7 @@ static int gen8_gmch_probe(struct i915_ggtt *ggtt) static int gen6_gmch_probe(struct i915_ggtt *ggtt) { - struct drm_i915_private *dev_priv = ggtt->base.i915; + struct drm_i915_private *dev_priv = ggtt->vm.i915; struct pci_dev *pdev = dev_priv->drm.pdev; unsigned int size; u16 snb_gmch_ctl; @@ -3366,29 +3365,29 @@ static int gen6_gmch_probe(struct i915_ggtt *ggtt) pci_read_config_word(pdev, SNB_GMCH_CTRL, &snb_gmch_ctl); size = gen6_get_total_gtt_size(snb_gmch_ctl); - ggtt->base.total = (size / sizeof(gen6_pte_t)) << PAGE_SHIFT; + ggtt->vm.total = (size / sizeof(gen6_pte_t)) << PAGE_SHIFT; - ggtt->base.clear_range = gen6_ggtt_clear_range; - ggtt->base.insert_page = gen6_ggtt_insert_page; - ggtt->base.insert_entries = gen6_ggtt_insert_entries; - ggtt->base.bind_vma = ggtt_bind_vma; - ggtt->base.unbind_vma = ggtt_unbind_vma; - ggtt->base.set_pages = ggtt_set_pages; - ggtt->base.clear_pages = clear_pages; - ggtt->base.cleanup = gen6_gmch_remove; + ggtt->vm.clear_range = gen6_ggtt_clear_range; + ggtt->vm.insert_page = gen6_ggtt_insert_page; + ggtt->vm.insert_entries = gen6_ggtt_insert_entries; + ggtt->vm.bind_vma = ggtt_bind_vma; + ggtt->vm.unbind_vma = ggtt_unbind_vma; + ggtt->vm.set_pages = ggtt_set_pages; + ggtt->vm.clear_pages = clear_pages; + ggtt->vm.cleanup = gen6_gmch_remove; ggtt->invalidate = gen6_ggtt_invalidate; if (HAS_EDRAM(dev_priv)) - ggtt->base.pte_encode = iris_pte_encode; + ggtt->vm.pte_encode = iris_pte_encode; else if (IS_HASWELL(dev_priv)) - ggtt->base.pte_encode = hsw_pte_encode; + ggtt->vm.pte_encode = hsw_pte_encode; else if (IS_VALLEYVIEW(dev_priv)) - ggtt->base.pte_encode = byt_pte_encode; + ggtt->vm.pte_encode = byt_pte_encode; else if (INTEL_GEN(dev_priv) >= 7) - ggtt->base.pte_encode = ivb_pte_encode; + ggtt->vm.pte_encode = ivb_pte_encode; else - ggtt->base.pte_encode = snb_pte_encode; + ggtt->vm.pte_encode = snb_pte_encode; return ggtt_probe_common(ggtt, size); } @@ -3400,7 +3399,7 @@ static void i915_gmch_remove(struct i915_address_space *vm) static int i915_gmch_probe(struct i915_ggtt *ggtt) { - struct drm_i915_private *dev_priv = ggtt->base.i915; + struct drm_i915_private *dev_priv = ggtt->vm.i915; phys_addr_t gmadr_base; int ret; @@ -3410,23 +3409,21 @@ static int i915_gmch_probe(struct i915_ggtt *ggtt) return -EIO; } - intel_gtt_get(&ggtt->base.total, - &gmadr_base, - &ggtt->mappable_end); + intel_gtt_get(&ggtt->vm.total, &gmadr_base, &ggtt->mappable_end); ggtt->gmadr = (struct resource) DEFINE_RES_MEM(gmadr_base, ggtt->mappable_end); ggtt->do_idle_maps = needs_idle_maps(dev_priv); - ggtt->base.insert_page = i915_ggtt_insert_page; - ggtt->base.insert_entries = i915_ggtt_insert_entries; - ggtt->base.clear_range = i915_ggtt_clear_range; - ggtt->base.bind_vma = ggtt_bind_vma; - ggtt->base.unbind_vma = ggtt_unbind_vma; - ggtt->base.set_pages = ggtt_set_pages; - ggtt->base.clear_pages = clear_pages; - ggtt->base.cleanup = i915_gmch_remove; + ggtt->vm.insert_page = i915_ggtt_insert_page; + ggtt->vm.insert_entries = i915_ggtt_insert_entries; + ggtt->vm.clear_range = i915_ggtt_clear_range; + ggtt->vm.bind_vma = ggtt_bind_vma; + ggtt->vm.unbind_vma = ggtt_unbind_vma; + ggtt->vm.set_pages = ggtt_set_pages; + ggtt->vm.clear_pages = clear_pages; + ggtt->vm.cleanup = i915_gmch_remove; ggtt->invalidate = gmch_ggtt_invalidate; @@ -3445,8 +3442,8 @@ int i915_ggtt_probe_hw(struct drm_i915_private *dev_priv) struct i915_ggtt *ggtt = &dev_priv->ggtt; int ret; - ggtt->base.i915 = dev_priv; - ggtt->base.dma = &dev_priv->drm.pdev->dev; + ggtt->vm.i915 = dev_priv; + ggtt->vm.dma = &dev_priv->drm.pdev->dev; if (INTEL_GEN(dev_priv) <= 5) ret = i915_gmch_probe(ggtt); @@ -3463,27 +3460,29 @@ int i915_ggtt_probe_hw(struct drm_i915_private *dev_priv) * restriction! */ if (USES_GUC(dev_priv)) { - ggtt->base.total = min_t(u64, ggtt->base.total, GUC_GGTT_TOP); - ggtt->mappable_end = min_t(u64, ggtt->mappable_end, ggtt->base.total); + ggtt->vm.total = min_t(u64, ggtt->vm.total, GUC_GGTT_TOP); + ggtt->mappable_end = + min_t(u64, ggtt->mappable_end, ggtt->vm.total); } - if ((ggtt->base.total - 1) >> 32) { + if ((ggtt->vm.total - 1) >> 32) { DRM_ERROR("We never expected a Global GTT with more than 32bits" " of address space! Found %lldM!\n", - ggtt->base.total >> 20); - ggtt->base.total = 1ULL << 32; - ggtt->mappable_end = min_t(u64, ggtt->mappable_end, ggtt->base.total); + ggtt->vm.total >> 20); + ggtt->vm.total = 1ULL << 32; + ggtt->mappable_end = + min_t(u64, ggtt->mappable_end, ggtt->vm.total); } - if (ggtt->mappable_end > ggtt->base.total) { + if (ggtt->mappable_end > ggtt->vm.total) { DRM_ERROR("mappable aperture extends past end of GGTT," " aperture=%pa, total=%llx\n", - &ggtt->mappable_end, ggtt->base.total); - ggtt->mappable_end = ggtt->base.total; + &ggtt->mappable_end, ggtt->vm.total); + ggtt->mappable_end = ggtt->vm.total; } /* GMADR is the PCI mmio aperture into the global GTT. */ - DRM_DEBUG_DRIVER("GGTT size = %lluM\n", ggtt->base.total >> 20); + DRM_DEBUG_DRIVER("GGTT size = %lluM\n", ggtt->vm.total >> 20); DRM_DEBUG_DRIVER("GMADR size = %lluM\n", (u64)ggtt->mappable_end >> 20); DRM_DEBUG_DRIVER("DSM size = %lluM\n", (u64)resource_size(&intel_graphics_stolen_res) >> 20); @@ -3510,9 +3509,9 @@ int i915_ggtt_init_hw(struct drm_i915_private *dev_priv) * and beyond the end of the GTT if we do not provide a guard. */ mutex_lock(&dev_priv->drm.struct_mutex); - i915_address_space_init(&ggtt->base, dev_priv, "[global]"); + i915_address_space_init(&ggtt->vm, dev_priv, "[global]"); if (!HAS_LLC(dev_priv) && !USES_PPGTT(dev_priv)) - ggtt->base.mm.color_adjust = i915_gtt_color_adjust; + ggtt->vm.mm.color_adjust = i915_gtt_color_adjust; mutex_unlock(&dev_priv->drm.struct_mutex); if (!io_mapping_init_wc(&dev_priv->ggtt.iomap, @@ -3535,7 +3534,7 @@ int i915_ggtt_init_hw(struct drm_i915_private *dev_priv) return 0; out_gtt_cleanup: - ggtt->base.cleanup(&ggtt->base); + ggtt->vm.cleanup(&ggtt->vm); return ret; } @@ -3574,13 +3573,13 @@ void i915_gem_restore_gtt_mappings(struct drm_i915_private *dev_priv) i915_check_and_clear_faults(dev_priv); /* First fill our portion of the GTT with scratch pages */ - ggtt->base.clear_range(&ggtt->base, 0, ggtt->base.total); + ggtt->vm.clear_range(&ggtt->vm, 0, ggtt->vm.total); - ggtt->base.closed = true; /* skip rewriting PTE on VMA unbind */ + ggtt->vm.closed = true; /* skip rewriting PTE on VMA unbind */ /* clflush objects bound into the GGTT and rebind them. */ - GEM_BUG_ON(!list_empty(&ggtt->base.active_list)); - list_for_each_entry_safe(vma, vn, &ggtt->base.inactive_list, vm_link) { + GEM_BUG_ON(!list_empty(&ggtt->vm.active_list)); + list_for_each_entry_safe(vma, vn, &ggtt->vm.inactive_list, vm_link) { struct drm_i915_gem_object *obj = vma->obj; if (!(vma->flags & I915_VMA_GLOBAL_BIND)) @@ -3593,7 +3592,7 @@ void i915_gem_restore_gtt_mappings(struct drm_i915_private *dev_priv) WARN_ON(i915_gem_object_set_to_gtt_domain(obj, false)); } - ggtt->base.closed = false; + ggtt->vm.closed = false; if (INTEL_GEN(dev_priv) >= 8) { struct intel_ppat *ppat = &dev_priv->ppat; @@ -3616,7 +3615,7 @@ void i915_gem_restore_gtt_mappings(struct drm_i915_private *dev_priv) if (!ppgtt) continue; - gen6_write_page_range(ppgtt, 0, ppgtt->base.total); + gen6_write_page_range(ppgtt, 0, ppgtt->vm.total); } } @@ -3838,7 +3837,7 @@ int i915_gem_gtt_reserve(struct i915_address_space *vm, GEM_BUG_ON(!IS_ALIGNED(size, I915_GTT_PAGE_SIZE)); GEM_BUG_ON(!IS_ALIGNED(offset, I915_GTT_MIN_ALIGNMENT)); GEM_BUG_ON(range_overflows(offset, size, vm->total)); - GEM_BUG_ON(vm == &vm->i915->mm.aliasing_ppgtt->base); + GEM_BUG_ON(vm == &vm->i915->mm.aliasing_ppgtt->vm); GEM_BUG_ON(drm_mm_node_allocated(node)); node->size = size; @@ -3935,7 +3934,7 @@ int i915_gem_gtt_insert(struct i915_address_space *vm, GEM_BUG_ON(start >= end); GEM_BUG_ON(start > 0 && !IS_ALIGNED(start, I915_GTT_PAGE_SIZE)); GEM_BUG_ON(end < U64_MAX && !IS_ALIGNED(end, I915_GTT_PAGE_SIZE)); - GEM_BUG_ON(vm == &vm->i915->mm.aliasing_ppgtt->base); + GEM_BUG_ON(vm == &vm->i915->mm.aliasing_ppgtt->vm); GEM_BUG_ON(drm_mm_node_allocated(node)); if (unlikely(range_overflows(start, size, end))) diff --git a/drivers/gpu/drm/i915/i915_gem_gtt.h b/drivers/gpu/drm/i915/i915_gem_gtt.h index aec4f73574f4..197c2c06ecb7 100644 --- a/drivers/gpu/drm/i915/i915_gem_gtt.h +++ b/drivers/gpu/drm/i915/i915_gem_gtt.h @@ -65,7 +65,7 @@ typedef u64 gen8_pde_t; typedef u64 gen8_ppgtt_pdpe_t; typedef u64 gen8_ppgtt_pml4e_t; -#define ggtt_total_entries(ggtt) ((ggtt)->base.total >> PAGE_SHIFT) +#define ggtt_total_entries(ggtt) ((ggtt)->vm.total >> PAGE_SHIFT) /* gen6-hsw has bit 11-4 for physical addr bit 39-32 */ #define GEN6_GTT_ADDR_ENCODE(addr) ((addr) | (((addr) >> 28) & 0xff0)) @@ -367,7 +367,7 @@ i915_vm_has_scratch_64K(struct i915_address_space *vm) * the spec. */ struct i915_ggtt { - struct i915_address_space base; + struct i915_address_space vm; struct io_mapping iomap; /* Mapping to our CPU mappable region */ struct resource gmadr; /* GMADR resource */ @@ -385,7 +385,7 @@ struct i915_ggtt { }; struct i915_hw_ppgtt { - struct i915_address_space base; + struct i915_address_space vm; struct kref ref; struct drm_mm_node node; unsigned long pd_dirty_rings; @@ -543,7 +543,7 @@ static inline struct i915_ggtt * i915_vm_to_ggtt(struct i915_address_space *vm) { GEM_BUG_ON(!i915_is_ggtt(vm)); - return container_of(vm, struct i915_ggtt, base); + return container_of(vm, struct i915_ggtt, vm); } #define INTEL_MAX_PPAT_ENTRIES 8 diff --git a/drivers/gpu/drm/i915/i915_gem_render_state.c b/drivers/gpu/drm/i915/i915_gem_render_state.c index 1036e8686916..3210cedfa46c 100644 --- a/drivers/gpu/drm/i915/i915_gem_render_state.c +++ b/drivers/gpu/drm/i915/i915_gem_render_state.c @@ -194,7 +194,7 @@ int i915_gem_render_state_emit(struct i915_request *rq) if (IS_ERR(so.obj)) return PTR_ERR(so.obj); - so.vma = i915_vma_instance(so.obj, &engine->i915->ggtt.base, NULL); + so.vma = i915_vma_instance(so.obj, &engine->i915->ggtt.vm, NULL); if (IS_ERR(so.vma)) { err = PTR_ERR(so.vma); goto err_obj; diff --git a/drivers/gpu/drm/i915/i915_gem_shrinker.c b/drivers/gpu/drm/i915/i915_gem_shrinker.c index 5757fb7c4b5a..55e84e71f526 100644 --- a/drivers/gpu/drm/i915/i915_gem_shrinker.c +++ b/drivers/gpu/drm/i915/i915_gem_shrinker.c @@ -480,7 +480,7 @@ i915_gem_shrinker_vmap(struct notifier_block *nb, unsigned long event, void *ptr /* We also want to clear any cached iomaps as they wrap vmap */ list_for_each_entry_safe(vma, next, - &i915->ggtt.base.inactive_list, vm_link) { + &i915->ggtt.vm.inactive_list, vm_link) { unsigned long count = vma->node.size >> PAGE_SHIFT; if (vma->iomap && i915_vma_unbind(vma) == 0) freed_pages += count; diff --git a/drivers/gpu/drm/i915/i915_gem_stolen.c b/drivers/gpu/drm/i915/i915_gem_stolen.c index ad949cc30928..79a347295e00 100644 --- a/drivers/gpu/drm/i915/i915_gem_stolen.c +++ b/drivers/gpu/drm/i915/i915_gem_stolen.c @@ -642,7 +642,7 @@ i915_gem_object_create_stolen_for_preallocated(struct drm_i915_private *dev_priv if (ret) goto err; - vma = i915_vma_instance(obj, &ggtt->base, NULL); + vma = i915_vma_instance(obj, &ggtt->vm, NULL); if (IS_ERR(vma)) { ret = PTR_ERR(vma); goto err_pages; @@ -653,7 +653,7 @@ i915_gem_object_create_stolen_for_preallocated(struct drm_i915_private *dev_priv * setting up the GTT space. The actual reservation will occur * later. */ - ret = i915_gem_gtt_reserve(&ggtt->base, &vma->node, + ret = i915_gem_gtt_reserve(&ggtt->vm, &vma->node, size, gtt_offset, obj->cache_level, 0); if (ret) { @@ -666,7 +666,7 @@ i915_gem_object_create_stolen_for_preallocated(struct drm_i915_private *dev_priv vma->pages = obj->mm.pages; vma->flags |= I915_VMA_GLOBAL_BIND; __i915_vma_set_map_and_fenceable(vma); - list_move_tail(&vma->vm_link, &ggtt->base.inactive_list); + list_move_tail(&vma->vm_link, &ggtt->vm.inactive_list); spin_lock(&dev_priv->mm.obj_lock); list_move_tail(&obj->mm.link, &dev_priv->mm.bound_list); diff --git a/drivers/gpu/drm/i915/i915_gpu_error.c b/drivers/gpu/drm/i915/i915_gpu_error.c index 6702776303bf..758234d20f4e 100644 --- a/drivers/gpu/drm/i915/i915_gpu_error.c +++ b/drivers/gpu/drm/i915/i915_gpu_error.c @@ -973,8 +973,7 @@ i915_error_object_create(struct drm_i915_private *i915, void __iomem *s; int ret; - ggtt->base.insert_page(&ggtt->base, dma, slot, - I915_CACHE_NONE, 0); + ggtt->vm.insert_page(&ggtt->vm, dma, slot, I915_CACHE_NONE, 0); s = io_mapping_map_atomic_wc(&ggtt->iomap, slot); ret = compress_page(&compress, (void __force *)s, dst); @@ -993,7 +992,7 @@ unwind: out: compress_fini(&compress, dst); - ggtt->base.clear_range(&ggtt->base, slot, PAGE_SIZE); + ggtt->vm.clear_range(&ggtt->vm, slot, PAGE_SIZE); return dst; } @@ -1466,7 +1465,7 @@ static void gem_record_rings(struct i915_gpu_state *error) struct i915_gem_context *ctx = request->gem_context; struct intel_ring *ring; - ee->vm = ctx->ppgtt ? &ctx->ppgtt->base : &ggtt->base; + ee->vm = ctx->ppgtt ? &ctx->ppgtt->vm : &ggtt->vm; record_context(&ee->context, ctx); @@ -1564,7 +1563,7 @@ static void capture_active_buffers(struct i915_gpu_state *error) static void capture_pinned_buffers(struct i915_gpu_state *error) { - struct i915_address_space *vm = &error->i915->ggtt.base; + struct i915_address_space *vm = &error->i915->ggtt.vm; struct drm_i915_error_buffer *bo; struct i915_vma *vma; int count_inactive, count_active; diff --git a/drivers/gpu/drm/i915/i915_trace.h b/drivers/gpu/drm/i915/i915_trace.h index 3d5716d86e27..1472f48ab2e8 100644 --- a/drivers/gpu/drm/i915/i915_trace.h +++ b/drivers/gpu/drm/i915/i915_trace.h @@ -956,7 +956,7 @@ DECLARE_EVENT_CLASS(i915_context, __entry->dev = ctx->i915->drm.primary->index; __entry->ctx = ctx; __entry->hw_id = ctx->hw_id; - __entry->vm = ctx->ppgtt ? &ctx->ppgtt->base : NULL; + __entry->vm = ctx->ppgtt ? &ctx->ppgtt->vm : NULL; ), TP_printk("dev=%u, ctx=%p, ctx_vm=%p, hw_id=%u", @@ -997,7 +997,7 @@ TRACE_EVENT(switch_mm, __entry->class = engine->uabi_class; __entry->instance = engine->instance; __entry->to = to; - __entry->vm = to->ppgtt? &to->ppgtt->base : NULL; + __entry->vm = to->ppgtt ? &to->ppgtt->vm : NULL; __entry->dev = engine->i915->drm.primary->index; ), diff --git a/drivers/gpu/drm/i915/i915_vgpu.c b/drivers/gpu/drm/i915/i915_vgpu.c index 5fe9f3f39467..869cf4a3b6de 100644 --- a/drivers/gpu/drm/i915/i915_vgpu.c +++ b/drivers/gpu/drm/i915/i915_vgpu.c @@ -105,7 +105,7 @@ static void vgt_deballoon_space(struct i915_ggtt *ggtt, node->start + node->size, node->size / 1024); - ggtt->base.reserved -= node->size; + ggtt->vm.reserved -= node->size; drm_mm_remove_node(node); } @@ -141,11 +141,11 @@ static int vgt_balloon_space(struct i915_ggtt *ggtt, DRM_INFO("balloon space: range [ 0x%lx - 0x%lx ] %lu KiB.\n", start, end, size / 1024); - ret = i915_gem_gtt_reserve(&ggtt->base, node, + ret = i915_gem_gtt_reserve(&ggtt->vm, node, size, start, I915_COLOR_UNEVICTABLE, 0); if (!ret) - ggtt->base.reserved += size; + ggtt->vm.reserved += size; return ret; } @@ -197,7 +197,7 @@ static int vgt_balloon_space(struct i915_ggtt *ggtt, int intel_vgt_balloon(struct drm_i915_private *dev_priv) { struct i915_ggtt *ggtt = &dev_priv->ggtt; - unsigned long ggtt_end = ggtt->base.total; + unsigned long ggtt_end = ggtt->vm.total; unsigned long mappable_base, mappable_size, mappable_end; unsigned long unmappable_base, unmappable_size, unmappable_end; diff --git a/drivers/gpu/drm/i915/i915_vma.c b/drivers/gpu/drm/i915/i915_vma.c index 10bf654cd023..912f16ffe7ee 100644 --- a/drivers/gpu/drm/i915/i915_vma.c +++ b/drivers/gpu/drm/i915/i915_vma.c @@ -85,7 +85,7 @@ vma_create(struct drm_i915_gem_object *obj, int i; /* The aliasing_ppgtt should never be used directly! */ - GEM_BUG_ON(vm == &vm->i915->mm.aliasing_ppgtt->base); + GEM_BUG_ON(vm == &vm->i915->mm.aliasing_ppgtt->vm); vma = kmem_cache_zalloc(vm->i915->vmas, GFP_KERNEL); if (vma == NULL) diff --git a/drivers/gpu/drm/i915/intel_engine_cs.c b/drivers/gpu/drm/i915/intel_engine_cs.c index 13448ea76f57..2ec2e60dc670 100644 --- a/drivers/gpu/drm/i915/intel_engine_cs.c +++ b/drivers/gpu/drm/i915/intel_engine_cs.c @@ -515,7 +515,7 @@ int intel_engine_create_scratch(struct intel_engine_cs *engine, int size) return PTR_ERR(obj); } - vma = i915_vma_instance(obj, &engine->i915->ggtt.base, NULL); + vma = i915_vma_instance(obj, &engine->i915->ggtt.vm, NULL); if (IS_ERR(vma)) { ret = PTR_ERR(vma); goto err_unref; @@ -585,7 +585,7 @@ static int init_status_page(struct intel_engine_cs *engine) if (ret) goto err; - vma = i915_vma_instance(obj, &engine->i915->ggtt.base, NULL); + vma = i915_vma_instance(obj, &engine->i915->ggtt.vm, NULL); if (IS_ERR(vma)) { ret = PTR_ERR(vma); goto err; diff --git a/drivers/gpu/drm/i915/intel_guc.c b/drivers/gpu/drm/i915/intel_guc.c index e28a996b9604..29fd95c1306b 100644 --- a/drivers/gpu/drm/i915/intel_guc.c +++ b/drivers/gpu/drm/i915/intel_guc.c @@ -570,7 +570,7 @@ struct i915_vma *intel_guc_allocate_vma(struct intel_guc *guc, u32 size) if (IS_ERR(obj)) return ERR_CAST(obj); - vma = i915_vma_instance(obj, &dev_priv->ggtt.base, NULL); + vma = i915_vma_instance(obj, &dev_priv->ggtt.vm, NULL); if (IS_ERR(vma)) goto err; diff --git a/drivers/gpu/drm/i915/intel_guc_submission.c b/drivers/gpu/drm/i915/intel_guc_submission.c index 23e9f2023bc5..f3945258fe1b 100644 --- a/drivers/gpu/drm/i915/intel_guc_submission.c +++ b/drivers/gpu/drm/i915/intel_guc_submission.c @@ -536,7 +536,7 @@ static void guc_add_request(struct intel_guc *guc, struct i915_request *rq) */ static void flush_ggtt_writes(struct i915_vma *vma) { - struct drm_i915_private *dev_priv = to_i915(vma->obj->base.dev); + struct drm_i915_private *dev_priv = vma->vm->i915; if (i915_vma_is_map_and_fenceable(vma)) POSTING_READ_FW(GUC_STATUS); diff --git a/drivers/gpu/drm/i915/intel_lrc.c b/drivers/gpu/drm/i915/intel_lrc.c index f6f09f808f74..0935a19aca96 100644 --- a/drivers/gpu/drm/i915/intel_lrc.c +++ b/drivers/gpu/drm/i915/intel_lrc.c @@ -431,7 +431,7 @@ static u64 execlists_update_context(struct i915_request *rq) * PML4 is allocated during ppgtt init, so this is not needed * in 48-bit mode. */ - if (ppgtt && !i915_vm_is_48bit(&ppgtt->base)) + if (ppgtt && !i915_vm_is_48bit(&ppgtt->vm)) execlists_update_context_pdps(ppgtt, reg_state); return ce->lrc_desc; @@ -1672,7 +1672,7 @@ static int lrc_setup_wa_ctx(struct intel_engine_cs *engine) if (IS_ERR(obj)) return PTR_ERR(obj); - vma = i915_vma_instance(obj, &engine->i915->ggtt.base, NULL); + vma = i915_vma_instance(obj, &engine->i915->ggtt.vm, NULL); if (IS_ERR(vma)) { err = PTR_ERR(vma); goto err; @@ -2070,7 +2070,7 @@ static int gen8_emit_bb_start(struct i915_request *rq, * not needed in 48-bit.*/ if (rq->gem_context->ppgtt && (intel_engine_flag(rq->engine) & rq->gem_context->ppgtt->pd_dirty_rings) && - !i915_vm_is_48bit(&rq->gem_context->ppgtt->base) && + !i915_vm_is_48bit(&rq->gem_context->ppgtt->vm) && !intel_vgpu_active(rq->i915)) { ret = intel_logical_ring_emit_pdps(rq); if (ret) @@ -2668,7 +2668,7 @@ static void execlists_init_reg_state(u32 *regs, CTX_REG(regs, CTX_PDP0_UDW, GEN8_RING_PDP_UDW(engine, 0), 0); CTX_REG(regs, CTX_PDP0_LDW, GEN8_RING_PDP_LDW(engine, 0), 0); - if (ppgtt && i915_vm_is_48bit(&ppgtt->base)) { + if (ppgtt && i915_vm_is_48bit(&ppgtt->vm)) { /* 64b PPGTT (48bit canonical) * PDP0_DESCRIPTOR contains the base address to PML4 and * other PDP Descriptors are ignored. @@ -2774,7 +2774,7 @@ static int execlists_context_deferred_alloc(struct i915_gem_context *ctx, goto error_deref_obj; } - vma = i915_vma_instance(ctx_obj, &ctx->i915->ggtt.base, NULL); + vma = i915_vma_instance(ctx_obj, &ctx->i915->ggtt.vm, NULL); if (IS_ERR(vma)) { ret = PTR_ERR(vma); goto error_deref_obj; diff --git a/drivers/gpu/drm/i915/intel_ringbuffer.c b/drivers/gpu/drm/i915/intel_ringbuffer.c index 3f0eb538df09..6496c1d00dbb 100644 --- a/drivers/gpu/drm/i915/intel_ringbuffer.c +++ b/drivers/gpu/drm/i915/intel_ringbuffer.c @@ -1121,7 +1121,7 @@ intel_ring_create_vma(struct drm_i915_private *dev_priv, int size) /* mark ring buffers as read-only from GPU side by default */ obj->gt_ro = 1; - vma = i915_vma_instance(obj, &dev_priv->ggtt.base, NULL); + vma = i915_vma_instance(obj, &dev_priv->ggtt.vm, NULL); if (IS_ERR(vma)) goto err; @@ -1300,7 +1300,7 @@ alloc_context_vma(struct intel_engine_cs *engine) i915_gem_object_set_cache_level(obj, I915_CACHE_L3_LLC); } - vma = i915_vma_instance(obj, &i915->ggtt.base, NULL); + vma = i915_vma_instance(obj, &i915->ggtt.vm, NULL); if (IS_ERR(vma)) { err = PTR_ERR(vma); goto err_obj; diff --git a/drivers/gpu/drm/i915/selftests/huge_pages.c b/drivers/gpu/drm/i915/selftests/huge_pages.c index 91c72911be3c..7846ea4a99bc 100644 --- a/drivers/gpu/drm/i915/selftests/huge_pages.c +++ b/drivers/gpu/drm/i915/selftests/huge_pages.c @@ -338,7 +338,7 @@ fake_huge_pages_object(struct drm_i915_private *i915, u64 size, bool single) static int igt_check_page_sizes(struct i915_vma *vma) { - struct drm_i915_private *i915 = to_i915(vma->obj->base.dev); + struct drm_i915_private *i915 = vma->vm->i915; unsigned int supported = INTEL_INFO(i915)->page_sizes; struct drm_i915_gem_object *obj = vma->obj; int err = 0; @@ -379,7 +379,7 @@ static int igt_check_page_sizes(struct i915_vma *vma) static int igt_mock_exhaust_device_supported_pages(void *arg) { struct i915_hw_ppgtt *ppgtt = arg; - struct drm_i915_private *i915 = ppgtt->base.i915; + struct drm_i915_private *i915 = ppgtt->vm.i915; unsigned int saved_mask = INTEL_INFO(i915)->page_sizes; struct drm_i915_gem_object *obj; struct i915_vma *vma; @@ -415,7 +415,7 @@ static int igt_mock_exhaust_device_supported_pages(void *arg) goto out_put; } - vma = i915_vma_instance(obj, &ppgtt->base, NULL); + vma = i915_vma_instance(obj, &ppgtt->vm, NULL); if (IS_ERR(vma)) { err = PTR_ERR(vma); goto out_put; @@ -458,7 +458,7 @@ out_device: static int igt_mock_ppgtt_misaligned_dma(void *arg) { struct i915_hw_ppgtt *ppgtt = arg; - struct drm_i915_private *i915 = ppgtt->base.i915; + struct drm_i915_private *i915 = ppgtt->vm.i915; unsigned long supported = INTEL_INFO(i915)->page_sizes; struct drm_i915_gem_object *obj; int bit; @@ -500,7 +500,7 @@ static int igt_mock_ppgtt_misaligned_dma(void *arg) /* Force the page size for this object */ obj->mm.page_sizes.sg = page_size; - vma = i915_vma_instance(obj, &ppgtt->base, NULL); + vma = i915_vma_instance(obj, &ppgtt->vm, NULL); if (IS_ERR(vma)) { err = PTR_ERR(vma); goto out_unpin; @@ -591,7 +591,7 @@ static void close_object_list(struct list_head *objects, list_for_each_entry_safe(obj, on, objects, st_link) { struct i915_vma *vma; - vma = i915_vma_instance(obj, &ppgtt->base, NULL); + vma = i915_vma_instance(obj, &ppgtt->vm, NULL); if (!IS_ERR(vma)) i915_vma_close(vma); @@ -604,8 +604,8 @@ static void close_object_list(struct list_head *objects, static int igt_mock_ppgtt_huge_fill(void *arg) { struct i915_hw_ppgtt *ppgtt = arg; - struct drm_i915_private *i915 = ppgtt->base.i915; - unsigned long max_pages = ppgtt->base.total >> PAGE_SHIFT; + struct drm_i915_private *i915 = ppgtt->vm.i915; + unsigned long max_pages = ppgtt->vm.total >> PAGE_SHIFT; unsigned long page_num; bool single = false; LIST_HEAD(objects); @@ -641,7 +641,7 @@ static int igt_mock_ppgtt_huge_fill(void *arg) list_add(&obj->st_link, &objects); - vma = i915_vma_instance(obj, &ppgtt->base, NULL); + vma = i915_vma_instance(obj, &ppgtt->vm, NULL); if (IS_ERR(vma)) { err = PTR_ERR(vma); break; @@ -725,7 +725,7 @@ static int igt_mock_ppgtt_huge_fill(void *arg) static int igt_mock_ppgtt_64K(void *arg) { struct i915_hw_ppgtt *ppgtt = arg; - struct drm_i915_private *i915 = ppgtt->base.i915; + struct drm_i915_private *i915 = ppgtt->vm.i915; struct drm_i915_gem_object *obj; const struct object_info { unsigned int size; @@ -819,7 +819,7 @@ static int igt_mock_ppgtt_64K(void *arg) */ obj->mm.page_sizes.sg &= ~I915_GTT_PAGE_SIZE_2M; - vma = i915_vma_instance(obj, &ppgtt->base, NULL); + vma = i915_vma_instance(obj, &ppgtt->vm, NULL); if (IS_ERR(vma)) { err = PTR_ERR(vma); goto out_object_unpin; @@ -887,8 +887,8 @@ out_object_put: static struct i915_vma * gpu_write_dw(struct i915_vma *vma, u64 offset, u32 val) { - struct drm_i915_private *i915 = to_i915(vma->obj->base.dev); - const int gen = INTEL_GEN(vma->vm->i915); + struct drm_i915_private *i915 = vma->vm->i915; + const int gen = INTEL_GEN(i915); unsigned int count = vma->size >> PAGE_SHIFT; struct drm_i915_gem_object *obj; struct i915_vma *batch; @@ -1047,7 +1047,8 @@ static int __igt_write_huge(struct i915_gem_context *ctx, u32 dword, u32 val) { struct drm_i915_private *i915 = to_i915(obj->base.dev); - struct i915_address_space *vm = ctx->ppgtt ? &ctx->ppgtt->base : &i915->ggtt.base; + struct i915_address_space *vm = + ctx->ppgtt ? &ctx->ppgtt->vm : &i915->ggtt.vm; unsigned int flags = PIN_USER | PIN_OFFSET_FIXED; struct i915_vma *vma; int err; @@ -1100,7 +1101,8 @@ static int igt_write_huge(struct i915_gem_context *ctx, struct drm_i915_gem_object *obj) { struct drm_i915_private *i915 = to_i915(obj->base.dev); - struct i915_address_space *vm = ctx->ppgtt ? &ctx->ppgtt->base : &i915->ggtt.base; + struct i915_address_space *vm = + ctx->ppgtt ? &ctx->ppgtt->vm : &i915->ggtt.vm; static struct intel_engine_cs *engines[I915_NUM_ENGINES]; struct intel_engine_cs *engine; I915_RND_STATE(prng); @@ -1439,7 +1441,7 @@ static int igt_ppgtt_pin_update(void *arg) if (IS_ERR(obj)) return PTR_ERR(obj); - vma = i915_vma_instance(obj, &ppgtt->base, NULL); + vma = i915_vma_instance(obj, &ppgtt->vm, NULL); if (IS_ERR(vma)) { err = PTR_ERR(vma); goto out_put; @@ -1493,7 +1495,7 @@ static int igt_ppgtt_pin_update(void *arg) if (IS_ERR(obj)) return PTR_ERR(obj); - vma = i915_vma_instance(obj, &ppgtt->base, NULL); + vma = i915_vma_instance(obj, &ppgtt->vm, NULL); if (IS_ERR(vma)) { err = PTR_ERR(vma); goto out_put; @@ -1531,7 +1533,8 @@ static int igt_tmpfs_fallback(void *arg) struct i915_gem_context *ctx = arg; struct drm_i915_private *i915 = ctx->i915; struct vfsmount *gemfs = i915->mm.gemfs; - struct i915_address_space *vm = ctx->ppgtt ? &ctx->ppgtt->base : &i915->ggtt.base; + struct i915_address_space *vm = + ctx->ppgtt ? &ctx->ppgtt->vm : &i915->ggtt.vm; struct drm_i915_gem_object *obj; struct i915_vma *vma; u32 *vaddr; @@ -1587,7 +1590,8 @@ static int igt_shrink_thp(void *arg) { struct i915_gem_context *ctx = arg; struct drm_i915_private *i915 = ctx->i915; - struct i915_address_space *vm = ctx->ppgtt ? &ctx->ppgtt->base : &i915->ggtt.base; + struct i915_address_space *vm = + ctx->ppgtt ? &ctx->ppgtt->vm : &i915->ggtt.vm; struct drm_i915_gem_object *obj; struct i915_vma *vma; unsigned int flags = PIN_USER; @@ -1696,14 +1700,14 @@ int i915_gem_huge_page_mock_selftests(void) goto out_unlock; } - if (!i915_vm_is_48bit(&ppgtt->base)) { + if (!i915_vm_is_48bit(&ppgtt->vm)) { pr_err("failed to create 48b PPGTT\n"); err = -EINVAL; goto out_close; } /* If we were ever hit this then it's time to mock the 64K scratch */ - if (!i915_vm_has_scratch_64K(&ppgtt->base)) { + if (!i915_vm_has_scratch_64K(&ppgtt->vm)) { pr_err("PPGTT missing 64K scratch page\n"); err = -EINVAL; goto out_close; @@ -1712,7 +1716,7 @@ int i915_gem_huge_page_mock_selftests(void) err = i915_subtests(tests, ppgtt); out_close: - i915_ppgtt_close(&ppgtt->base); + i915_ppgtt_close(&ppgtt->vm); i915_ppgtt_put(ppgtt); out_unlock: @@ -1758,7 +1762,7 @@ int i915_gem_huge_page_live_selftests(struct drm_i915_private *dev_priv) } if (ctx->ppgtt) - ctx->ppgtt->base.scrub_64K = true; + ctx->ppgtt->vm.scrub_64K = true; err = i915_subtests(tests, ctx); diff --git a/drivers/gpu/drm/i915/selftests/i915_gem_context.c b/drivers/gpu/drm/i915/selftests/i915_gem_context.c index b39392a00a6f..708e8d721448 100644 --- a/drivers/gpu/drm/i915/selftests/i915_gem_context.c +++ b/drivers/gpu/drm/i915/selftests/i915_gem_context.c @@ -115,7 +115,7 @@ static int gpu_fill(struct drm_i915_gem_object *obj, { struct drm_i915_private *i915 = to_i915(obj->base.dev); struct i915_address_space *vm = - ctx->ppgtt ? &ctx->ppgtt->base : &i915->ggtt.base; + ctx->ppgtt ? &ctx->ppgtt->vm : &i915->ggtt.vm; struct i915_request *rq; struct i915_vma *vma; struct i915_vma *batch; @@ -290,7 +290,7 @@ create_test_object(struct i915_gem_context *ctx, { struct drm_i915_gem_object *obj; struct i915_address_space *vm = - ctx->ppgtt ? &ctx->ppgtt->base : &ctx->i915->ggtt.base; + ctx->ppgtt ? &ctx->ppgtt->vm : &ctx->i915->ggtt.vm; u64 size; int err; @@ -557,7 +557,7 @@ static int fake_aliasing_ppgtt_enable(struct drm_i915_private *i915) list_for_each_entry(obj, &i915->mm.bound_list, mm.link) { struct i915_vma *vma; - vma = i915_vma_instance(obj, &i915->ggtt.base, NULL); + vma = i915_vma_instance(obj, &i915->ggtt.vm, NULL); if (IS_ERR(vma)) continue; diff --git a/drivers/gpu/drm/i915/selftests/i915_gem_evict.c b/drivers/gpu/drm/i915/selftests/i915_gem_evict.c index ab9d7bee0aae..2dc72a984d45 100644 --- a/drivers/gpu/drm/i915/selftests/i915_gem_evict.c +++ b/drivers/gpu/drm/i915/selftests/i915_gem_evict.c @@ -35,7 +35,7 @@ static int populate_ggtt(struct drm_i915_private *i915) u64 size; for (size = 0; - size + I915_GTT_PAGE_SIZE <= i915->ggtt.base.total; + size + I915_GTT_PAGE_SIZE <= i915->ggtt.vm.total; size += I915_GTT_PAGE_SIZE) { struct i915_vma *vma; @@ -57,7 +57,7 @@ static int populate_ggtt(struct drm_i915_private *i915) return -EINVAL; } - if (list_empty(&i915->ggtt.base.inactive_list)) { + if (list_empty(&i915->ggtt.vm.inactive_list)) { pr_err("No objects on the GGTT inactive list!\n"); return -EINVAL; } @@ -69,7 +69,7 @@ static void unpin_ggtt(struct drm_i915_private *i915) { struct i915_vma *vma; - list_for_each_entry(vma, &i915->ggtt.base.inactive_list, vm_link) + list_for_each_entry(vma, &i915->ggtt.vm.inactive_list, vm_link) i915_vma_unpin(vma); } @@ -103,7 +103,7 @@ static int igt_evict_something(void *arg) goto cleanup; /* Everything is pinned, nothing should happen */ - err = i915_gem_evict_something(&ggtt->base, + err = i915_gem_evict_something(&ggtt->vm, I915_GTT_PAGE_SIZE, 0, 0, 0, U64_MAX, 0); @@ -116,7 +116,7 @@ static int igt_evict_something(void *arg) unpin_ggtt(i915); /* Everything is unpinned, we should be able to evict something */ - err = i915_gem_evict_something(&ggtt->base, + err = i915_gem_evict_something(&ggtt->vm, I915_GTT_PAGE_SIZE, 0, 0, 0, U64_MAX, 0); @@ -181,7 +181,7 @@ static int igt_evict_for_vma(void *arg) goto cleanup; /* Everything is pinned, nothing should happen */ - err = i915_gem_evict_for_node(&ggtt->base, &target, 0); + err = i915_gem_evict_for_node(&ggtt->vm, &target, 0); if (err != -ENOSPC) { pr_err("i915_gem_evict_for_node on a full GGTT returned err=%d\n", err); @@ -191,7 +191,7 @@ static int igt_evict_for_vma(void *arg) unpin_ggtt(i915); /* Everything is unpinned, we should be able to evict the node */ - err = i915_gem_evict_for_node(&ggtt->base, &target, 0); + err = i915_gem_evict_for_node(&ggtt->vm, &target, 0); if (err) { pr_err("i915_gem_evict_for_node returned err=%d\n", err); @@ -229,7 +229,7 @@ static int igt_evict_for_cache_color(void *arg) * i915_gtt_color_adjust throughout our driver, so using a mock color * adjust will work just fine for our purposes. */ - ggtt->base.mm.color_adjust = mock_color_adjust; + ggtt->vm.mm.color_adjust = mock_color_adjust; obj = i915_gem_object_create_internal(i915, I915_GTT_PAGE_SIZE); if (IS_ERR(obj)) { @@ -265,7 +265,7 @@ static int igt_evict_for_cache_color(void *arg) i915_vma_unpin(vma); /* Remove just the second vma */ - err = i915_gem_evict_for_node(&ggtt->base, &target, 0); + err = i915_gem_evict_for_node(&ggtt->vm, &target, 0); if (err) { pr_err("[0]i915_gem_evict_for_node returned err=%d\n", err); goto cleanup; @@ -276,7 +276,7 @@ static int igt_evict_for_cache_color(void *arg) */ target.color = I915_CACHE_L3_LLC; - err = i915_gem_evict_for_node(&ggtt->base, &target, 0); + err = i915_gem_evict_for_node(&ggtt->vm, &target, 0); if (!err) { pr_err("[1]i915_gem_evict_for_node returned err=%d\n", err); err = -EINVAL; @@ -288,7 +288,7 @@ static int igt_evict_for_cache_color(void *arg) cleanup: unpin_ggtt(i915); cleanup_objects(i915); - ggtt->base.mm.color_adjust = NULL; + ggtt->vm.mm.color_adjust = NULL; return err; } @@ -305,7 +305,7 @@ static int igt_evict_vm(void *arg) goto cleanup; /* Everything is pinned, nothing should happen */ - err = i915_gem_evict_vm(&ggtt->base); + err = i915_gem_evict_vm(&ggtt->vm); if (err) { pr_err("i915_gem_evict_vm on a full GGTT returned err=%d]\n", err); @@ -314,7 +314,7 @@ static int igt_evict_vm(void *arg) unpin_ggtt(i915); - err = i915_gem_evict_vm(&ggtt->base); + err = i915_gem_evict_vm(&ggtt->vm); if (err) { pr_err("i915_gem_evict_vm on a full GGTT returned err=%d]\n", err); @@ -359,9 +359,9 @@ static int igt_evict_contexts(void *arg) /* Reserve a block so that we know we have enough to fit a few rq */ memset(&hole, 0, sizeof(hole)); - err = i915_gem_gtt_insert(&i915->ggtt.base, &hole, + err = i915_gem_gtt_insert(&i915->ggtt.vm, &hole, PRETEND_GGTT_SIZE, 0, I915_COLOR_UNEVICTABLE, - 0, i915->ggtt.base.total, + 0, i915->ggtt.vm.total, PIN_NOEVICT); if (err) goto out_locked; @@ -377,9 +377,9 @@ static int igt_evict_contexts(void *arg) goto out_locked; } - if (i915_gem_gtt_insert(&i915->ggtt.base, &r->node, + if (i915_gem_gtt_insert(&i915->ggtt.vm, &r->node, 1ul << 20, 0, I915_COLOR_UNEVICTABLE, - 0, i915->ggtt.base.total, + 0, i915->ggtt.vm.total, PIN_NOEVICT)) { kfree(r); break; diff --git a/drivers/gpu/drm/i915/selftests/i915_gem_gtt.c b/drivers/gpu/drm/i915/selftests/i915_gem_gtt.c index f7dc926f4ef1..58ab5e84ceb7 100644 --- a/drivers/gpu/drm/i915/selftests/i915_gem_gtt.c +++ b/drivers/gpu/drm/i915/selftests/i915_gem_gtt.c @@ -151,14 +151,14 @@ static int igt_ppgtt_alloc(void *arg) if (err) goto err_ppgtt; - if (!ppgtt->base.allocate_va_range) + if (!ppgtt->vm.allocate_va_range) goto err_ppgtt_cleanup; /* Check we can allocate the entire range */ for (size = 4096; - size <= ppgtt->base.total; + size <= ppgtt->vm.total; size <<= 2) { - err = ppgtt->base.allocate_va_range(&ppgtt->base, 0, size); + err = ppgtt->vm.allocate_va_range(&ppgtt->vm, 0, size); if (err) { if (err == -ENOMEM) { pr_info("[1] Ran out of memory for va_range [0 + %llx] [bit %d]\n", @@ -168,15 +168,15 @@ static int igt_ppgtt_alloc(void *arg) goto err_ppgtt_cleanup; } - ppgtt->base.clear_range(&ppgtt->base, 0, size); + ppgtt->vm.clear_range(&ppgtt->vm, 0, size); } /* Check we can incrementally allocate the entire range */ for (last = 0, size = 4096; - size <= ppgtt->base.total; + size <= ppgtt->vm.total; last = size, size <<= 2) { - err = ppgtt->base.allocate_va_range(&ppgtt->base, - last, size - last); + err = ppgtt->vm.allocate_va_range(&ppgtt->vm, + last, size - last); if (err) { if (err == -ENOMEM) { pr_info("[2] Ran out of memory for va_range [%llx + %llx] [bit %d]\n", @@ -188,7 +188,7 @@ static int igt_ppgtt_alloc(void *arg) } err_ppgtt_cleanup: - ppgtt->base.cleanup(&ppgtt->base); + ppgtt->vm.cleanup(&ppgtt->vm); err_ppgtt: mutex_unlock(&dev_priv->drm.struct_mutex); kfree(ppgtt); @@ -987,12 +987,12 @@ static int exercise_ppgtt(struct drm_i915_private *dev_priv, err = PTR_ERR(ppgtt); goto out_unlock; } - GEM_BUG_ON(offset_in_page(ppgtt->base.total)); - GEM_BUG_ON(ppgtt->base.closed); + GEM_BUG_ON(offset_in_page(ppgtt->vm.total)); + GEM_BUG_ON(ppgtt->vm.closed); - err = func(dev_priv, &ppgtt->base, 0, ppgtt->base.total, end_time); + err = func(dev_priv, &ppgtt->vm, 0, ppgtt->vm.total, end_time); - i915_ppgtt_close(&ppgtt->base); + i915_ppgtt_close(&ppgtt->vm); i915_ppgtt_put(ppgtt); out_unlock: mutex_unlock(&dev_priv->drm.struct_mutex); @@ -1061,18 +1061,18 @@ static int exercise_ggtt(struct drm_i915_private *i915, mutex_lock(&i915->drm.struct_mutex); restart: - list_sort(NULL, &ggtt->base.mm.hole_stack, sort_holes); - drm_mm_for_each_hole(node, &ggtt->base.mm, hole_start, hole_end) { + list_sort(NULL, &ggtt->vm.mm.hole_stack, sort_holes); + drm_mm_for_each_hole(node, &ggtt->vm.mm, hole_start, hole_end) { if (hole_start < last) continue; - if (ggtt->base.mm.color_adjust) - ggtt->base.mm.color_adjust(node, 0, - &hole_start, &hole_end); + if (ggtt->vm.mm.color_adjust) + ggtt->vm.mm.color_adjust(node, 0, + &hole_start, &hole_end); if (hole_start >= hole_end) continue; - err = func(i915, &ggtt->base, hole_start, hole_end, end_time); + err = func(i915, &ggtt->vm, hole_start, hole_end, end_time); if (err) break; @@ -1134,7 +1134,7 @@ static int igt_ggtt_page(void *arg) goto out_free; memset(&tmp, 0, sizeof(tmp)); - err = drm_mm_insert_node_in_range(&ggtt->base.mm, &tmp, + err = drm_mm_insert_node_in_range(&ggtt->vm.mm, &tmp, count * PAGE_SIZE, 0, I915_COLOR_UNEVICTABLE, 0, ggtt->mappable_end, @@ -1147,9 +1147,9 @@ static int igt_ggtt_page(void *arg) for (n = 0; n < count; n++) { u64 offset = tmp.start + n * PAGE_SIZE; - ggtt->base.insert_page(&ggtt->base, - i915_gem_object_get_dma_address(obj, 0), - offset, I915_CACHE_NONE, 0); + ggtt->vm.insert_page(&ggtt->vm, + i915_gem_object_get_dma_address(obj, 0), + offset, I915_CACHE_NONE, 0); } order = i915_random_order(count, &prng); @@ -1188,7 +1188,7 @@ static int igt_ggtt_page(void *arg) kfree(order); out_remove: - ggtt->base.clear_range(&ggtt->base, tmp.start, tmp.size); + ggtt->vm.clear_range(&ggtt->vm, tmp.start, tmp.size); intel_runtime_pm_put(i915); drm_mm_remove_node(&tmp); out_unpin: @@ -1229,7 +1229,7 @@ static int exercise_mock(struct drm_i915_private *i915, ppgtt = ctx->ppgtt; GEM_BUG_ON(!ppgtt); - err = func(i915, &ppgtt->base, 0, ppgtt->base.total, end_time); + err = func(i915, &ppgtt->vm, 0, ppgtt->vm.total, end_time); mock_context_close(ctx); return err; @@ -1270,7 +1270,7 @@ static int igt_gtt_reserve(void *arg) /* Start by filling the GGTT */ for (total = 0; - total + 2*I915_GTT_PAGE_SIZE <= i915->ggtt.base.total; + total + 2*I915_GTT_PAGE_SIZE <= i915->ggtt.vm.total; total += 2*I915_GTT_PAGE_SIZE) { struct i915_vma *vma; @@ -1288,20 +1288,20 @@ static int igt_gtt_reserve(void *arg) list_add(&obj->st_link, &objects); - vma = i915_vma_instance(obj, &i915->ggtt.base, NULL); + vma = i915_vma_instance(obj, &i915->ggtt.vm, NULL); if (IS_ERR(vma)) { err = PTR_ERR(vma); goto out; } - err = i915_gem_gtt_reserve(&i915->ggtt.base, &vma->node, + err = i915_gem_gtt_reserve(&i915->ggtt.vm, &vma->node, obj->base.size, total, obj->cache_level, 0); if (err) { pr_err("i915_gem_gtt_reserve (pass 1) failed at %llu/%llu with err=%d\n", - total, i915->ggtt.base.total, err); + total, i915->ggtt.vm.total, err); goto out; } track_vma_bind(vma); @@ -1319,7 +1319,7 @@ static int igt_gtt_reserve(void *arg) /* Now we start forcing evictions */ for (total = I915_GTT_PAGE_SIZE; - total + 2*I915_GTT_PAGE_SIZE <= i915->ggtt.base.total; + total + 2*I915_GTT_PAGE_SIZE <= i915->ggtt.vm.total; total += 2*I915_GTT_PAGE_SIZE) { struct i915_vma *vma; @@ -1337,20 +1337,20 @@ static int igt_gtt_reserve(void *arg) list_add(&obj->st_link, &objects); - vma = i915_vma_instance(obj, &i915->ggtt.base, NULL); + vma = i915_vma_instance(obj, &i915->ggtt.vm, NULL); if (IS_ERR(vma)) { err = PTR_ERR(vma); goto out; } - err = i915_gem_gtt_reserve(&i915->ggtt.base, &vma->node, + err = i915_gem_gtt_reserve(&i915->ggtt.vm, &vma->node, obj->base.size, total, obj->cache_level, 0); if (err) { pr_err("i915_gem_gtt_reserve (pass 2) failed at %llu/%llu with err=%d\n", - total, i915->ggtt.base.total, err); + total, i915->ggtt.vm.total, err); goto out; } track_vma_bind(vma); @@ -1371,7 +1371,7 @@ static int igt_gtt_reserve(void *arg) struct i915_vma *vma; u64 offset; - vma = i915_vma_instance(obj, &i915->ggtt.base, NULL); + vma = i915_vma_instance(obj, &i915->ggtt.vm, NULL); if (IS_ERR(vma)) { err = PTR_ERR(vma); goto out; @@ -1383,18 +1383,18 @@ static int igt_gtt_reserve(void *arg) goto out; } - offset = random_offset(0, i915->ggtt.base.total, + offset = random_offset(0, i915->ggtt.vm.total, 2*I915_GTT_PAGE_SIZE, I915_GTT_MIN_ALIGNMENT); - err = i915_gem_gtt_reserve(&i915->ggtt.base, &vma->node, + err = i915_gem_gtt_reserve(&i915->ggtt.vm, &vma->node, obj->base.size, offset, obj->cache_level, 0); if (err) { pr_err("i915_gem_gtt_reserve (pass 3) failed at %llu/%llu with err=%d\n", - total, i915->ggtt.base.total, err); + total, i915->ggtt.vm.total, err); goto out; } track_vma_bind(vma); @@ -1429,8 +1429,8 @@ static int igt_gtt_insert(void *arg) u64 start, end; } invalid_insert[] = { { - i915->ggtt.base.total + I915_GTT_PAGE_SIZE, 0, - 0, i915->ggtt.base.total, + i915->ggtt.vm.total + I915_GTT_PAGE_SIZE, 0, + 0, i915->ggtt.vm.total, }, { 2*I915_GTT_PAGE_SIZE, 0, @@ -1460,7 +1460,7 @@ static int igt_gtt_insert(void *arg) /* Check a couple of obviously invalid requests */ for (ii = invalid_insert; ii->size; ii++) { - err = i915_gem_gtt_insert(&i915->ggtt.base, &tmp, + err = i915_gem_gtt_insert(&i915->ggtt.vm, &tmp, ii->size, ii->alignment, I915_COLOR_UNEVICTABLE, ii->start, ii->end, @@ -1475,7 +1475,7 @@ static int igt_gtt_insert(void *arg) /* Start by filling the GGTT */ for (total = 0; - total + I915_GTT_PAGE_SIZE <= i915->ggtt.base.total; + total + I915_GTT_PAGE_SIZE <= i915->ggtt.vm.total; total += I915_GTT_PAGE_SIZE) { struct i915_vma *vma; @@ -1493,15 +1493,15 @@ static int igt_gtt_insert(void *arg) list_add(&obj->st_link, &objects); - vma = i915_vma_instance(obj, &i915->ggtt.base, NULL); + vma = i915_vma_instance(obj, &i915->ggtt.vm, NULL); if (IS_ERR(vma)) { err = PTR_ERR(vma); goto out; } - err = i915_gem_gtt_insert(&i915->ggtt.base, &vma->node, + err = i915_gem_gtt_insert(&i915->ggtt.vm, &vma->node, obj->base.size, 0, obj->cache_level, - 0, i915->ggtt.base.total, + 0, i915->ggtt.vm.total, 0); if (err == -ENOSPC) { /* maxed out the GGTT space */ @@ -1510,7 +1510,7 @@ static int igt_gtt_insert(void *arg) } if (err) { pr_err("i915_gem_gtt_insert (pass 1) failed at %llu/%llu with err=%d\n", - total, i915->ggtt.base.total, err); + total, i915->ggtt.vm.total, err); goto out; } track_vma_bind(vma); @@ -1522,7 +1522,7 @@ static int igt_gtt_insert(void *arg) list_for_each_entry(obj, &objects, st_link) { struct i915_vma *vma; - vma = i915_vma_instance(obj, &i915->ggtt.base, NULL); + vma = i915_vma_instance(obj, &i915->ggtt.vm, NULL); if (IS_ERR(vma)) { err = PTR_ERR(vma); goto out; @@ -1542,7 +1542,7 @@ static int igt_gtt_insert(void *arg) struct i915_vma *vma; u64 offset; - vma = i915_vma_instance(obj, &i915->ggtt.base, NULL); + vma = i915_vma_instance(obj, &i915->ggtt.vm, NULL); if (IS_ERR(vma)) { err = PTR_ERR(vma); goto out; @@ -1557,13 +1557,13 @@ static int igt_gtt_insert(void *arg) goto out; } - err = i915_gem_gtt_insert(&i915->ggtt.base, &vma->node, + err = i915_gem_gtt_insert(&i915->ggtt.vm, &vma->node, obj->base.size, 0, obj->cache_level, - 0, i915->ggtt.base.total, + 0, i915->ggtt.vm.total, 0); if (err) { pr_err("i915_gem_gtt_insert (pass 2) failed at %llu/%llu with err=%d\n", - total, i915->ggtt.base.total, err); + total, i915->ggtt.vm.total, err); goto out; } track_vma_bind(vma); @@ -1579,7 +1579,7 @@ static int igt_gtt_insert(void *arg) /* And then force evictions */ for (total = 0; - total + 2*I915_GTT_PAGE_SIZE <= i915->ggtt.base.total; + total + 2*I915_GTT_PAGE_SIZE <= i915->ggtt.vm.total; total += 2*I915_GTT_PAGE_SIZE) { struct i915_vma *vma; @@ -1597,19 +1597,19 @@ static int igt_gtt_insert(void *arg) list_add(&obj->st_link, &objects); - vma = i915_vma_instance(obj, &i915->ggtt.base, NULL); + vma = i915_vma_instance(obj, &i915->ggtt.vm, NULL); if (IS_ERR(vma)) { err = PTR_ERR(vma); goto out; } - err = i915_gem_gtt_insert(&i915->ggtt.base, &vma->node, + err = i915_gem_gtt_insert(&i915->ggtt.vm, &vma->node, obj->base.size, 0, obj->cache_level, - 0, i915->ggtt.base.total, + 0, i915->ggtt.vm.total, 0); if (err) { pr_err("i915_gem_gtt_insert (pass 3) failed at %llu/%llu with err=%d\n", - total, i915->ggtt.base.total, err); + total, i915->ggtt.vm.total, err); goto out; } track_vma_bind(vma); @@ -1669,7 +1669,7 @@ int i915_gem_gtt_live_selftests(struct drm_i915_private *i915) SUBTEST(igt_ggtt_page), }; - GEM_BUG_ON(offset_in_page(i915->ggtt.base.total)); + GEM_BUG_ON(offset_in_page(i915->ggtt.vm.total)); return i915_subtests(tests, i915); } diff --git a/drivers/gpu/drm/i915/selftests/i915_gem_object.c b/drivers/gpu/drm/i915/selftests/i915_gem_object.c index fbdb2419d418..2b2dde94526f 100644 --- a/drivers/gpu/drm/i915/selftests/i915_gem_object.c +++ b/drivers/gpu/drm/i915/selftests/i915_gem_object.c @@ -113,7 +113,7 @@ static int igt_gem_huge(void *arg) obj = huge_gem_object(i915, nreal * PAGE_SIZE, - i915->ggtt.base.total + PAGE_SIZE); + i915->ggtt.vm.total + PAGE_SIZE); if (IS_ERR(obj)) return PTR_ERR(obj); @@ -311,7 +311,7 @@ static int igt_partial_tiling(void *arg) obj = huge_gem_object(i915, nreal << PAGE_SHIFT, - (1 + next_prime_number(i915->ggtt.base.total >> PAGE_SHIFT)) << PAGE_SHIFT); + (1 + next_prime_number(i915->ggtt.vm.total >> PAGE_SHIFT)) << PAGE_SHIFT); if (IS_ERR(obj)) return PTR_ERR(obj); @@ -440,7 +440,7 @@ static int make_obj_busy(struct drm_i915_gem_object *obj) struct i915_vma *vma; int err; - vma = i915_vma_instance(obj, &i915->ggtt.base, NULL); + vma = i915_vma_instance(obj, &i915->ggtt.vm, NULL); if (IS_ERR(vma)) return PTR_ERR(vma); diff --git a/drivers/gpu/drm/i915/selftests/i915_request.c b/drivers/gpu/drm/i915/selftests/i915_request.c index 94bc2e1898a4..a3a89aadeccb 100644 --- a/drivers/gpu/drm/i915/selftests/i915_request.c +++ b/drivers/gpu/drm/i915/selftests/i915_request.c @@ -430,7 +430,7 @@ static struct i915_vma *empty_batch(struct drm_i915_private *i915) if (err) goto err; - vma = i915_vma_instance(obj, &i915->ggtt.base, NULL); + vma = i915_vma_instance(obj, &i915->ggtt.vm, NULL); if (IS_ERR(vma)) { err = PTR_ERR(vma); goto err; @@ -555,7 +555,8 @@ out_unlock: static struct i915_vma *recursive_batch(struct drm_i915_private *i915) { struct i915_gem_context *ctx = i915->kernel_context; - struct i915_address_space *vm = ctx->ppgtt ? &ctx->ppgtt->base : &i915->ggtt.base; + struct i915_address_space *vm = + ctx->ppgtt ? &ctx->ppgtt->vm : &i915->ggtt.vm; struct drm_i915_gem_object *obj; const int gen = INTEL_GEN(i915); struct i915_vma *vma; diff --git a/drivers/gpu/drm/i915/selftests/i915_vma.c b/drivers/gpu/drm/i915/selftests/i915_vma.c index e90f97236e50..8400a8cc5cf2 100644 --- a/drivers/gpu/drm/i915/selftests/i915_vma.c +++ b/drivers/gpu/drm/i915/selftests/i915_vma.c @@ -35,7 +35,7 @@ static bool assert_vma(struct i915_vma *vma, { bool ok = true; - if (vma->vm != &ctx->ppgtt->base) { + if (vma->vm != &ctx->ppgtt->vm) { pr_err("VMA created with wrong VM\n"); ok = false; } @@ -110,8 +110,7 @@ static int create_vmas(struct drm_i915_private *i915, list_for_each_entry(obj, objects, st_link) { for (pinned = 0; pinned <= 1; pinned++) { list_for_each_entry(ctx, contexts, link) { - struct i915_address_space *vm = - &ctx->ppgtt->base; + struct i915_address_space *vm = &ctx->ppgtt->vm; struct i915_vma *vma; int err; @@ -259,12 +258,12 @@ static int igt_vma_pin1(void *arg) VALID(0, PIN_GLOBAL | PIN_OFFSET_BIAS | 8192), VALID(0, PIN_GLOBAL | PIN_OFFSET_BIAS | (i915->ggtt.mappable_end - 4096)), VALID(0, PIN_GLOBAL | PIN_MAPPABLE | PIN_OFFSET_BIAS | (i915->ggtt.mappable_end - 4096)), - VALID(0, PIN_GLOBAL | PIN_OFFSET_BIAS | (i915->ggtt.base.total - 4096)), + VALID(0, PIN_GLOBAL | PIN_OFFSET_BIAS | (i915->ggtt.vm.total - 4096)), VALID(0, PIN_GLOBAL | PIN_MAPPABLE | PIN_OFFSET_FIXED | (i915->ggtt.mappable_end - 4096)), INVALID(0, PIN_GLOBAL | PIN_MAPPABLE | PIN_OFFSET_FIXED | i915->ggtt.mappable_end), - VALID(0, PIN_GLOBAL | PIN_OFFSET_FIXED | (i915->ggtt.base.total - 4096)), - INVALID(0, PIN_GLOBAL | PIN_OFFSET_FIXED | i915->ggtt.base.total), + VALID(0, PIN_GLOBAL | PIN_OFFSET_FIXED | (i915->ggtt.vm.total - 4096)), + INVALID(0, PIN_GLOBAL | PIN_OFFSET_FIXED | i915->ggtt.vm.total), INVALID(0, PIN_GLOBAL | PIN_OFFSET_FIXED | round_down(U64_MAX, PAGE_SIZE)), VALID(4096, PIN_GLOBAL), @@ -272,12 +271,12 @@ static int igt_vma_pin1(void *arg) VALID(i915->ggtt.mappable_end - 4096, PIN_GLOBAL | PIN_MAPPABLE), VALID(i915->ggtt.mappable_end, PIN_GLOBAL | PIN_MAPPABLE), NOSPACE(i915->ggtt.mappable_end + 4096, PIN_GLOBAL | PIN_MAPPABLE), - VALID(i915->ggtt.base.total - 4096, PIN_GLOBAL), - VALID(i915->ggtt.base.total, PIN_GLOBAL), - NOSPACE(i915->ggtt.base.total + 4096, PIN_GLOBAL), + VALID(i915->ggtt.vm.total - 4096, PIN_GLOBAL), + VALID(i915->ggtt.vm.total, PIN_GLOBAL), + NOSPACE(i915->ggtt.vm.total + 4096, PIN_GLOBAL), NOSPACE(round_down(U64_MAX, PAGE_SIZE), PIN_GLOBAL), INVALID(8192, PIN_GLOBAL | PIN_MAPPABLE | PIN_OFFSET_FIXED | (i915->ggtt.mappable_end - 4096)), - INVALID(8192, PIN_GLOBAL | PIN_OFFSET_FIXED | (i915->ggtt.base.total - 4096)), + INVALID(8192, PIN_GLOBAL | PIN_OFFSET_FIXED | (i915->ggtt.vm.total - 4096)), INVALID(8192, PIN_GLOBAL | PIN_OFFSET_FIXED | (round_down(U64_MAX, PAGE_SIZE) - 4096)), VALID(8192, PIN_GLOBAL | PIN_OFFSET_BIAS | (i915->ggtt.mappable_end - 4096)), @@ -289,9 +288,9 @@ static int igt_vma_pin1(void *arg) * variable start, end and size. */ NOSPACE(0, PIN_GLOBAL | PIN_MAPPABLE | PIN_OFFSET_BIAS | i915->ggtt.mappable_end), - NOSPACE(0, PIN_GLOBAL | PIN_OFFSET_BIAS | i915->ggtt.base.total), + NOSPACE(0, PIN_GLOBAL | PIN_OFFSET_BIAS | i915->ggtt.vm.total), NOSPACE(8192, PIN_GLOBAL | PIN_MAPPABLE | PIN_OFFSET_BIAS | (i915->ggtt.mappable_end - 4096)), - NOSPACE(8192, PIN_GLOBAL | PIN_OFFSET_BIAS | (i915->ggtt.base.total - 4096)), + NOSPACE(8192, PIN_GLOBAL | PIN_OFFSET_BIAS | (i915->ggtt.vm.total - 4096)), #endif { }, #undef NOSPACE @@ -307,13 +306,13 @@ static int igt_vma_pin1(void *arg) * focusing on error handling of boundary conditions. */ - GEM_BUG_ON(!drm_mm_clean(&i915->ggtt.base.mm)); + GEM_BUG_ON(!drm_mm_clean(&i915->ggtt.vm.mm)); obj = i915_gem_object_create_internal(i915, PAGE_SIZE); if (IS_ERR(obj)) return PTR_ERR(obj); - vma = checked_vma_instance(obj, &i915->ggtt.base, NULL); + vma = checked_vma_instance(obj, &i915->ggtt.vm, NULL); if (IS_ERR(vma)) goto out; @@ -405,7 +404,7 @@ static unsigned int rotated_size(const struct intel_rotation_plane_info *a, static int igt_vma_rotate(void *arg) { struct drm_i915_private *i915 = arg; - struct i915_address_space *vm = &i915->ggtt.base; + struct i915_address_space *vm = &i915->ggtt.vm; struct drm_i915_gem_object *obj; const struct intel_rotation_plane_info planes[] = { { .width = 1, .height = 1, .stride = 1 }, @@ -604,7 +603,7 @@ static bool assert_pin(struct i915_vma *vma, static int igt_vma_partial(void *arg) { struct drm_i915_private *i915 = arg; - struct i915_address_space *vm = &i915->ggtt.base; + struct i915_address_space *vm = &i915->ggtt.vm; const unsigned int npages = 1021; /* prime! */ struct drm_i915_gem_object *obj; const struct phase { diff --git a/drivers/gpu/drm/i915/selftests/intel_hangcheck.c b/drivers/gpu/drm/i915/selftests/intel_hangcheck.c index 2091e3a6a5be..390a157b37c3 100644 --- a/drivers/gpu/drm/i915/selftests/intel_hangcheck.c +++ b/drivers/gpu/drm/i915/selftests/intel_hangcheck.c @@ -107,8 +107,8 @@ static int emit_recurse_batch(struct hang *h, struct drm_i915_private *i915 = h->i915; struct i915_address_space *vm = rq->gem_context->ppgtt ? - &rq->gem_context->ppgtt->base : - &i915->ggtt.base; + &rq->gem_context->ppgtt->vm : + &i915->ggtt.vm; struct i915_vma *hws, *vma; unsigned int flags; u32 *batch; diff --git a/drivers/gpu/drm/i915/selftests/intel_lrc.c b/drivers/gpu/drm/i915/selftests/intel_lrc.c index 68cb9126b3e1..0b6da08c8cae 100644 --- a/drivers/gpu/drm/i915/selftests/intel_lrc.c +++ b/drivers/gpu/drm/i915/selftests/intel_lrc.c @@ -83,7 +83,7 @@ static int emit_recurse_batch(struct spinner *spin, struct i915_request *rq, u32 arbitration_command) { - struct i915_address_space *vm = &rq->gem_context->ppgtt->base; + struct i915_address_space *vm = &rq->gem_context->ppgtt->vm; struct i915_vma *hws, *vma; u32 *batch; int err; diff --git a/drivers/gpu/drm/i915/selftests/intel_workarounds.c b/drivers/gpu/drm/i915/selftests/intel_workarounds.c index 17444a3abbb9..f1cfb0fb6bea 100644 --- a/drivers/gpu/drm/i915/selftests/intel_workarounds.c +++ b/drivers/gpu/drm/i915/selftests/intel_workarounds.c @@ -33,7 +33,7 @@ read_nonprivs(struct i915_gem_context *ctx, struct intel_engine_cs *engine) memset(cs, 0xc5, PAGE_SIZE); i915_gem_object_unpin_map(result); - vma = i915_vma_instance(result, &engine->i915->ggtt.base, NULL); + vma = i915_vma_instance(result, &engine->i915->ggtt.vm, NULL); if (IS_ERR(vma)) { err = PTR_ERR(vma); goto err_obj; diff --git a/drivers/gpu/drm/i915/selftests/mock_gtt.c b/drivers/gpu/drm/i915/selftests/mock_gtt.c index 36c112088940..556c546f2715 100644 --- a/drivers/gpu/drm/i915/selftests/mock_gtt.c +++ b/drivers/gpu/drm/i915/selftests/mock_gtt.c @@ -66,25 +66,25 @@ mock_ppgtt(struct drm_i915_private *i915, return NULL; kref_init(&ppgtt->ref); - ppgtt->base.i915 = i915; - ppgtt->base.total = round_down(U64_MAX, PAGE_SIZE); - ppgtt->base.file = ERR_PTR(-ENODEV); - - INIT_LIST_HEAD(&ppgtt->base.active_list); - INIT_LIST_HEAD(&ppgtt->base.inactive_list); - INIT_LIST_HEAD(&ppgtt->base.unbound_list); - - INIT_LIST_HEAD(&ppgtt->base.global_link); - drm_mm_init(&ppgtt->base.mm, 0, ppgtt->base.total); - - ppgtt->base.clear_range = nop_clear_range; - ppgtt->base.insert_page = mock_insert_page; - ppgtt->base.insert_entries = mock_insert_entries; - ppgtt->base.bind_vma = mock_bind_ppgtt; - ppgtt->base.unbind_vma = mock_unbind_ppgtt; - ppgtt->base.set_pages = ppgtt_set_pages; - ppgtt->base.clear_pages = clear_pages; - ppgtt->base.cleanup = mock_cleanup; + ppgtt->vm.i915 = i915; + ppgtt->vm.total = round_down(U64_MAX, PAGE_SIZE); + ppgtt->vm.file = ERR_PTR(-ENODEV); + + INIT_LIST_HEAD(&ppgtt->vm.active_list); + INIT_LIST_HEAD(&ppgtt->vm.inactive_list); + INIT_LIST_HEAD(&ppgtt->vm.unbound_list); + + INIT_LIST_HEAD(&ppgtt->vm.global_link); + drm_mm_init(&ppgtt->vm.mm, 0, ppgtt->vm.total); + + ppgtt->vm.clear_range = nop_clear_range; + ppgtt->vm.insert_page = mock_insert_page; + ppgtt->vm.insert_entries = mock_insert_entries; + ppgtt->vm.bind_vma = mock_bind_ppgtt; + ppgtt->vm.unbind_vma = mock_unbind_ppgtt; + ppgtt->vm.set_pages = ppgtt_set_pages; + ppgtt->vm.clear_pages = clear_pages; + ppgtt->vm.cleanup = mock_cleanup; return ppgtt; } @@ -107,27 +107,27 @@ void mock_init_ggtt(struct drm_i915_private *i915) INIT_LIST_HEAD(&i915->vm_list); - ggtt->base.i915 = i915; + ggtt->vm.i915 = i915; ggtt->gmadr = (struct resource) DEFINE_RES_MEM(0, 2048 * PAGE_SIZE); ggtt->mappable_end = resource_size(&ggtt->gmadr); - ggtt->base.total = 4096 * PAGE_SIZE; - - ggtt->base.clear_range = nop_clear_range; - ggtt->base.insert_page = mock_insert_page; - ggtt->base.insert_entries = mock_insert_entries; - ggtt->base.bind_vma = mock_bind_ggtt; - ggtt->base.unbind_vma = mock_unbind_ggtt; - ggtt->base.set_pages = ggtt_set_pages; - ggtt->base.clear_pages = clear_pages; - ggtt->base.cleanup = mock_cleanup; - - i915_address_space_init(&ggtt->base, i915, "global"); + ggtt->vm.total = 4096 * PAGE_SIZE; + + ggtt->vm.clear_range = nop_clear_range; + ggtt->vm.insert_page = mock_insert_page; + ggtt->vm.insert_entries = mock_insert_entries; + ggtt->vm.bind_vma = mock_bind_ggtt; + ggtt->vm.unbind_vma = mock_unbind_ggtt; + ggtt->vm.set_pages = ggtt_set_pages; + ggtt->vm.clear_pages = clear_pages; + ggtt->vm.cleanup = mock_cleanup; + + i915_address_space_init(&ggtt->vm, i915, "global"); } void mock_fini_ggtt(struct drm_i915_private *i915) { struct i915_ggtt *ggtt = &i915->ggtt; - i915_address_space_fini(&ggtt->base); + i915_address_space_fini(&ggtt->vm); } -- cgit v1.2.3 From b3ee09a4de33259a89d30aca6b2ebb0bc26640af Mon Sep 17 00:00:00 2001 From: Chris Wilson Date: Mon, 11 Jun 2018 12:08:44 +0100 Subject: drm/i915/ringbuffer: Fix context restore upon reset The discovery with trying to enable full-ppgtt was that we were completely failing to the load both the mm and context following the reset. Although we were performing mmio to set the PP_DIR (per-process GTT) and CCID (context), these were taking no effect (the assumption was that this would trigger reload of the context and restore the page tables). It was not until we performed the LRI + MI_SET_CONTEXT in a following context switch would anything occur. Since we are then required to reset the context image and PP_DIR using CS commands, we place those commands into every batch. The hardware should recognise the no-ops and eliminate the expensive context loads, but we still have to pay the cost of using cross-powerwell register writes. In practice, this has no effect on actual context switch times, and only adds a few hundred nanoseconds to no-op switches. We can improve the latter by eliminating the w/a around known no-op switches, but there is an ulterior motive to keeping them. Always emitting the context switch at the beginning of the request (and relying on HW to skip unneeded switches) does have one key advantage. Should we implement request reordering on Haswell, we will not know in advance what the previous executing context was on the GPU and so we would not be able to elide the MI_SET_CONTEXT commands ourselves and always have to emit them. Having our hand forced now actually prepares us for later. Now since that context and mm follow the request, we no longer (and not for a long time since requests took over!) require a trace point to tell when we write the switch into the ring, since it is always. (This is even more important when you remember that simply writing into the ring bears no relation to the current mm.) v2: Sandybridge has to agree to use LRI as well. Testcase: igt/drv_selftests/live_hangcheck Signed-off-by: Chris Wilson Cc: Joonas Lahtinen Cc: Mika Kuoppala Cc: Matthew Auld Cc: Tvrtko Ursulin Reviewed-by: Joonas Lahtinen Link: https://patchwork.freedesktop.org/patch/msgid/20180611110845.31890-1-chris@chris-wilson.co.uk --- drivers/gpu/drm/i915/i915_gem_gtt.c | 45 ------------ drivers/gpu/drm/i915/i915_gem_gtt.h | 2 - drivers/gpu/drm/i915/i915_request.c | 2 + drivers/gpu/drm/i915/i915_request.h | 3 + drivers/gpu/drm/i915/i915_trace.h | 33 --------- drivers/gpu/drm/i915/intel_engine_cs.c | 3 - drivers/gpu/drm/i915/intel_ringbuffer.c | 125 ++++++++++++++++---------------- drivers/gpu/drm/i915/intel_ringbuffer.h | 9 --- 8 files changed, 66 insertions(+), 156 deletions(-) (limited to 'drivers/gpu/drm/i915/intel_engine_cs.c') diff --git a/drivers/gpu/drm/i915/i915_gem_gtt.c b/drivers/gpu/drm/i915/i915_gem_gtt.c index 7ccfdbc8f9b4..ac75e0c5735c 100644 --- a/drivers/gpu/drm/i915/i915_gem_gtt.c +++ b/drivers/gpu/drm/i915/i915_gem_gtt.c @@ -1712,45 +1712,6 @@ static void gen6_write_page_range(struct i915_hw_ppgtt *ppgtt, wmb(); } -static inline u32 get_pd_offset(struct i915_hw_ppgtt *ppgtt) -{ - GEM_BUG_ON(ppgtt->pd.base.ggtt_offset & 0x3f); - return ppgtt->pd.base.ggtt_offset << 10; -} - -static int gen7_mm_switch(struct i915_hw_ppgtt *ppgtt, - struct i915_request *rq) -{ - struct intel_engine_cs *engine = rq->engine; - u32 *cs; - - /* NB: TLBs must be flushed and invalidated before a switch */ - cs = intel_ring_begin(rq, 6); - if (IS_ERR(cs)) - return PTR_ERR(cs); - - *cs++ = MI_LOAD_REGISTER_IMM(2); - *cs++ = i915_mmio_reg_offset(RING_PP_DIR_DCLV(engine)); - *cs++ = PP_DIR_DCLV_2G; - *cs++ = i915_mmio_reg_offset(RING_PP_DIR_BASE(engine)); - *cs++ = get_pd_offset(ppgtt); - *cs++ = MI_NOOP; - intel_ring_advance(rq, cs); - - return 0; -} - -static int gen6_mm_switch(struct i915_hw_ppgtt *ppgtt, - struct i915_request *rq) -{ - struct intel_engine_cs *engine = rq->engine; - struct drm_i915_private *dev_priv = rq->i915; - - I915_WRITE(RING_PP_DIR_DCLV(engine), PP_DIR_DCLV_2G); - I915_WRITE(RING_PP_DIR_BASE(engine), get_pd_offset(ppgtt)); - return 0; -} - static void gen8_ppgtt_enable(struct drm_i915_private *dev_priv) { struct intel_engine_cs *engine; @@ -2024,12 +1985,6 @@ static struct i915_hw_ppgtt *gen6_ppgtt_create(struct drm_i915_private *i915) ppgtt->vm.dma = &i915->drm.pdev->dev; ppgtt->vm.pte_encode = ggtt->vm.pte_encode; - if (IS_GEN6(i915)) - ppgtt->switch_mm = gen6_mm_switch; - else if (IS_GEN7(i915)) - ppgtt->switch_mm = gen7_mm_switch; - else - BUG(); err = gen6_ppgtt_alloc(ppgtt); if (err) diff --git a/drivers/gpu/drm/i915/i915_gem_gtt.h b/drivers/gpu/drm/i915/i915_gem_gtt.h index 16307ba7e303..e70f6abcd0f2 100644 --- a/drivers/gpu/drm/i915/i915_gem_gtt.h +++ b/drivers/gpu/drm/i915/i915_gem_gtt.h @@ -406,8 +406,6 @@ struct i915_hw_ppgtt { gen6_pte_t __iomem *pd_addr; - int (*switch_mm)(struct i915_hw_ppgtt *ppgtt, - struct i915_request *rq); void (*debug_dump)(struct i915_hw_ppgtt *ppgtt, struct seq_file *m); }; diff --git a/drivers/gpu/drm/i915/i915_request.c b/drivers/gpu/drm/i915/i915_request.c index f187250e60c6..9092f5464c24 100644 --- a/drivers/gpu/drm/i915/i915_request.c +++ b/drivers/gpu/drm/i915/i915_request.c @@ -817,6 +817,8 @@ i915_request_alloc(struct intel_engine_cs *engine, struct i915_gem_context *ctx) /* Keep a second pin for the dual retirement along engine and ring */ __intel_context_pin(ce); + rq->infix = rq->ring->emit; /* end of header; start of user payload */ + /* Check that we didn't interrupt ourselves with a new request */ GEM_BUG_ON(rq->timeline->seqno != rq->fence.seqno); return rq; diff --git a/drivers/gpu/drm/i915/i915_request.h b/drivers/gpu/drm/i915/i915_request.h index 491ff81d0fea..0e9aba53d0e4 100644 --- a/drivers/gpu/drm/i915/i915_request.h +++ b/drivers/gpu/drm/i915/i915_request.h @@ -134,6 +134,9 @@ struct i915_request { /** Position in the ring of the start of the request */ u32 head; + /** Position in the ring of the start of the user packets */ + u32 infix; + /** * Position in the ring of the start of the postfix. * This is required to calculate the maximum available ring space diff --git a/drivers/gpu/drm/i915/i915_trace.h b/drivers/gpu/drm/i915/i915_trace.h index 1472f48ab2e8..b50c6b829715 100644 --- a/drivers/gpu/drm/i915/i915_trace.h +++ b/drivers/gpu/drm/i915/i915_trace.h @@ -973,39 +973,6 @@ DEFINE_EVENT(i915_context, i915_context_free, TP_ARGS(ctx) ); -/** - * DOC: switch_mm tracepoint - * - * This tracepoint allows tracking of the mm switch, which is an important point - * in the lifetime of the vm in the legacy submission path. This tracepoint is - * called only if full ppgtt is enabled. - */ -TRACE_EVENT(switch_mm, - TP_PROTO(struct intel_engine_cs *engine, struct i915_gem_context *to), - - TP_ARGS(engine, to), - - TP_STRUCT__entry( - __field(u16, class) - __field(u16, instance) - __field(struct i915_gem_context *, to) - __field(struct i915_address_space *, vm) - __field(u32, dev) - ), - - TP_fast_assign( - __entry->class = engine->uabi_class; - __entry->instance = engine->instance; - __entry->to = to; - __entry->vm = to->ppgtt ? &to->ppgtt->vm : NULL; - __entry->dev = engine->i915->drm.primary->index; - ), - - TP_printk("dev=%u, engine=%u:%u, ctx=%p, ctx_vm=%p", - __entry->dev, __entry->class, __entry->instance, __entry->to, - __entry->vm) -); - #endif /* _I915_TRACE_H_ */ /* This part must be outside protection */ diff --git a/drivers/gpu/drm/i915/intel_engine_cs.c b/drivers/gpu/drm/i915/intel_engine_cs.c index 2ec2e60dc670..d1cf8b4926ab 100644 --- a/drivers/gpu/drm/i915/intel_engine_cs.c +++ b/drivers/gpu/drm/i915/intel_engine_cs.c @@ -1168,9 +1168,6 @@ void intel_engine_lost_context(struct intel_engine_cs *engine) lockdep_assert_held(&engine->i915->drm.struct_mutex); - engine->legacy_active_context = NULL; - engine->legacy_active_ppgtt = NULL; - ce = fetch_and_zero(&engine->last_retired_context); if (ce) intel_context_unpin(ce); diff --git a/drivers/gpu/drm/i915/intel_ringbuffer.c b/drivers/gpu/drm/i915/intel_ringbuffer.c index 5bc53a5f4504..d72a6a5ff3ac 100644 --- a/drivers/gpu/drm/i915/intel_ringbuffer.c +++ b/drivers/gpu/drm/i915/intel_ringbuffer.c @@ -541,11 +541,23 @@ static struct i915_request *reset_prepare(struct intel_engine_cs *engine) return i915_gem_find_active_request(engine); } -static void reset_ring(struct intel_engine_cs *engine, - struct i915_request *request) +static void skip_request(struct i915_request *rq) { - GEM_TRACE("%s seqno=%x\n", - engine->name, request ? request->global_seqno : 0); + void *vaddr = rq->ring->vaddr; + u32 head; + + head = rq->infix; + if (rq->postfix < head) { + memset32(vaddr + head, MI_NOOP, + (rq->ring->size - head) / sizeof(u32)); + head = 0; + } + memset32(vaddr + head, MI_NOOP, (rq->postfix - head) / sizeof(u32)); +} + +static void reset_ring(struct intel_engine_cs *engine, struct i915_request *rq) +{ + GEM_TRACE("%s seqno=%x\n", engine->name, rq ? rq->global_seqno : 0); /* * RC6 must be prevented until the reset is complete and the engine @@ -569,43 +581,11 @@ static void reset_ring(struct intel_engine_cs *engine, * If the request was innocent, we try to replay the request with * the restored context. */ - if (request) { - struct drm_i915_private *dev_priv = request->i915; - struct intel_context *ce = request->hw_context; - struct i915_hw_ppgtt *ppgtt; - - if (ce->state) { - I915_WRITE(CCID, - i915_ggtt_offset(ce->state) | - BIT(8) /* must be set! */ | - CCID_EXTENDED_STATE_SAVE | - CCID_EXTENDED_STATE_RESTORE | - CCID_EN); - } - - ppgtt = request->gem_context->ppgtt ?: engine->i915->mm.aliasing_ppgtt; - if (ppgtt) { - u32 pd_offset = ppgtt->pd.base.ggtt_offset << 10; - - I915_WRITE(RING_PP_DIR_DCLV(engine), PP_DIR_DCLV_2G); - I915_WRITE(RING_PP_DIR_BASE(engine), pd_offset); - - /* Wait for the PD reload to complete */ - if (intel_wait_for_register(dev_priv, - RING_PP_DIR_BASE(engine), - BIT(0), 0, - 10)) - DRM_ERROR("Wait for reload of ppgtt page-directory timed out\n"); - - ppgtt->pd_dirty_rings &= ~intel_engine_flag(engine); - } - + if (rq) { /* If the rq hung, jump to its breadcrumb and skip the batch */ - if (request->fence.error == -EIO) - request->ring->head = request->postfix; - } else { - engine->legacy_active_context = NULL; - engine->legacy_active_ppgtt = NULL; + rq->ring->head = intel_ring_wrap(rq->ring, rq->head); + if (rq->fence.error == -EIO) + skip_request(rq); } } @@ -1446,6 +1426,29 @@ void intel_legacy_submission_resume(struct drm_i915_private *dev_priv) intel_ring_reset(engine->buffer, 0); } +static int load_pd_dir(struct i915_request *rq, + const struct i915_hw_ppgtt *ppgtt) +{ + const struct intel_engine_cs * const engine = rq->engine; + u32 *cs; + + cs = intel_ring_begin(rq, 6); + if (IS_ERR(cs)) + return PTR_ERR(cs); + + *cs++ = MI_LOAD_REGISTER_IMM(1); + *cs++ = i915_mmio_reg_offset(RING_PP_DIR_DCLV(engine)); + *cs++ = PP_DIR_DCLV_2G; + + *cs++ = MI_LOAD_REGISTER_IMM(1); + *cs++ = i915_mmio_reg_offset(RING_PP_DIR_BASE(engine)); + *cs++ = ppgtt->pd.base.ggtt_offset << 10; + + intel_ring_advance(rq, cs); + + return 0; +} + static inline int mi_set_context(struct i915_request *rq, u32 flags) { struct drm_i915_private *i915 = rq->i915; @@ -1590,31 +1593,28 @@ static int remap_l3(struct i915_request *rq, int slice) static int switch_context(struct i915_request *rq) { struct intel_engine_cs *engine = rq->engine; - struct i915_gem_context *to_ctx = rq->gem_context; - struct i915_hw_ppgtt *to_mm = - to_ctx->ppgtt ?: rq->i915->mm.aliasing_ppgtt; - struct i915_gem_context *from_ctx = engine->legacy_active_context; - struct i915_hw_ppgtt *from_mm = engine->legacy_active_ppgtt; + struct i915_gem_context *ctx = rq->gem_context; + struct i915_hw_ppgtt *ppgtt = ctx->ppgtt ?: rq->i915->mm.aliasing_ppgtt; + unsigned int unwind_mm = 0; u32 hw_flags = 0; int ret, i; lockdep_assert_held(&rq->i915->drm.struct_mutex); GEM_BUG_ON(HAS_EXECLISTS(rq->i915)); - if (to_mm != from_mm || - (to_mm && intel_engine_flag(engine) & to_mm->pd_dirty_rings)) { - trace_switch_mm(engine, to_ctx); - ret = to_mm->switch_mm(to_mm, rq); + if (ppgtt) { + ret = load_pd_dir(rq, ppgtt); if (ret) goto err; - to_mm->pd_dirty_rings &= ~intel_engine_flag(engine); - engine->legacy_active_ppgtt = to_mm; - hw_flags = MI_FORCE_RESTORE; + if (intel_engine_flag(engine) & ppgtt->pd_dirty_rings) { + unwind_mm = intel_engine_flag(engine); + ppgtt->pd_dirty_rings &= ~unwind_mm; + hw_flags = MI_FORCE_RESTORE; + } } - if (rq->hw_context->state && - (to_ctx != from_ctx || hw_flags & MI_FORCE_RESTORE)) { + if (rq->hw_context->state) { GEM_BUG_ON(engine->id != RCS); /* @@ -1624,35 +1624,32 @@ static int switch_context(struct i915_request *rq) * as nothing actually executes using the kernel context; it * is purely used for flushing user contexts. */ - if (i915_gem_context_is_kernel(to_ctx)) + if (i915_gem_context_is_kernel(ctx)) hw_flags = MI_RESTORE_INHIBIT; ret = mi_set_context(rq, hw_flags); if (ret) goto err_mm; - - engine->legacy_active_context = to_ctx; } - if (to_ctx->remap_slice) { + if (ctx->remap_slice) { for (i = 0; i < MAX_L3_SLICES; i++) { - if (!(to_ctx->remap_slice & BIT(i))) + if (!(ctx->remap_slice & BIT(i))) continue; ret = remap_l3(rq, i); if (ret) - goto err_ctx; + goto err_mm; } - to_ctx->remap_slice = 0; + ctx->remap_slice = 0; } return 0; -err_ctx: - engine->legacy_active_context = from_ctx; err_mm: - engine->legacy_active_ppgtt = from_mm; + if (unwind_mm) + ppgtt->pd_dirty_rings |= unwind_mm; err: return ret; } diff --git a/drivers/gpu/drm/i915/intel_ringbuffer.h b/drivers/gpu/drm/i915/intel_ringbuffer.h index acef385c4c80..b44c67849749 100644 --- a/drivers/gpu/drm/i915/intel_ringbuffer.h +++ b/drivers/gpu/drm/i915/intel_ringbuffer.h @@ -557,15 +557,6 @@ struct intel_engine_cs { */ struct intel_context *last_retired_context; - /* We track the current MI_SET_CONTEXT in order to eliminate - * redudant context switches. This presumes that requests are not - * reordered! Or when they are the tracking is updated along with - * the emission of individual requests into the legacy command - * stream (ring). - */ - struct i915_gem_context *legacy_active_context; - struct i915_hw_ppgtt *legacy_active_ppgtt; - /* status_notifier: list of callbacks for context-switch changes */ struct atomic_notifier_head context_status_notifier; -- cgit v1.2.3 From d9d117e40d4ffc03438177eeac83d96dfeee76be Mon Sep 17 00:00:00 2001 From: Chris Wilson Date: Mon, 11 Jun 2018 18:18:25 +0100 Subject: drm/i915/ringbuffer: Serialize load of PD_DIR After triggering the mm switch with a load of PD_DIR, which may be deferred unto the MI_SET_CONTEXT on rcs, serialise the next commands with that load by posting a read of PD_DIR (or else those subsequent commands may access the stale page tables). Signed-off-by: Chris Wilson Cc: Joonas Lahtinen Cc: Mika Kuoppala Cc: Matthew Auld Reviewed-by: Joonas Lahtinen Link: https://patchwork.freedesktop.org/patch/msgid/20180611171825.13678-2-chris@chris-wilson.co.uk --- drivers/gpu/drm/i915/intel_engine_cs.c | 5 ++-- drivers/gpu/drm/i915/intel_ringbuffer.c | 49 +++++++++++++++++++++++++-------- drivers/gpu/drm/i915/intel_ringbuffer.h | 5 +++- 3 files changed, 44 insertions(+), 15 deletions(-) (limited to 'drivers/gpu/drm/i915/intel_engine_cs.c') diff --git a/drivers/gpu/drm/i915/intel_engine_cs.c b/drivers/gpu/drm/i915/intel_engine_cs.c index d1cf8b4926ab..d278fed8cb31 100644 --- a/drivers/gpu/drm/i915/intel_engine_cs.c +++ b/drivers/gpu/drm/i915/intel_engine_cs.c @@ -499,7 +499,8 @@ void intel_engine_setup_common(struct intel_engine_cs *engine) intel_engine_init_cmd_parser(engine); } -int intel_engine_create_scratch(struct intel_engine_cs *engine, int size) +int intel_engine_create_scratch(struct intel_engine_cs *engine, + unsigned int size) { struct drm_i915_gem_object *obj; struct i915_vma *vma; @@ -533,7 +534,7 @@ err_unref: return ret; } -static void intel_engine_cleanup_scratch(struct intel_engine_cs *engine) +void intel_engine_cleanup_scratch(struct intel_engine_cs *engine) { i915_vma_unpin_and_release(&engine->scratch); } diff --git a/drivers/gpu/drm/i915/intel_ringbuffer.c b/drivers/gpu/drm/i915/intel_ringbuffer.c index bb4af8f84a22..735622dc73ec 100644 --- a/drivers/gpu/drm/i915/intel_ringbuffer.c +++ b/drivers/gpu/drm/i915/intel_ringbuffer.c @@ -1361,8 +1361,9 @@ intel_ring_context_pin(struct intel_engine_cs *engine, static int intel_init_ring_buffer(struct intel_engine_cs *engine) { - struct intel_ring *ring; struct i915_timeline *timeline; + struct intel_ring *ring; + unsigned int size; int err; intel_engine_setup_common(engine); @@ -1388,12 +1389,21 @@ static int intel_init_ring_buffer(struct intel_engine_cs *engine) GEM_BUG_ON(engine->buffer); engine->buffer = ring; - err = intel_engine_init_common(engine); + size = PAGE_SIZE; + if (HAS_BROKEN_CS_TLB(engine->i915)) + size = I830_WA_SIZE; + err = intel_engine_create_scratch(engine, size); if (err) goto err_unpin; + err = intel_engine_init_common(engine); + if (err) + goto err_scratch; + return 0; +err_scratch: + intel_engine_cleanup_scratch(engine); err_unpin: intel_ring_unpin(ring); err_ring: @@ -1455,6 +1465,25 @@ static int load_pd_dir(struct i915_request *rq, return 0; } +static int flush_pd_dir(struct i915_request *rq) +{ + const struct intel_engine_cs * const engine = rq->engine; + u32 *cs; + + cs = intel_ring_begin(rq, 4); + if (IS_ERR(cs)) + return PTR_ERR(cs); + + /* Stall until the page table load is complete */ + *cs++ = MI_STORE_REGISTER_MEM | MI_SRM_LRM_GLOBAL_GTT; + *cs++ = i915_mmio_reg_offset(RING_PP_DIR_BASE(engine)); + *cs++ = i915_ggtt_offset(engine->scratch); + *cs++ = MI_NOOP; + + intel_ring_advance(rq, cs); + return 0; +} + static inline int mi_set_context(struct i915_request *rq, u32 flags) { struct drm_i915_private *i915 = rq->i915; @@ -1638,6 +1667,12 @@ static int switch_context(struct i915_request *rq) goto err_mm; } + if (ppgtt) { + ret = flush_pd_dir(rq); + if (ret) + goto err_mm; + } + if (ctx->remap_slice) { for (i = 0; i < MAX_L3_SLICES; i++) { if (!(ctx->remap_slice & BIT(i))) @@ -2158,16 +2193,6 @@ int intel_init_render_ring_buffer(struct intel_engine_cs *engine) if (ret) return ret; - if (INTEL_GEN(dev_priv) >= 6) { - ret = intel_engine_create_scratch(engine, PAGE_SIZE); - if (ret) - return ret; - } else if (HAS_BROKEN_CS_TLB(dev_priv)) { - ret = intel_engine_create_scratch(engine, I830_WA_SIZE); - if (ret) - return ret; - } - return 0; } diff --git a/drivers/gpu/drm/i915/intel_ringbuffer.h b/drivers/gpu/drm/i915/intel_ringbuffer.h index e9494557aaa1..2c35dd3525a6 100644 --- a/drivers/gpu/drm/i915/intel_ringbuffer.h +++ b/drivers/gpu/drm/i915/intel_ringbuffer.h @@ -869,9 +869,12 @@ void intel_engine_init_global_seqno(struct intel_engine_cs *engine, u32 seqno); void intel_engine_setup_common(struct intel_engine_cs *engine); int intel_engine_init_common(struct intel_engine_cs *engine); -int intel_engine_create_scratch(struct intel_engine_cs *engine, int size); void intel_engine_cleanup_common(struct intel_engine_cs *engine); +int intel_engine_create_scratch(struct intel_engine_cs *engine, + unsigned int size); +void intel_engine_cleanup_scratch(struct intel_engine_cs *engine); + int intel_init_render_ring_buffer(struct intel_engine_cs *engine); int intel_init_bsd_ring_buffer(struct intel_engine_cs *engine); int intel_init_blt_ring_buffer(struct intel_engine_cs *engine); -- cgit v1.2.3 From 83c317832eb1862e27f872606ee1a2fed6ab638f Mon Sep 17 00:00:00 2001 From: Chris Wilson Date: Thu, 14 Jun 2018 13:21:50 +0100 Subject: drm/i915: Dump the ringbuffer of the active request for debugging Sometimes we need to see what instructions we emitted for a request to try and gather a glimmer of insight into what the GPU is doing when it stops responding. v2: Move ring dumping into its own routine Signed-off-by: Chris Wilson Link: https://patchwork.freedesktop.org/patch/msgid/20180614122150.17552-1-chris@chris-wilson.co.uk Reviewed-by: Joonas Lahtinen --- drivers/gpu/drm/i915/intel_engine_cs.c | 41 +++++++++++++++++++++++++++++----- 1 file changed, 36 insertions(+), 5 deletions(-) (limited to 'drivers/gpu/drm/i915/intel_engine_cs.c') diff --git a/drivers/gpu/drm/i915/intel_engine_cs.c b/drivers/gpu/drm/i915/intel_engine_cs.c index d278fed8cb31..0ffce14ab08d 100644 --- a/drivers/gpu/drm/i915/intel_engine_cs.c +++ b/drivers/gpu/drm/i915/intel_engine_cs.c @@ -1394,6 +1394,39 @@ static void intel_engine_print_registers(const struct intel_engine_cs *engine, } } +static void print_request_ring(struct drm_printer *m, struct i915_request *rq) +{ + void *ring; + int size; + + drm_printf(m, + "[head %04x, postfix %04x, tail %04x, batch 0x%08x_%08x]:\n", + rq->head, rq->postfix, rq->tail, + rq->batch ? upper_32_bits(rq->batch->node.start) : ~0u, + rq->batch ? lower_32_bits(rq->batch->node.start) : ~0u); + + size = rq->tail - rq->head; + if (rq->tail < rq->head) + size += rq->ring->size; + + ring = kmalloc(size, GFP_ATOMIC); + if (ring) { + const void *vaddr = rq->ring->vaddr; + unsigned int head = rq->head; + unsigned int len = 0; + + if (rq->tail < head) { + len = rq->ring->size - head; + memcpy(ring, vaddr + head, len); + head = 0; + } + memcpy(ring + len, vaddr + head, size - len); + + hexdump(m, ring, size); + kfree(ring); + } +} + void intel_engine_dump(struct intel_engine_cs *engine, struct drm_printer *m, const char *header, ...) @@ -1444,11 +1477,7 @@ void intel_engine_dump(struct intel_engine_cs *engine, rq = i915_gem_find_active_request(engine); if (rq) { print_request(m, rq, "\t\tactive "); - drm_printf(m, - "\t\t[head %04x, postfix %04x, tail %04x, batch 0x%08x_%08x]\n", - rq->head, rq->postfix, rq->tail, - rq->batch ? upper_32_bits(rq->batch->node.start) : ~0u, - rq->batch ? lower_32_bits(rq->batch->node.start) : ~0u); + drm_printf(m, "\t\tring->start: 0x%08x\n", i915_ggtt_offset(rq->ring->vma)); drm_printf(m, "\t\tring->head: 0x%08x\n", @@ -1459,6 +1488,8 @@ void intel_engine_dump(struct intel_engine_cs *engine, rq->ring->emit); drm_printf(m, "\t\tring->space: 0x%08x\n", rq->ring->space); + + print_request_ring(m, rq); } rcu_read_unlock(); -- cgit v1.2.3 From 286e615356000ae51fac0e497247543724414d52 Mon Sep 17 00:00:00 2001 From: Chris Wilson Date: Thu, 14 Jun 2018 10:41:01 +0100 Subject: drm/i915: Make the hexdump row offset visually distinct Currently we use %08x for the row offset, and %08x for the binary contents of the buffer. This makes it very easily to confuse the two, so switch to using [%04x] for the start-of-row offset. Signed-off-by: Chris Wilson Reviewed-by: Joonas Lahtinen Link: https://patchwork.freedesktop.org/patch/msgid/20180614094103.18025-3-chris@chris-wilson.co.uk --- drivers/gpu/drm/i915/intel_engine_cs.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'drivers/gpu/drm/i915/intel_engine_cs.c') diff --git a/drivers/gpu/drm/i915/intel_engine_cs.c b/drivers/gpu/drm/i915/intel_engine_cs.c index 0ffce14ab08d..8b3d7aa3dfde 100644 --- a/drivers/gpu/drm/i915/intel_engine_cs.c +++ b/drivers/gpu/drm/i915/intel_engine_cs.c @@ -1258,7 +1258,7 @@ static void hexdump(struct drm_printer *m, const void *buf, size_t len) rowsize, sizeof(u32), line, sizeof(line), false) >= sizeof(line)); - drm_printf(m, "%08zx %s\n", pos, line); + drm_printf(m, "[%04zx] %s\n", pos, line); prev = buf + pos; skip = false; -- cgit v1.2.3 From e62230deeeae1f096a4be2360204536caf9ad8b6 Mon Sep 17 00:00:00 2001 From: Chris Wilson Date: Thu, 14 Jun 2018 10:41:02 +0100 Subject: drm/i915: Show CCID in engine dumps For debugging context issues, knowing what context the GPU is loading/using is helpful. Signed-off-by: Chris Wilson Reviewed-by: Joonas Lahtinen Link: https://patchwork.freedesktop.org/patch/msgid/20180614094103.18025-4-chris@chris-wilson.co.uk --- drivers/gpu/drm/i915/intel_engine_cs.c | 2 ++ 1 file changed, 2 insertions(+) (limited to 'drivers/gpu/drm/i915/intel_engine_cs.c') diff --git a/drivers/gpu/drm/i915/intel_engine_cs.c b/drivers/gpu/drm/i915/intel_engine_cs.c index 8b3d7aa3dfde..13bb8c7d2621 100644 --- a/drivers/gpu/drm/i915/intel_engine_cs.c +++ b/drivers/gpu/drm/i915/intel_engine_cs.c @@ -1273,6 +1273,8 @@ static void intel_engine_print_registers(const struct intel_engine_cs *engine, &engine->execlists; u64 addr; + if (engine->id == RCS && IS_GEN(dev_priv, 4, 7)) + drm_printf(m, "\tCCID: 0x%08x\n", I915_READ(CCID)); drm_printf(m, "\tRING_START: 0x%08x\n", I915_READ(RING_START(engine->mmio_base))); drm_printf(m, "\tRING_HEAD: 0x%08x\n", -- cgit v1.2.3 From 4fdd5b4e9aba5fbbc6d3072a5a87fa1d3f3fc030 Mon Sep 17 00:00:00 2001 From: Chris Wilson Date: Sat, 16 Jun 2018 21:25:34 +0100 Subject: drm/i915: Fix fallout of fake reset along resume commit b2209e62a450 ("drm/i915/execlists: Reset the CSB head tracking on reset/sanitization") and commit 1288786b18f7 ("drm/i915: Move GEM sanitize from resume_early to resume") show the conflicting requirements on the code. We must reset the GPU before trashing live state on a fast resume (hibernation debug, or error paths), but we must only reset our state tracking iff the GPU is reset (or power cycled). This is tricky if we are disabling GPU reset to simulate broken hardware; we reset our state tracking but the GPU is left intact and recovers from its stale state. v2: Again without the assertion for forcewake, no longer required since commit b3ee09a4de33 ("drm/i915/ringbuffer: Fix context restore upon reset") as the contexts are reset from the CS ensuring everything is powered up. Fixes: b2209e62a450 ("drm/i915/execlists: Reset the CSB head tracking on reset/sanitization") Fixes: 1288786b18f7 ("drm/i915: Move GEM sanitize from resume_early to resume") Signed-off-by: Chris Wilson Cc: Tvrtko Ursulin Cc: Mika Kuoppala Cc: Joonas Lahtinen Reviewed-by: Joonas Lahtinen Link: https://patchwork.freedesktop.org/patch/msgid/20180616202534.18767-1-chris@chris-wilson.co.uk --- drivers/gpu/drm/i915/i915_drv.c | 2 ++ drivers/gpu/drm/i915/i915_gem.c | 14 +++++--------- drivers/gpu/drm/i915/intel_engine_cs.c | 22 ++++++++++++++++++++++ drivers/gpu/drm/i915/intel_ringbuffer.c | 8 -------- drivers/gpu/drm/i915/intel_ringbuffer.h | 2 ++ 5 files changed, 31 insertions(+), 17 deletions(-) (limited to 'drivers/gpu/drm/i915/intel_engine_cs.c') diff --git a/drivers/gpu/drm/i915/i915_drv.c b/drivers/gpu/drm/i915/i915_drv.c index a9c8c66bb525..33453d56c386 100644 --- a/drivers/gpu/drm/i915/i915_drv.c +++ b/drivers/gpu/drm/i915/i915_drv.c @@ -1841,6 +1841,8 @@ static int i915_drm_resume_early(struct drm_device *dev) else intel_display_set_init_power(dev_priv, true); + intel_engines_sanitize(dev_priv); + enable_rpm_wakeref_asserts(dev_priv); out: diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c index 5155e458b11e..822abf444378 100644 --- a/drivers/gpu/drm/i915/i915_gem.c +++ b/drivers/gpu/drm/i915/i915_gem.c @@ -4990,8 +4990,7 @@ void __i915_gem_object_release_unless_active(struct drm_i915_gem_object *obj) void i915_gem_sanitize(struct drm_i915_private *i915) { - struct intel_engine_cs *engine; - enum intel_engine_id id; + int err; GEM_TRACE("\n"); @@ -5017,14 +5016,11 @@ void i915_gem_sanitize(struct drm_i915_private *i915) * it may impact the display and we are uncertain about the stability * of the reset, so this could be applied to even earlier gen. */ + err = -ENODEV; if (INTEL_GEN(i915) >= 5 && intel_has_gpu_reset(i915)) - WARN_ON(intel_gpu_reset(i915, ALL_ENGINES)); - - /* Reset the submission backend after resume as well as the GPU reset */ - for_each_engine(engine, i915, id) { - if (engine->reset.reset) - engine->reset.reset(engine, NULL); - } + err = WARN_ON(intel_gpu_reset(i915, ALL_ENGINES)); + if (!err) + intel_engines_sanitize(i915); intel_uncore_forcewake_put(i915, FORCEWAKE_ALL); intel_runtime_pm_put(i915); diff --git a/drivers/gpu/drm/i915/intel_engine_cs.c b/drivers/gpu/drm/i915/intel_engine_cs.c index 13bb8c7d2621..32bf3a408d46 100644 --- a/drivers/gpu/drm/i915/intel_engine_cs.c +++ b/drivers/gpu/drm/i915/intel_engine_cs.c @@ -1077,6 +1077,28 @@ void intel_engines_reset_default_submission(struct drm_i915_private *i915) engine->set_default_submission(engine); } +/** + * intel_engines_sanitize: called after the GPU has lost power + * @i915: the i915 device + * + * Anytime we reset the GPU, either with an explicit GPU reset or through a + * PCI power cycle, the GPU loses state and we must reset our state tracking + * to match. Note that calling intel_engines_sanitize() if the GPU has not + * been reset results in much confusion! + */ +void intel_engines_sanitize(struct drm_i915_private *i915) +{ + struct intel_engine_cs *engine; + enum intel_engine_id id; + + GEM_TRACE("\n"); + + for_each_engine(engine, i915, id) { + if (engine->reset.reset) + engine->reset.reset(engine, NULL); + } +} + /** * intel_engines_park: called when the GT is transitioning from busy->idle * @i915: the i915 device diff --git a/drivers/gpu/drm/i915/intel_ringbuffer.c b/drivers/gpu/drm/i915/intel_ringbuffer.c index 6a937a896651..4dae23885006 100644 --- a/drivers/gpu/drm/i915/intel_ringbuffer.c +++ b/drivers/gpu/drm/i915/intel_ringbuffer.c @@ -563,14 +563,6 @@ static void reset_ring(struct intel_engine_cs *engine, struct i915_request *rq) { GEM_TRACE("%s seqno=%x\n", engine->name, rq ? rq->global_seqno : 0); - /* - * RC6 must be prevented until the reset is complete and the engine - * reinitialised. If it occurs in the middle of this sequence, the - * state written to/loaded from the power context is ill-defined (e.g. - * the PP_BASE_DIR may be lost). - */ - assert_forcewakes_active(engine->i915, FORCEWAKE_ALL); - /* * Try to restore the logical GPU state to match the continuation * of the request queue. If we skip the context/PD restore, then diff --git a/drivers/gpu/drm/i915/intel_ringbuffer.h b/drivers/gpu/drm/i915/intel_ringbuffer.h index 4003f3ebe3d1..a0bc7a8222b4 100644 --- a/drivers/gpu/drm/i915/intel_ringbuffer.h +++ b/drivers/gpu/drm/i915/intel_ringbuffer.h @@ -1052,6 +1052,8 @@ gen8_emit_ggtt_write(u32 *cs, u32 value, u32 gtt_offset) return cs; } +void intel_engines_sanitize(struct drm_i915_private *i915); + bool intel_engine_is_idle(struct intel_engine_cs *engine); bool intel_engines_are_idle(struct drm_i915_private *dev_priv); -- cgit v1.2.3 From 26eb4cd6c7c7793ee76c73b66621f63daff51953 Mon Sep 17 00:00:00 2001 From: Chris Wilson Date: Wed, 20 Jun 2018 14:59:29 +0100 Subject: drm/i915: Disable bh around call to tasklet MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The guc submission backends expects to only be run from (at least) softirq context, but during our intel_engine_is_idle() check we would call into the tasklet to make sure it was flushed. As this could occur from process context, occasionally we would be caught out using a wait_for_atomic() not from an atomic context: [ 59.939091] WARN_ON_ONCE((1) && !(preempt_count() != 0)) [ 59.939142] WARNING: CPU: 1 PID: 2901 at drivers/gpu/drm/i915/intel_guc_submission.c:615 guc_submission_tasklet+0x784/0xa90 [i915] [ 59.939143] Modules linked in: vgem snd_hda_codec_hdmi snd_hda_codec_realtek snd_hda_codec_generic i915 x86_pkg_temp_thermal intel_powerclamp coretemp crct10dif_pclmul snd_hda_intel crc32_pclmul snd_hda_codec ghash_clmulni_intel snd_hwdep snd_hda_core e1000e snd_pcm mei_me mei prime_numbers [ 59.939164] CPU: 1 PID: 2901 Comm: gem_exec_schedu Tainted: G U W 4.18.0-rc1-g93475d62c730-drmtip_67+ #1 [ 59.939165] Hardware name: System manufacturer System Product Name/Z170M-PLUS, BIOS 3610 03/29/2018 [ 59.939188] RIP: 0010:guc_submission_tasklet+0x784/0xa90 [i915] [ 59.939189] Code: fc ff ff 80 3d 2f 87 11 00 00 0f 85 80 fb ff ff 48 c7 c6 f8 49 40 c0 48 c7 c7 80 41 3e c0 c6 05 14 87 11 00 01 e8 2c ea d6 d3 <0f> 0b e9 5f fb ff ff 8b 46 38 89 cf 31 c7 83 e7 c0 75 08 39 c1 0f [ 59.939253] RSP: 0018:ffffaafe08a03c10 EFLAGS: 00010286 [ 59.939255] RAX: 0000000000000000 RBX: ffff8f9112c246f0 RCX: 0000000000000001 [ 59.939256] RDX: 0000000080000001 RSI: ffffffff95086d8e RDI: 00000000ffffffff [ 59.939257] RBP: ffff8f9112c24680 R08: 000000009517be77 R09: 0000000000000000 [ 59.939258] R10: 0000000000000000 R11: 0000000000000000 R12: ffff8f9112c24700 [ 59.939259] R13: ffff8f9112c24700 R14: 0000000000000000 R15: ffff8f9112c242a8 [ 59.939260] FS: 00007fc2cc7e5980(0000) GS:ffff8f9136c40000(0000) knlGS:0000000000000000 [ 59.939261] CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 [ 59.939262] CR2: 00007fc2cc815040 CR3: 000000021f10e003 CR4: 00000000003606e0 [ 59.939263] DR0: 0000000000000000 DR1: 0000000000000000 DR2: 0000000000000000 [ 59.939264] DR3: 0000000000000000 DR6: 00000000fffe0ff0 DR7: 0000000000000400 [ 59.939265] Call Trace: [ 59.939288] ? intel_engine_is_idle+0x64/0x160 [i915] [ 59.939323] ? intel_engine_dump+0x638/0x890 [i915] [ 59.939327] ? seq_printf+0x49/0x70 [ 59.939353] ? i915_engine_info+0xc8/0x100 [i915] [ 59.939356] ? drm_get_color_range_name+0x20/0x20 [ 59.939361] ? seq_read+0xf1/0x470 [ 59.939365] ? trace_hardirqs_on_caller+0xe0/0x1b0 [ 59.939370] ? full_proxy_read+0x51/0x80 [ 59.939389] ? __vfs_read+0x31/0x170 [ 59.939395] ? do_sys_open+0x13b/0x240 [ 59.939398] ? rcu_read_lock_sched_held+0x6f/0x80 [ 59.939401] ? vfs_read+0x9e/0x140 [ 59.939404] ? ksys_read+0x50/0xc0 [ 59.939409] ? do_syscall_64+0x55/0x190 [ 59.939412] ? entry_SYSCALL_64_after_hwframe+0x49/0xbe [ 59.939420] irq event stamp: 552834 [ 59.939422] hardirqs last enabled at (552833): [] console_unlock+0x3fc/0x600 [ 59.939425] hardirqs last disabled at (552834): [] error_entry+0x7c/0x100 [ 59.939451] softirqs last enabled at (552614): [] i915_request_add+0x2e3/0x7b0 [i915] [ 59.939470] softirqs last disabled at (552604): [] i915_request_add+0x25b/0x7b0 [i915] Bugzilla: https://bugs.freedesktop.org/show_bug.cgi?id=106977 Fixes: dd0cf235d81f ("drm/i915: Speed up idle detection by kicking the tasklets") Signed-off-by: Chris Wilson Cc: Tvrtko Ursulin Cc: Mika Kuoppala Cc: Michal Wajdeczko Cc: Michał Winiarski Cc: Michel Thierry Reviewed-by: Michel Thierry Link: https://patchwork.freedesktop.org/patch/msgid/20180620135929.23956-1-chris@chris-wilson.co.uk --- drivers/gpu/drm/i915/intel_engine_cs.c | 2 ++ 1 file changed, 2 insertions(+) (limited to 'drivers/gpu/drm/i915/intel_engine_cs.c') diff --git a/drivers/gpu/drm/i915/intel_engine_cs.c b/drivers/gpu/drm/i915/intel_engine_cs.c index 32bf3a408d46..d3264bd6e9dc 100644 --- a/drivers/gpu/drm/i915/intel_engine_cs.c +++ b/drivers/gpu/drm/i915/intel_engine_cs.c @@ -1000,10 +1000,12 @@ bool intel_engine_is_idle(struct intel_engine_cs *engine) if (READ_ONCE(engine->execlists.active)) { struct intel_engine_execlists *execlists = &engine->execlists; + local_bh_disable(); if (tasklet_trylock(&execlists->tasklet)) { execlists->tasklet.func(execlists->tasklet.data); tasklet_unlock(&execlists->tasklet); } + local_bh_enable(); if (READ_ONCE(execlists->active)) return false; -- cgit v1.2.3 From bc4237ec8deaaee5f75d1afa91a19bfe6f948c6f Mon Sep 17 00:00:00 2001 From: Chris Wilson Date: Thu, 28 Jun 2018 21:12:07 +0100 Subject: drm/i915/execlists: Unify CSB access pointers Following the removal of the last workarounds, the only CSB mmio access is for the old vGPU interface. The mmio registers presented by vGPU do not require forcewake and can be treated as ordinary volatile memory, i.e. they behave just like the HWSP access just at a different location. We can reduce the CSB access to a set of read/write/buffer pointers and treat the various paths identically and not worry about forcewake. (Forcewake is nightmare for worstcase latency, and we want to process this all with irqsoff -- no latency allowed!) v2: Comments, comments, comments. Well, 2 bonus comments. Signed-off-by: Chris Wilson Cc: Tvrtko Ursulin Reviewed-by: Tvrtko Ursulin Link: https://patchwork.freedesktop.org/patch/msgid/20180628201211.13837-5-chris@chris-wilson.co.uk --- drivers/gpu/drm/i915/intel_engine_cs.c | 12 --- drivers/gpu/drm/i915/intel_lrc.c | 133 ++++++++++++++++---------------- drivers/gpu/drm/i915/intel_ringbuffer.h | 23 ++++-- 3 files changed, 82 insertions(+), 86 deletions(-) (limited to 'drivers/gpu/drm/i915/intel_engine_cs.c') diff --git a/drivers/gpu/drm/i915/intel_engine_cs.c b/drivers/gpu/drm/i915/intel_engine_cs.c index d3264bd6e9dc..7209c22798e6 100644 --- a/drivers/gpu/drm/i915/intel_engine_cs.c +++ b/drivers/gpu/drm/i915/intel_engine_cs.c @@ -25,7 +25,6 @@ #include #include "i915_drv.h" -#include "i915_vgpu.h" #include "intel_ringbuffer.h" #include "intel_lrc.h" @@ -456,21 +455,10 @@ static void intel_engine_init_batch_pool(struct intel_engine_cs *engine) i915_gem_batch_pool_init(&engine->batch_pool, engine); } -static bool csb_force_mmio(struct drm_i915_private *i915) -{ - /* Older GVT emulation depends upon intercepting CSB mmio */ - if (intel_vgpu_active(i915) && !intel_vgpu_has_hwsp_emulation(i915)) - return true; - - return false; -} - static void intel_engine_init_execlist(struct intel_engine_cs *engine) { struct intel_engine_execlists * const execlists = &engine->execlists; - execlists->csb_use_mmio = csb_force_mmio(engine->i915); - execlists->port_mask = 1; BUILD_BUG_ON_NOT_POWER_OF_2(execlists_num_ports(execlists)); GEM_BUG_ON(execlists_num_ports(execlists) > EXECLIST_MAX_PORTS); diff --git a/drivers/gpu/drm/i915/intel_lrc.c b/drivers/gpu/drm/i915/intel_lrc.c index c8d3d41011ce..81f000c245ab 100644 --- a/drivers/gpu/drm/i915/intel_lrc.c +++ b/drivers/gpu/drm/i915/intel_lrc.c @@ -137,6 +137,7 @@ #include #include "i915_drv.h" #include "i915_gem_render_state.h" +#include "i915_vgpu.h" #include "intel_lrc_reg.h" #include "intel_mocs.h" #include "intel_workarounds.h" @@ -954,44 +955,40 @@ static void process_csb(struct intel_engine_cs *engine) { struct intel_engine_execlists * const execlists = &engine->execlists; struct execlist_port *port = execlists->port; - struct drm_i915_private *i915 = engine->i915; - - /* The HWSP contains a (cacheable) mirror of the CSB */ - const u32 *buf = - &engine->status_page.page_addr[I915_HWS_CSB_BUF0_INDEX]; - unsigned int head, tail; - bool fw = false; + const u32 * const buf = execlists->csb_status; + u8 head, tail; /* Clear before reading to catch new interrupts */ clear_bit(ENGINE_IRQ_EXECLIST, &engine->irq_posted); smp_mb__after_atomic(); - if (unlikely(execlists->csb_use_mmio)) { - intel_uncore_forcewake_get(i915, execlists->fw_domains); - fw = true; - - buf = (u32 * __force) - (i915->regs + i915_mmio_reg_offset(RING_CONTEXT_STATUS_BUF_LO(engine, 0))); - - head = readl(i915->regs + i915_mmio_reg_offset(RING_CONTEXT_STATUS_PTR(engine))); - tail = GEN8_CSB_WRITE_PTR(head); - head = GEN8_CSB_READ_PTR(head); - execlists->csb_head = head; - } else { - const int write_idx = - intel_hws_csb_write_index(i915) - - I915_HWS_CSB_BUF0_INDEX; + /* + * Note that csb_write, csb_status may be either in HWSP or mmio. + * When reading from the csb_write mmio register, we have to be + * careful to only use the GEN8_CSB_WRITE_PTR portion, which is + * the low 4bits. As it happens we know the next 4bits are always + * zero and so we can simply masked off the low u8 of the register + * and treat it identically to reading from the HWSP (without having + * to use explicit shifting and masking, and probably bifurcating + * the code to handle the legacy mmio read). + */ + head = execlists->csb_head; + tail = READ_ONCE(*execlists->csb_write); + GEM_TRACE("%s cs-irq head=%d, tail=%d\n", engine->name, head, tail); + if (unlikely(head == tail)) + return; - head = execlists->csb_head; - tail = READ_ONCE(buf[write_idx]); - rmb(); /* Hopefully paired with a wmb() in HW */ - } - GEM_TRACE("%s cs-irq head=%d [%d%s], tail=%d [%d%s]\n", - engine->name, - head, GEN8_CSB_READ_PTR(readl(i915->regs + i915_mmio_reg_offset(RING_CONTEXT_STATUS_PTR(engine)))), fw ? "" : "?", - tail, GEN8_CSB_WRITE_PTR(readl(i915->regs + i915_mmio_reg_offset(RING_CONTEXT_STATUS_PTR(engine)))), fw ? "" : "?"); + /* + * Hopefully paired with a wmb() in HW! + * + * We must complete the read of the write pointer before any reads + * from the CSB, so that we do not see stale values. Without an rmb + * (lfence) the HW may speculatively perform the CSB[] reads *before* + * we perform the READ_ONCE(*csb_write). + */ + rmb(); - while (head != tail) { + do { struct i915_request *rq; unsigned int status; unsigned int count; @@ -1017,12 +1014,12 @@ static void process_csb(struct intel_engine_cs *engine) * status notifier. */ - status = READ_ONCE(buf[2 * head]); /* maybe mmio! */ GEM_TRACE("%s csb[%d]: status=0x%08x:0x%08x, active=0x%x\n", engine->name, head, - status, buf[2*head + 1], + buf[2 * head + 0], buf[2 * head + 1], execlists->active); + status = buf[2 * head]; if (status & (GEN8_CTX_STATUS_IDLE_ACTIVE | GEN8_CTX_STATUS_PREEMPTED)) execlists_set_active(execlists, @@ -1104,16 +1101,11 @@ static void process_csb(struct intel_engine_cs *engine) } else { port_set(port, port_pack(rq, count)); } - } - - if (head != execlists->csb_head) { - execlists->csb_head = head; - writel(_MASKED_FIELD(GEN8_CSB_READ_PTR_MASK, head << 8), - i915->regs + i915_mmio_reg_offset(RING_CONTEXT_STATUS_PTR(engine))); - } + } while (head != tail); - if (unlikely(fw)) - intel_uncore_forcewake_put(i915, execlists->fw_domains); + writel(_MASKED_FIELD(GEN8_CSB_READ_PTR_MASK, head << 8), + execlists->csb_read); + execlists->csb_head = head; } /* @@ -2431,28 +2423,11 @@ logical_ring_default_irqs(struct intel_engine_cs *engine) static void logical_ring_setup(struct intel_engine_cs *engine) { - struct drm_i915_private *dev_priv = engine->i915; - enum forcewake_domains fw_domains; - intel_engine_setup_common(engine); /* Intentionally left blank. */ engine->buffer = NULL; - fw_domains = intel_uncore_forcewake_for_reg(dev_priv, - RING_ELSP(engine), - FW_REG_WRITE); - - fw_domains |= intel_uncore_forcewake_for_reg(dev_priv, - RING_CONTEXT_STATUS_PTR(engine), - FW_REG_READ | FW_REG_WRITE); - - fw_domains |= intel_uncore_forcewake_for_reg(dev_priv, - RING_CONTEXT_STATUS_BUF_BASE(engine), - FW_REG_READ); - - engine->execlists.fw_domains = fw_domains; - tasklet_init(&engine->execlists.tasklet, execlists_submission_tasklet, (unsigned long)engine); @@ -2460,34 +2435,56 @@ logical_ring_setup(struct intel_engine_cs *engine) logical_ring_default_irqs(engine); } +static bool csb_force_mmio(struct drm_i915_private *i915) +{ + /* Older GVT emulation depends upon intercepting CSB mmio */ + return intel_vgpu_active(i915) && !intel_vgpu_has_hwsp_emulation(i915); +} + static int logical_ring_init(struct intel_engine_cs *engine) { + struct drm_i915_private *i915 = engine->i915; + struct intel_engine_execlists * const execlists = &engine->execlists; int ret; ret = intel_engine_init_common(engine); if (ret) goto error; - if (HAS_LOGICAL_RING_ELSQ(engine->i915)) { - engine->execlists.submit_reg = engine->i915->regs + + if (HAS_LOGICAL_RING_ELSQ(i915)) { + execlists->submit_reg = i915->regs + i915_mmio_reg_offset(RING_EXECLIST_SQ_CONTENTS(engine)); - engine->execlists.ctrl_reg = engine->i915->regs + + execlists->ctrl_reg = i915->regs + i915_mmio_reg_offset(RING_EXECLIST_CONTROL(engine)); } else { - engine->execlists.submit_reg = engine->i915->regs + + execlists->submit_reg = i915->regs + i915_mmio_reg_offset(RING_ELSP(engine)); } - engine->execlists.preempt_complete_status = ~0u; - if (engine->i915->preempt_context) { + execlists->preempt_complete_status = ~0u; + if (i915->preempt_context) { struct intel_context *ce = - to_intel_context(engine->i915->preempt_context, engine); + to_intel_context(i915->preempt_context, engine); - engine->execlists.preempt_complete_status = + execlists->preempt_complete_status = upper_32_bits(ce->lrc_desc); } - engine->execlists.csb_head = GEN8_CSB_ENTRIES - 1; + execlists->csb_head = GEN8_CSB_ENTRIES - 1; + execlists->csb_read = + i915->regs + i915_mmio_reg_offset(RING_CONTEXT_STATUS_PTR(engine)); + if (csb_force_mmio(i915)) { + execlists->csb_status = (u32 __force *) + (i915->regs + i915_mmio_reg_offset(RING_CONTEXT_STATUS_BUF_LO(engine, 0))); + + execlists->csb_write = (u32 __force *)execlists->csb_read; + } else { + execlists->csb_status = + &engine->status_page.page_addr[I915_HWS_CSB_BUF0_INDEX]; + + execlists->csb_write = + &engine->status_page.page_addr[intel_hws_csb_write_index(i915)]; + } return 0; diff --git a/drivers/gpu/drm/i915/intel_ringbuffer.h b/drivers/gpu/drm/i915/intel_ringbuffer.h index 78f01a35823a..25792889dbf4 100644 --- a/drivers/gpu/drm/i915/intel_ringbuffer.h +++ b/drivers/gpu/drm/i915/intel_ringbuffer.h @@ -300,24 +300,35 @@ struct intel_engine_execlists { struct rb_node *first; /** - * @fw_domains: forcewake domains for irq tasklet + * @csb_read: control register for Context Switch buffer + * + * Note this register is always in mmio. */ - unsigned int fw_domains; + u32 __iomem *csb_read; /** - * @csb_head: context status buffer head + * @csb_write: control register for Context Switch buffer + * + * Note this register may be either mmio or HWSP shadow. */ - unsigned int csb_head; + u32 *csb_write; /** - * @csb_use_mmio: access csb through mmio, instead of hwsp + * @csb_status: status array for Context Switch buffer + * + * Note these register may be either mmio or HWSP shadow. */ - bool csb_use_mmio; + u32 *csb_status; /** * @preempt_complete_status: expected CSB upon completing preemption */ u32 preempt_complete_status; + + /** + * @csb_head: context status buffer head + */ + u8 csb_head; }; #define INTEL_ENGINE_CS_MAX_NAME 8 -- cgit v1.2.3 From fd8526e509020ed30298ab57d03edc97bef83962 Mon Sep 17 00:00:00 2001 From: Chris Wilson Date: Thu, 28 Jun 2018 21:12:10 +0100 Subject: drm/i915/execlists: Trust the CSB Now that we use the CSB stored in the CPU friendly HWSP, we do not need to track interrupts for when the mmio CSB registers are valid and can just check where we read up to last from the cached HWSP. This means we can forgo the atomic bit tracking from interrupt, and in the next patch it means we can check the CSB at any time. v2: Change the splitting inside reset_prepare, we only want to lose testing the interrupt in this patch, the next patch requires the change in locking Signed-off-by: Chris Wilson Cc: Tvrtko Ursulin Reviewed-by: Tvrtko Ursulin Link: https://patchwork.freedesktop.org/patch/msgid/20180628201211.13837-8-chris@chris-wilson.co.uk --- drivers/gpu/drm/i915/i915_irq.c | 11 +++-------- drivers/gpu/drm/i915/intel_engine_cs.c | 8 ++------ drivers/gpu/drm/i915/intel_lrc.c | 29 ++++------------------------- drivers/gpu/drm/i915/intel_ringbuffer.h | 1 - 4 files changed, 9 insertions(+), 40 deletions(-) (limited to 'drivers/gpu/drm/i915/intel_engine_cs.c') diff --git a/drivers/gpu/drm/i915/i915_irq.c b/drivers/gpu/drm/i915/i915_irq.c index c95cb278a8bc..435a4444314c 100644 --- a/drivers/gpu/drm/i915/i915_irq.c +++ b/drivers/gpu/drm/i915/i915_irq.c @@ -1493,15 +1493,10 @@ static void snb_gt_irq_handler(struct drm_i915_private *dev_priv, static void gen8_cs_irq_handler(struct intel_engine_cs *engine, u32 iir) { - struct intel_engine_execlists * const execlists = &engine->execlists; bool tasklet = false; - if (iir & GT_CONTEXT_SWITCH_INTERRUPT) { - if (READ_ONCE(engine->execlists.active)) { - set_bit(ENGINE_IRQ_EXECLIST, &engine->irq_posted); - tasklet = true; - } - } + if (iir & GT_CONTEXT_SWITCH_INTERRUPT) + tasklet = true; if (iir & GT_RENDER_USER_INTERRUPT) { notify_ring(engine); @@ -1509,7 +1504,7 @@ gen8_cs_irq_handler(struct intel_engine_cs *engine, u32 iir) } if (tasklet) - tasklet_hi_schedule(&execlists->tasklet); + tasklet_hi_schedule(&engine->execlists.tasklet); } static void gen8_gt_irq_ack(struct drm_i915_private *i915, diff --git a/drivers/gpu/drm/i915/intel_engine_cs.c b/drivers/gpu/drm/i915/intel_engine_cs.c index 7209c22798e6..ace93958689e 100644 --- a/drivers/gpu/drm/i915/intel_engine_cs.c +++ b/drivers/gpu/drm/i915/intel_engine_cs.c @@ -1353,12 +1353,10 @@ static void intel_engine_print_registers(const struct intel_engine_cs *engine, ptr = I915_READ(RING_CONTEXT_STATUS_PTR(engine)); read = GEN8_CSB_READ_PTR(ptr); write = GEN8_CSB_WRITE_PTR(ptr); - drm_printf(m, "\tExeclist CSB read %d [%d cached], write %d [%d from hws], interrupt posted? %s, tasklet queued? %s (%s)\n", + drm_printf(m, "\tExeclist CSB read %d [%d cached], write %d [%d from hws], tasklet queued? %s (%s)\n", read, execlists->csb_head, write, intel_read_status_page(engine, intel_hws_csb_write_index(engine->i915)), - yesno(test_bit(ENGINE_IRQ_EXECLIST, - &engine->irq_posted)), yesno(test_bit(TASKLET_STATE_SCHED, &engine->execlists.tasklet.state)), enableddisabled(!atomic_read(&engine->execlists.tasklet.count))); @@ -1570,11 +1568,9 @@ void intel_engine_dump(struct intel_engine_cs *engine, spin_unlock(&b->rb_lock); local_irq_restore(flags); - drm_printf(m, "IRQ? 0x%lx (breadcrumbs? %s) (execlists? %s)\n", + drm_printf(m, "IRQ? 0x%lx (breadcrumbs? %s)\n", engine->irq_posted, yesno(test_bit(ENGINE_IRQ_BREADCRUMB, - &engine->irq_posted)), - yesno(test_bit(ENGINE_IRQ_EXECLIST, &engine->irq_posted))); drm_printf(m, "HWSP:\n"); diff --git a/drivers/gpu/drm/i915/intel_lrc.c b/drivers/gpu/drm/i915/intel_lrc.c index ed81f8ac60ca..d835da128a17 100644 --- a/drivers/gpu/drm/i915/intel_lrc.c +++ b/drivers/gpu/drm/i915/intel_lrc.c @@ -875,14 +875,6 @@ static void reset_irq(struct intel_engine_cs *engine) smp_store_mb(engine->execlists.active, 0); clear_gtiir(engine); - - /* - * The port is checked prior to scheduling a tasklet, but - * just in case we have suspended the tasklet to do the - * wedging make sure that when it wakes, it decides there - * is no work to do by clearing the irq_posted bit. - */ - clear_bit(ENGINE_IRQ_EXECLIST, &engine->irq_posted); } static void reset_csb_pointers(struct intel_engine_execlists *execlists) @@ -973,10 +965,6 @@ static void process_csb(struct intel_engine_cs *engine) const u32 * const buf = execlists->csb_status; u8 head, tail; - /* Clear before reading to catch new interrupts */ - clear_bit(ENGINE_IRQ_EXECLIST, &engine->irq_posted); - smp_mb__after_atomic(); - /* * Note that csb_write, csb_status may be either in HWSP or mmio. * When reading from the csb_write mmio register, we have to be @@ -1129,11 +1117,10 @@ static void execlists_submission_tasklet(unsigned long data) { struct intel_engine_cs * const engine = (struct intel_engine_cs *)data; - GEM_TRACE("%s awake?=%d, active=%x, irq-posted?=%d\n", + GEM_TRACE("%s awake?=%d, active=%x\n", engine->name, engine->i915->gt.awake, - engine->execlists.active, - test_bit(ENGINE_IRQ_EXECLIST, &engine->irq_posted)); + engine->execlists.active); /* * We can skip acquiring intel_runtime_pm_get() here as it was taken @@ -1145,14 +1132,7 @@ static void execlists_submission_tasklet(unsigned long data) */ GEM_BUG_ON(!engine->i915->gt.awake); - /* - * Prefer doing test_and_clear_bit() as a two stage operation to avoid - * imposing the cost of a locked atomic transaction when submitting a - * new request (outside of the context-switch interrupt). - */ - if (test_bit(ENGINE_IRQ_EXECLIST, &engine->irq_posted)) - process_csb(engine); - + process_csb(engine); if (!execlists_is_active(&engine->execlists, EXECLISTS_ACTIVE_PREEMPT)) execlists_dequeue(engine); } @@ -1920,8 +1900,7 @@ execlists_reset_prepare(struct intel_engine_cs *engine) * and avoid blaming an innocent request if the stall was due to the * preemption itself. */ - if (test_bit(ENGINE_IRQ_EXECLIST, &engine->irq_posted)) - process_csb(engine); + process_csb(engine); /* * The last active request can then be no later than the last request diff --git a/drivers/gpu/drm/i915/intel_ringbuffer.h b/drivers/gpu/drm/i915/intel_ringbuffer.h index b02620990859..ce6cc2a6cf7a 100644 --- a/drivers/gpu/drm/i915/intel_ringbuffer.h +++ b/drivers/gpu/drm/i915/intel_ringbuffer.h @@ -367,7 +367,6 @@ struct intel_engine_cs { unsigned long irq_posted; #define ENGINE_IRQ_BREADCRUMB 0 -#define ENGINE_IRQ_EXECLIST 1 /* Rather than have every client wait upon all user interrupts, * with the herd waking after every interrupt and each doing the -- cgit v1.2.3 From 9512f985c32d45ab439a210981e0d73071da395a Mon Sep 17 00:00:00 2001 From: Chris Wilson Date: Thu, 28 Jun 2018 21:12:11 +0100 Subject: drm/i915/execlists: Direct submission of new requests (avoid tasklet/ksoftirqd) Back in commit 27af5eea54d1 ("drm/i915: Move execlists irq handler to a bottom half"), we came to the conclusion that running our CSB processing and ELSP submission from inside the irq handler was a bad idea. A really bad idea as we could impose nearly 1s latency on other users of the system, on average! Deferring our work to a tasklet allowed us to do the processing with irqs enabled, reducing the impact to an average of about 50us. We have since eradicated the use of forcewaked mmio from inside the CSB processing and ELSP submission, bringing the impact down to around 5us (on Kabylake); an order of magnitude better than our measurements 2 years ago on Broadwell and only about 2x worse on average than the gem_syslatency on an unladen system. In this iteration of the tasklet-vs-direct submission debate, we seek a compromise where by we submit new requests immediately to the HW but defer processing the CS interrupt onto a tasklet. We gain the advantage of low-latency and ksoftirqd avoidance when waking up the HW, while avoiding the system-wide starvation of our CS irq-storms. Comparing the impact on the maximum latency observed (that is the time stolen from an RT process) over a 120s interval, repeated several times (using gem_syslatency, similar to RT's cyclictest) while the system is fully laden with i915 nops, we see that direct submission an actually improve the worse case. Maximum latency in microseconds of a third party RT thread (gem_syslatency -t 120 -f 2) x Always using tasklets (a couple of >1000us outliers removed) + Only using tasklets from CS irq, direct submission of requests +------------------------------------------------------------------------+ | + | | + | | + | | + + | | + + + | | + + + + x x x | | +++ + + + x x x x x x | | +++ + ++ + + *x x x x x x | | +++ + ++ + * *x x * x x x | | + +++ + ++ * * +*xxx * x x xx | | * +++ + ++++* *x+**xx+ * x x xxxx x | | **x++++*++**+*x*x****x+ * +x xx xxxx x x | |x* ******+***************++*+***xxxxxx* xx*x xxx + x+| | |__________MA___________| | | |______M__A________| | +------------------------------------------------------------------------+ N Min Max Median Avg Stddev x 118 91 186 124 125.28814 16.279137 + 120 92 187 109 112.00833 13.458617 Difference at 95.0% confidence -13.2798 +/- 3.79219 -10.5994% +/- 3.02677% (Student's t, pooled s = 14.9237) However the mean latency is adversely affected: Mean latency in microseconds of a third party RT thread (gem_syslatency -t 120 -f 1) x Always using tasklets + Only using tasklets from CS irq, direct submission of requests +------------------------------------------------------------------------+ | xxxxxx + ++ | | xxxxxx + ++ | | xxxxxx + +++ ++ | | xxxxxxx +++++ ++ | | xxxxxxx +++++ ++ | | xxxxxxx +++++ +++ | | xxxxxxx + ++++++++++ | | xxxxxxxx ++ ++++++++++ | | xxxxxxxx ++ ++++++++++ | | xxxxxxxxxx +++++++++++++++ | | xxxxxxxxxxx x +++++++++++++++ | |x xxxxxxxxxxxxx x + + ++++++++++++++++++ +| | |__A__| | | |____A___| | +------------------------------------------------------------------------+ N Min Max Median Avg Stddev x 120 3.506 3.727 3.631 3.6321417 0.02773109 + 120 3.834 4.149 4.039 4.0375167 0.041221676 Difference at 95.0% confidence 0.405375 +/- 0.00888913 11.1608% +/- 0.244735% (Student's t, pooled s = 0.03513) However, since the mean latency corresponds to the amount of irqsoff processing we have to do for a CS interrupt, we only need to speed that up to benefit not just system latency but our own throughput. v2: Remember to defer submissions when under reset. v4: Only use direct submission for new requests v5: Be aware that with mixing direct tasklet evaluation and deferred tasklets, we may end up idling before running the deferred tasklet. v6: Remove the redudant likely() from tasklet_is_enabled(), restrict the annotation to reset_in_progress(). v7: Take the full timeline.lock when enabling perf_pmu stats as the tasklet is no longer a valid guard. A consequence is that the stats are now only valid for engines also using the timeline.lock to process state. Testcase: igt/gem_exec_latency/*rthog* References: 27af5eea54d1 ("drm/i915: Move execlists irq handler to a bottom half") Suggested-by: Tvrtko Ursulin Signed-off-by: Chris Wilson Cc: Tvrtko Ursulin Reviewed-by: Tvrtko Ursulin Link: https://patchwork.freedesktop.org/patch/msgid/20180628201211.13837-9-chris@chris-wilson.co.uk --- drivers/gpu/drm/i915/i915_gem.h | 5 ++ drivers/gpu/drm/i915/intel_engine_cs.c | 8 +-- drivers/gpu/drm/i915/intel_lrc.c | 102 +++++++++++++++++++++------------ 3 files changed, 75 insertions(+), 40 deletions(-) (limited to 'drivers/gpu/drm/i915/intel_engine_cs.c') diff --git a/drivers/gpu/drm/i915/i915_gem.h b/drivers/gpu/drm/i915/i915_gem.h index 261da577829a..e46592956872 100644 --- a/drivers/gpu/drm/i915/i915_gem.h +++ b/drivers/gpu/drm/i915/i915_gem.h @@ -88,4 +88,9 @@ static inline void __tasklet_enable_sync_once(struct tasklet_struct *t) tasklet_kill(t); } +static inline bool __tasklet_is_enabled(const struct tasklet_struct *t) +{ + return !atomic_read(&t->count); +} + #endif /* __I915_GEM_H__ */ diff --git a/drivers/gpu/drm/i915/intel_engine_cs.c b/drivers/gpu/drm/i915/intel_engine_cs.c index ace93958689e..01862884a436 100644 --- a/drivers/gpu/drm/i915/intel_engine_cs.c +++ b/drivers/gpu/drm/i915/intel_engine_cs.c @@ -1619,8 +1619,8 @@ int intel_enable_engine_stats(struct intel_engine_cs *engine) if (!intel_engine_supports_stats(engine)) return -ENODEV; - tasklet_disable(&execlists->tasklet); - write_seqlock_irqsave(&engine->stats.lock, flags); + spin_lock_irqsave(&engine->timeline.lock, flags); + write_seqlock(&engine->stats.lock); if (unlikely(engine->stats.enabled == ~0)) { err = -EBUSY; @@ -1644,8 +1644,8 @@ int intel_enable_engine_stats(struct intel_engine_cs *engine) } unlock: - write_sequnlock_irqrestore(&engine->stats.lock, flags); - tasklet_enable(&execlists->tasklet); + write_sequnlock(&engine->stats.lock); + spin_unlock_irqrestore(&engine->timeline.lock, flags); return err; } diff --git a/drivers/gpu/drm/i915/intel_lrc.c b/drivers/gpu/drm/i915/intel_lrc.c index d835da128a17..6ab6ddb103d1 100644 --- a/drivers/gpu/drm/i915/intel_lrc.c +++ b/drivers/gpu/drm/i915/intel_lrc.c @@ -563,12 +563,14 @@ static void complete_preempt_context(struct intel_engine_execlists *execlists) GEM_BUG_ON(!execlists_is_active(execlists, EXECLISTS_ACTIVE_PREEMPT)); execlists_cancel_port_requests(execlists); - execlists_unwind_incomplete_requests(execlists); + __unwind_incomplete_requests(container_of(execlists, + struct intel_engine_cs, + execlists)); execlists_clear_active(execlists, EXECLISTS_ACTIVE_PREEMPT); } -static void __execlists_dequeue(struct intel_engine_cs *engine) +static void execlists_dequeue(struct intel_engine_cs *engine) { struct intel_engine_execlists * const execlists = &engine->execlists; struct execlist_port *port = execlists->port; @@ -578,9 +580,8 @@ static void __execlists_dequeue(struct intel_engine_cs *engine) struct rb_node *rb; bool submit = false; - lockdep_assert_held(&engine->timeline.lock); - - /* Hardware submission is through 2 ports. Conceptually each port + /* + * Hardware submission is through 2 ports. Conceptually each port * has a (RING_START, RING_HEAD, RING_TAIL) tuple. RING_START is * static for a context, and unique to each, so we only execute * requests belonging to a single context from each ring. RING_HEAD @@ -770,15 +771,6 @@ done: !port_isset(engine->execlists.port)); } -static void execlists_dequeue(struct intel_engine_cs *engine) -{ - unsigned long flags; - - spin_lock_irqsave(&engine->timeline.lock, flags); - __execlists_dequeue(engine); - spin_unlock_irqrestore(&engine->timeline.lock, flags); -} - void execlists_cancel_port_requests(struct intel_engine_execlists * const execlists) { @@ -958,6 +950,12 @@ static void execlists_cancel_requests(struct intel_engine_cs *engine) spin_unlock_irqrestore(&engine->timeline.lock, flags); } +static inline bool +reset_in_progress(const struct intel_engine_execlists *execlists) +{ + return unlikely(!__tasklet_is_enabled(&execlists->tasklet)); +} + static void process_csb(struct intel_engine_cs *engine) { struct intel_engine_execlists * const execlists = &engine->execlists; @@ -1109,18 +1107,9 @@ static void process_csb(struct intel_engine_cs *engine) execlists->csb_head = head; } -/* - * Check the unread Context Status Buffers and manage the submission of new - * contexts to the ELSP accordingly. - */ -static void execlists_submission_tasklet(unsigned long data) +static void __execlists_submission_tasklet(struct intel_engine_cs *const engine) { - struct intel_engine_cs * const engine = (struct intel_engine_cs *)data; - - GEM_TRACE("%s awake?=%d, active=%x\n", - engine->name, - engine->i915->gt.awake, - engine->execlists.active); + lockdep_assert_held(&engine->timeline.lock); /* * We can skip acquiring intel_runtime_pm_get() here as it was taken @@ -1137,6 +1126,28 @@ static void execlists_submission_tasklet(unsigned long data) execlists_dequeue(engine); } +/* + * Check the unread Context Status Buffers and manage the submission of new + * contexts to the ELSP accordingly. + */ +static void execlists_submission_tasklet(unsigned long data) +{ + struct intel_engine_cs * const engine = (struct intel_engine_cs *)data; + unsigned long flags; + + GEM_TRACE("%s awake?=%d, active=%x\n", + engine->name, + engine->i915->gt.awake, + engine->execlists.active); + + spin_lock_irqsave(&engine->timeline.lock, flags); + + if (engine->i915->gt.awake) /* we may be delayed until after we idle! */ + __execlists_submission_tasklet(engine); + + spin_unlock_irqrestore(&engine->timeline.lock, flags); +} + static void queue_request(struct intel_engine_cs *engine, struct i915_sched_node *node, int prio) @@ -1145,16 +1156,30 @@ static void queue_request(struct intel_engine_cs *engine, &lookup_priolist(engine, prio)->requests); } -static void __submit_queue(struct intel_engine_cs *engine, int prio) +static void __update_queue(struct intel_engine_cs *engine, int prio) { engine->execlists.queue_priority = prio; - tasklet_hi_schedule(&engine->execlists.tasklet); +} + +static void __submit_queue_imm(struct intel_engine_cs *engine) +{ + struct intel_engine_execlists * const execlists = &engine->execlists; + + if (reset_in_progress(execlists)) + return; /* defer until we restart the engine following reset */ + + if (execlists->tasklet.func == execlists_submission_tasklet) + __execlists_submission_tasklet(engine); + else + tasklet_hi_schedule(&execlists->tasklet); } static void submit_queue(struct intel_engine_cs *engine, int prio) { - if (prio > engine->execlists.queue_priority) - __submit_queue(engine, prio); + if (prio > engine->execlists.queue_priority) { + __update_queue(engine, prio); + __submit_queue_imm(engine); + } } static void execlists_submit_request(struct i915_request *request) @@ -1166,11 +1191,12 @@ static void execlists_submit_request(struct i915_request *request) spin_lock_irqsave(&engine->timeline.lock, flags); queue_request(engine, &request->sched, rq_prio(request)); - submit_queue(engine, rq_prio(request)); GEM_BUG_ON(!engine->execlists.first); GEM_BUG_ON(list_empty(&request->sched.link)); + submit_queue(engine, rq_prio(request)); + spin_unlock_irqrestore(&engine->timeline.lock, flags); } @@ -1297,8 +1323,11 @@ static void execlists_schedule(struct i915_request *request, } if (prio > engine->execlists.queue_priority && - i915_sw_fence_done(&sched_to_request(node)->submit)) - __submit_queue(engine, prio); + i915_sw_fence_done(&sched_to_request(node)->submit)) { + /* defer submission until after all of our updates */ + __update_queue(engine, prio); + tasklet_hi_schedule(&engine->execlists.tasklet); + } } spin_unlock_irq(&engine->timeline.lock); @@ -1879,6 +1908,7 @@ execlists_reset_prepare(struct intel_engine_cs *engine) { struct intel_engine_execlists * const execlists = &engine->execlists; struct i915_request *request, *active; + unsigned long flags; GEM_TRACE("%s\n", engine->name); @@ -1893,6 +1923,8 @@ execlists_reset_prepare(struct intel_engine_cs *engine) */ __tasklet_disable_sync_once(&execlists->tasklet); + spin_lock_irqsave(&engine->timeline.lock, flags); + /* * We want to flush the pending context switches, having disabled * the tasklet above, we can assume exclusive access to the execlists. @@ -1910,15 +1942,12 @@ execlists_reset_prepare(struct intel_engine_cs *engine) active = NULL; request = port_request(execlists->port); if (request) { - unsigned long flags; - /* * Prevent the breadcrumb from advancing before we decide * which request is currently active. */ intel_engine_stop_cs(engine); - spin_lock_irqsave(&engine->timeline.lock, flags); list_for_each_entry_from_reverse(request, &engine->timeline.requests, link) { @@ -1928,9 +1957,10 @@ execlists_reset_prepare(struct intel_engine_cs *engine) active = request; } - spin_unlock_irqrestore(&engine->timeline.lock, flags); } + spin_unlock_irqrestore(&engine->timeline.lock, flags); + return active; } -- cgit v1.2.3 From f0d759f038dcced7dfb756dc54c224f09ad062a2 Mon Sep 17 00:00:00 2001 From: "Gustavo A. R. Silva" Date: Thu, 28 Jun 2018 17:35:41 -0500 Subject: drm/i915: Mark expected switch fall-throughs In preparation to enabling -Wimplicit-fallthrough, mark switch cases where we are expecting to fall through. Addresses-Coverity-ID: 141432 Addresses-Coverity-ID: 141433 Addresses-Coverity-ID: 141434 Addresses-Coverity-ID: 141435 Addresses-Coverity-ID: 141436 Addresses-Coverity-ID: 1357360 Addresses-Coverity-ID: 1357403 Addresses-Coverity-ID: 1357433 Addresses-Coverity-ID: 1392622 Addresses-Coverity-ID: 1415273 Addresses-Coverity-ID: 1435752 Addresses-Coverity-ID: 1441500 Addresses-Coverity-ID: 1454596 Signed-off-by: Gustavo A. R. Silva Signed-off-by: Jani Nikula Link: https://patchwork.freedesktop.org/patch/msgid/20180628223541.GA17665@embeddedor.com --- drivers/gpu/drm/i915/i915_gem.c | 1 + drivers/gpu/drm/i915/i915_gem_stolen.c | 1 + drivers/gpu/drm/i915/intel_cdclk.c | 5 +++++ drivers/gpu/drm/i915/intel_ddi.c | 1 + drivers/gpu/drm/i915/intel_display.c | 2 ++ drivers/gpu/drm/i915/intel_dpll_mgr.c | 3 +++ drivers/gpu/drm/i915/intel_drv.h | 1 + drivers/gpu/drm/i915/intel_engine_cs.c | 1 + drivers/gpu/drm/i915/intel_runtime_pm.c | 1 + drivers/gpu/drm/i915/intel_sdvo.c | 6 ++++++ 10 files changed, 22 insertions(+) (limited to 'drivers/gpu/drm/i915/intel_engine_cs.c') diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c index 048b722cf27c..0c0a1a959d0b 100644 --- a/drivers/gpu/drm/i915/i915_gem.c +++ b/drivers/gpu/drm/i915/i915_gem.c @@ -2120,6 +2120,7 @@ err: */ if (!i915_terminally_wedged(&dev_priv->gpu_error)) return VM_FAULT_SIGBUS; + /* else: fall through */ case -EAGAIN: /* * EAGAIN means the gpu is hung and we'll wait for the error diff --git a/drivers/gpu/drm/i915/i915_gem_stolen.c b/drivers/gpu/drm/i915/i915_gem_stolen.c index 79a347295e00..055f8687776d 100644 --- a/drivers/gpu/drm/i915/i915_gem_stolen.c +++ b/drivers/gpu/drm/i915/i915_gem_stolen.c @@ -254,6 +254,7 @@ static void vlv_get_stolen_reserved(struct drm_i915_private *dev_priv, switch (reg_val & GEN7_STOLEN_RESERVED_SIZE_MASK) { default: MISSING_CASE(reg_val & GEN7_STOLEN_RESERVED_SIZE_MASK); + /* fall through */ case GEN7_STOLEN_RESERVED_1M: *size = 1024 * 1024; break; diff --git a/drivers/gpu/drm/i915/intel_cdclk.c b/drivers/gpu/drm/i915/intel_cdclk.c index bf9433d7964d..29075c763428 100644 --- a/drivers/gpu/drm/i915/intel_cdclk.c +++ b/drivers/gpu/drm/i915/intel_cdclk.c @@ -316,6 +316,7 @@ static void pnv_get_cdclk(struct drm_i915_private *dev_priv, break; default: DRM_ERROR("Unknown pnv display core clock 0x%04x\n", gcfgc); + /* fall through */ case GC_DISPLAY_CLOCK_133_MHZ_PNV: cdclk_state->cdclk = 133333; break; @@ -1797,6 +1798,7 @@ static int icl_calc_cdclk(int min_cdclk, unsigned int ref) switch (ref) { default: MISSING_CASE(ref); + /* fall through */ case 24000: ranges = ranges_24; break; @@ -1824,6 +1826,7 @@ static int icl_calc_cdclk_pll_vco(struct drm_i915_private *dev_priv, int cdclk) switch (cdclk) { default: MISSING_CASE(cdclk); + /* fall through */ case 307200: case 556800: case 652800: @@ -1896,6 +1899,7 @@ static u8 icl_calc_voltage_level(int cdclk) return 1; default: MISSING_CASE(cdclk); + /* fall through */ case 652800: case 648000: return 2; @@ -1913,6 +1917,7 @@ static void icl_get_cdclk(struct drm_i915_private *dev_priv, switch (val & ICL_DSSM_CDCLK_PLL_REFCLK_MASK) { default: MISSING_CASE(val); + /* fall through */ case ICL_DSSM_CDCLK_PLL_REFCLK_24MHz: cdclk_state->ref = 24000; break; diff --git a/drivers/gpu/drm/i915/intel_ddi.c b/drivers/gpu/drm/i915/intel_ddi.c index 0319825b725b..c74b01a52082 100644 --- a/drivers/gpu/drm/i915/intel_ddi.c +++ b/drivers/gpu/drm/i915/intel_ddi.c @@ -1069,6 +1069,7 @@ static uint32_t icl_pll_to_ddi_pll_sel(struct intel_encoder *encoder, switch (id) { default: MISSING_CASE(id); + /* fall through */ case DPLL_ID_ICL_DPLL0: case DPLL_ID_ICL_DPLL1: return DDI_CLK_SEL_NONE; diff --git a/drivers/gpu/drm/i915/intel_display.c b/drivers/gpu/drm/i915/intel_display.c index ce7646265b50..12102224f47a 100644 --- a/drivers/gpu/drm/i915/intel_display.c +++ b/drivers/gpu/drm/i915/intel_display.c @@ -9360,6 +9360,7 @@ static bool hsw_get_transcoder_state(struct intel_crtc *crtc, switch (tmp & TRANS_DDI_EDP_INPUT_MASK) { default: WARN(1, "unknown pipe linked to edp transcoder\n"); + /* fall through */ case TRANS_DDI_EDP_INPUT_A_ONOFF: case TRANS_DDI_EDP_INPUT_A_ON: trans_edp_pipe = PIPE_A; @@ -11024,6 +11025,7 @@ static bool check_digital_port_conflicts(struct drm_atomic_state *state) case INTEL_OUTPUT_DDI: if (WARN_ON(!HAS_DDI(to_i915(dev)))) break; + /* else: fall through */ case INTEL_OUTPUT_DP: case INTEL_OUTPUT_HDMI: case INTEL_OUTPUT_EDP: diff --git a/drivers/gpu/drm/i915/intel_dpll_mgr.c b/drivers/gpu/drm/i915/intel_dpll_mgr.c index 57342364fd30..058696b7d6c3 100644 --- a/drivers/gpu/drm/i915/intel_dpll_mgr.c +++ b/drivers/gpu/drm/i915/intel_dpll_mgr.c @@ -2566,6 +2566,7 @@ int icl_calc_dp_combo_pll_link(struct drm_i915_private *dev_priv, switch (index) { default: MISSING_CASE(index); + /* fall through */ case 0: link_clock = 540000; break; @@ -2639,6 +2640,7 @@ static bool icl_mg_pll_find_divisors(int clock_khz, bool is_dp, bool use_ssc, switch (div1) { default: MISSING_CASE(div1); + /* fall through */ case 2: hsdiv = 0; break; @@ -2903,6 +2905,7 @@ static i915_reg_t icl_pll_id_to_enable_reg(enum intel_dpll_id id) switch (id) { default: MISSING_CASE(id); + /* fall through */ case DPLL_ID_ICL_DPLL0: case DPLL_ID_ICL_DPLL1: return CNL_DPLL_ENABLE(id); diff --git a/drivers/gpu/drm/i915/intel_drv.h b/drivers/gpu/drm/i915/intel_drv.h index a8073eb02ffa..cdfdebc11ef8 100644 --- a/drivers/gpu/drm/i915/intel_drv.h +++ b/drivers/gpu/drm/i915/intel_drv.h @@ -1253,6 +1253,7 @@ enc_to_dig_port(struct drm_encoder *encoder) switch (intel_encoder->type) { case INTEL_OUTPUT_DDI: WARN_ON(!HAS_DDI(to_i915(encoder->dev))); + /* fall through */ case INTEL_OUTPUT_DP: case INTEL_OUTPUT_EDP: case INTEL_OUTPUT_HDMI: diff --git a/drivers/gpu/drm/i915/intel_engine_cs.c b/drivers/gpu/drm/i915/intel_engine_cs.c index 01862884a436..478c928912c4 100644 --- a/drivers/gpu/drm/i915/intel_engine_cs.c +++ b/drivers/gpu/drm/i915/intel_engine_cs.c @@ -229,6 +229,7 @@ __intel_engine_context_size(struct drm_i915_private *dev_priv, u8 class) break; default: MISSING_CASE(class); + /* fall through */ case VIDEO_DECODE_CLASS: case VIDEO_ENHANCEMENT_CLASS: case COPY_ENGINE_CLASS: diff --git a/drivers/gpu/drm/i915/intel_runtime_pm.c b/drivers/gpu/drm/i915/intel_runtime_pm.c index d81b2cfe1c5e..6b5aa3b074ec 100644 --- a/drivers/gpu/drm/i915/intel_runtime_pm.c +++ b/drivers/gpu/drm/i915/intel_runtime_pm.c @@ -3212,6 +3212,7 @@ static void cnl_set_procmon_ref_values(struct drm_i915_private *dev_priv, switch (val & (PROCESS_INFO_MASK | VOLTAGE_INFO_MASK)) { default: MISSING_CASE(val); + /* fall through */ case VOLTAGE_INFO_0_85V | PROCESS_INFO_DOT_0: procmon = &cnl_procmon_values[PROCMON_0_85V_DOT_0]; break; diff --git a/drivers/gpu/drm/i915/intel_sdvo.c b/drivers/gpu/drm/i915/intel_sdvo.c index 9d9229a1f36c..36f81a96b8f6 100644 --- a/drivers/gpu/drm/i915/intel_sdvo.c +++ b/drivers/gpu/drm/i915/intel_sdvo.c @@ -1340,6 +1340,7 @@ static void intel_sdvo_pre_enable(struct intel_encoder *intel_encoder, switch (crtc_state->pixel_multiplier) { default: WARN(1, "unknown pixel multiplier specified\n"); + /* fall through */ case 1: rate = SDVO_CLOCK_RATE_MULT_1X; break; case 2: rate = SDVO_CLOCK_RATE_MULT_2X; break; case 4: rate = SDVO_CLOCK_RATE_MULT_4X; break; @@ -2316,14 +2317,19 @@ intel_sdvo_guess_ddc_bus(struct intel_sdvo *sdvo) switch (sdvo->controlled_output) { case SDVO_OUTPUT_LVDS1: mask |= SDVO_OUTPUT_LVDS1; + /* fall through */ case SDVO_OUTPUT_LVDS0: mask |= SDVO_OUTPUT_LVDS0; + /* fall through */ case SDVO_OUTPUT_TMDS1: mask |= SDVO_OUTPUT_TMDS1; + /* fall through */ case SDVO_OUTPUT_TMDS0: mask |= SDVO_OUTPUT_TMDS0; + /* fall through */ case SDVO_OUTPUT_RGB1: mask |= SDVO_OUTPUT_RGB1; + /* fall through */ case SDVO_OUTPUT_RGB0: mask |= SDVO_OUTPUT_RGB0; break; -- cgit v1.2.3 From 481827b441674b7f1d030c223decdb56266ff398 Mon Sep 17 00:00:00 2001 From: Chris Wilson Date: Fri, 6 Jul 2018 11:14:41 +0100 Subject: drm/i915: Record logical context support in driver caps Avoid looking at the magical engines[RCS] to decide if the HW and driver supports logical contexts, and instead record that knowledge during initialisation. Signed-off-by: Chris Wilson Cc: Matthew Auld Reviewed-by: Matthew Auld Link: https://patchwork.freedesktop.org/patch/msgid/20180706101442.21279-1-chris@chris-wilson.co.uk --- drivers/gpu/drm/i915/i915_drv.h | 1 + drivers/gpu/drm/i915/i915_gem_context.c | 6 +++--- drivers/gpu/drm/i915/intel_device_info.c | 2 ++ drivers/gpu/drm/i915/intel_device_info.h | 1 + drivers/gpu/drm/i915/intel_engine_cs.c | 2 ++ 5 files changed, 9 insertions(+), 3 deletions(-) (limited to 'drivers/gpu/drm/i915/intel_engine_cs.c') diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h index 8a2196e4262b..c1c637e47bf5 100644 --- a/drivers/gpu/drm/i915/i915_drv.h +++ b/drivers/gpu/drm/i915/i915_drv.h @@ -2306,6 +2306,7 @@ intel_info(const struct drm_i915_private *dev_priv) } #define INTEL_INFO(dev_priv) intel_info((dev_priv)) +#define DRIVER_CAPS(dev_priv) (&(dev_priv)->caps) #define INTEL_GEN(dev_priv) ((dev_priv)->info.gen) #define INTEL_DEVID(dev_priv) ((dev_priv)->info.device_id) diff --git a/drivers/gpu/drm/i915/i915_gem_context.c b/drivers/gpu/drm/i915/i915_gem_context.c index 985ef70d9416..b10770cfccd2 100644 --- a/drivers/gpu/drm/i915/i915_gem_context.c +++ b/drivers/gpu/drm/i915/i915_gem_context.c @@ -512,8 +512,8 @@ int i915_gem_contexts_init(struct drm_i915_private *dev_priv) } DRM_DEBUG_DRIVER("%s context support initialized\n", - dev_priv->engine[RCS]->context_size ? "logical" : - "fake"); + DRIVER_CAPS(dev_priv)->has_logical_contexts ? + "logical" : "fake"); return 0; } @@ -720,7 +720,7 @@ int i915_gem_context_create_ioctl(struct drm_device *dev, void *data, struct i915_gem_context *ctx; int ret; - if (!dev_priv->engine[RCS]->context_size) + if (!DRIVER_CAPS(dev_priv)->has_logical_contexts) return -ENODEV; if (args->pad != 0) diff --git a/drivers/gpu/drm/i915/intel_device_info.c b/drivers/gpu/drm/i915/intel_device_info.c index 0fd13df424cf..0ef0c6448d53 100644 --- a/drivers/gpu/drm/i915/intel_device_info.c +++ b/drivers/gpu/drm/i915/intel_device_info.c @@ -858,6 +858,8 @@ void intel_device_info_runtime_init(struct intel_device_info *info) void intel_driver_caps_print(const struct intel_driver_caps *caps, struct drm_printer *p) { + drm_printf(p, "Has logical contexts? %s\n", + yesno(caps->has_logical_contexts)); drm_printf(p, "scheduler: %x\n", caps->scheduler); } diff --git a/drivers/gpu/drm/i915/intel_device_info.h b/drivers/gpu/drm/i915/intel_device_info.h index 933e31669557..633f9fbf72ea 100644 --- a/drivers/gpu/drm/i915/intel_device_info.h +++ b/drivers/gpu/drm/i915/intel_device_info.h @@ -186,6 +186,7 @@ struct intel_device_info { struct intel_driver_caps { unsigned int scheduler; + bool has_logical_contexts:1; }; static inline unsigned int sseu_subslice_total(const struct sseu_dev_info *sseu) diff --git a/drivers/gpu/drm/i915/intel_engine_cs.c b/drivers/gpu/drm/i915/intel_engine_cs.c index 478c928912c4..e2f562853aee 100644 --- a/drivers/gpu/drm/i915/intel_engine_cs.c +++ b/drivers/gpu/drm/i915/intel_engine_cs.c @@ -302,6 +302,8 @@ intel_engine_setup(struct drm_i915_private *dev_priv, engine->class); if (WARN_ON(engine->context_size > BIT(20))) engine->context_size = 0; + if (engine->context_size) + DRIVER_CAPS(dev_priv)->has_logical_contexts = true; /* Nothing to do here, execute in order of dependencies */ engine->schedule = NULL; -- cgit v1.2.3 From 890fd185d53037d05d10a0825950c4b038e39d4a Mon Sep 17 00:00:00 2001 From: Chris Wilson Date: Fri, 6 Jul 2018 22:07:10 +0100 Subject: drm/i915: Replace nested subclassing with explicit subclasses In the next patch, we will want a third distinct class of timeline that may overlap with the current pair of client and engine timeline classes. Rather than use the ad hoc markup of SINGLE_DEPTH_NESTING, initialise the different timeline classes with an explicit subclass. Signed-off-by: Chris Wilson Reviewed-by: Tvrtko Ursulin Link: https://patchwork.freedesktop.org/patch/msgid/20180706210710.16251-1-chris@chris-wilson.co.uk --- drivers/gpu/drm/i915/i915_gem.c | 2 +- drivers/gpu/drm/i915/i915_request.c | 2 +- drivers/gpu/drm/i915/i915_timeline.h | 2 ++ drivers/gpu/drm/i915/intel_engine_cs.c | 1 + drivers/gpu/drm/i915/selftests/mock_engine.c | 2 ++ 5 files changed, 7 insertions(+), 2 deletions(-) (limited to 'drivers/gpu/drm/i915/intel_engine_cs.c') diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c index 2e05cf114083..1a9dab302433 100644 --- a/drivers/gpu/drm/i915/i915_gem.c +++ b/drivers/gpu/drm/i915/i915_gem.c @@ -3095,7 +3095,7 @@ static void engine_skip_context(struct i915_request *request) GEM_BUG_ON(timeline == &engine->timeline); spin_lock_irqsave(&engine->timeline.lock, flags); - spin_lock_nested(&timeline->lock, SINGLE_DEPTH_NESTING); + spin_lock(&timeline->lock); list_for_each_entry_continue(request, &engine->timeline.requests, link) if (request->gem_context == hung_ctx) diff --git a/drivers/gpu/drm/i915/i915_request.c b/drivers/gpu/drm/i915/i915_request.c index 7ae08b68121e..3248369dbcfb 100644 --- a/drivers/gpu/drm/i915/i915_request.c +++ b/drivers/gpu/drm/i915/i915_request.c @@ -503,7 +503,7 @@ static void move_to_timeline(struct i915_request *request, GEM_BUG_ON(request->timeline == &request->engine->timeline); lockdep_assert_held(&request->engine->timeline.lock); - spin_lock_nested(&request->timeline->lock, SINGLE_DEPTH_NESTING); + spin_lock(&request->timeline->lock); list_move_tail(&request->link, &timeline->requests); spin_unlock(&request->timeline->lock); } diff --git a/drivers/gpu/drm/i915/i915_timeline.h b/drivers/gpu/drm/i915/i915_timeline.h index dc2a4632faa7..a2c2c3ab5fb0 100644 --- a/drivers/gpu/drm/i915/i915_timeline.h +++ b/drivers/gpu/drm/i915/i915_timeline.h @@ -37,6 +37,8 @@ struct i915_timeline { u32 seqno; spinlock_t lock; +#define TIMELINE_CLIENT 0 /* default subclass */ +#define TIMELINE_ENGINE 1 /** * List of breadcrumbs associated with GPU requests currently diff --git a/drivers/gpu/drm/i915/intel_engine_cs.c b/drivers/gpu/drm/i915/intel_engine_cs.c index e2f562853aee..0ac497275a51 100644 --- a/drivers/gpu/drm/i915/intel_engine_cs.c +++ b/drivers/gpu/drm/i915/intel_engine_cs.c @@ -483,6 +483,7 @@ static void intel_engine_init_execlist(struct intel_engine_cs *engine) void intel_engine_setup_common(struct intel_engine_cs *engine) { i915_timeline_init(engine->i915, &engine->timeline, engine->name); + lockdep_set_subclass(&engine->timeline.lock, TIMELINE_ENGINE); intel_engine_init_execlist(engine); intel_engine_init_hangcheck(engine); diff --git a/drivers/gpu/drm/i915/selftests/mock_engine.c b/drivers/gpu/drm/i915/selftests/mock_engine.c index c2a0451336cf..22a73da45ad5 100644 --- a/drivers/gpu/drm/i915/selftests/mock_engine.c +++ b/drivers/gpu/drm/i915/selftests/mock_engine.c @@ -200,6 +200,8 @@ struct intel_engine_cs *mock_engine(struct drm_i915_private *i915, engine->base.submit_request = mock_submit_request; i915_timeline_init(i915, &engine->base.timeline, engine->base.name); + lockdep_set_subclass(&engine->base.timeline.lock, TIMELINE_ENGINE); + intel_engine_init_breadcrumbs(&engine->base); engine->base.breadcrumbs.mock = true; /* prevent touching HW for irqs */ -- cgit v1.2.3 From 655250a8d1aa3b18949a72869858d07ceac12799 Mon Sep 17 00:00:00 2001 From: Chris Wilson Date: Fri, 29 Jun 2018 08:53:20 +0100 Subject: drm/i915/execlists: Switch to rb_root_cached The kernel recently gained an augmented rbtree with the purpose of cacheing the leftmost element of the rbtree, a frequent optimisation to avoid calls to rb_first() which is also employed by the execlists->queue. Switch from our open-coded cache to the library. Signed-off-by: Chris Wilson Reviewed-by: Tvrtko Ursulin Link: https://patchwork.freedesktop.org/patch/msgid/20180629075348.27358-9-chris@chris-wilson.co.uk --- drivers/gpu/drm/i915/intel_engine_cs.c | 7 +++---- drivers/gpu/drm/i915/intel_guc_submission.c | 12 ++++------- drivers/gpu/drm/i915/intel_lrc.c | 32 ++++++++++------------------- drivers/gpu/drm/i915/intel_ringbuffer.h | 7 +------ 4 files changed, 19 insertions(+), 39 deletions(-) (limited to 'drivers/gpu/drm/i915/intel_engine_cs.c') diff --git a/drivers/gpu/drm/i915/intel_engine_cs.c b/drivers/gpu/drm/i915/intel_engine_cs.c index 0ac497275a51..220050107c48 100644 --- a/drivers/gpu/drm/i915/intel_engine_cs.c +++ b/drivers/gpu/drm/i915/intel_engine_cs.c @@ -467,8 +467,7 @@ static void intel_engine_init_execlist(struct intel_engine_cs *engine) GEM_BUG_ON(execlists_num_ports(execlists) > EXECLIST_MAX_PORTS); execlists->queue_priority = INT_MIN; - execlists->queue = RB_ROOT; - execlists->first = NULL; + execlists->queue = RB_ROOT_CACHED; } /** @@ -1004,7 +1003,7 @@ bool intel_engine_is_idle(struct intel_engine_cs *engine) } /* ELSP is empty, but there are ready requests? E.g. after reset */ - if (READ_ONCE(engine->execlists.first)) + if (!RB_EMPTY_ROOT(&engine->execlists.queue.rb_root)) return false; /* Ring stopped? */ @@ -1540,7 +1539,7 @@ void intel_engine_dump(struct intel_engine_cs *engine, last = NULL; count = 0; drm_printf(m, "\t\tQueue priority: %d\n", execlists->queue_priority); - for (rb = execlists->first; rb; rb = rb_next(rb)) { + for (rb = rb_first_cached(&execlists->queue); rb; rb = rb_next(rb)) { struct i915_priolist *p = rb_entry(rb, typeof(*p), node); diff --git a/drivers/gpu/drm/i915/intel_guc_submission.c b/drivers/gpu/drm/i915/intel_guc_submission.c index f3945258fe1b..3952656f4c9a 100644 --- a/drivers/gpu/drm/i915/intel_guc_submission.c +++ b/drivers/gpu/drm/i915/intel_guc_submission.c @@ -695,9 +695,6 @@ static bool __guc_dequeue(struct intel_engine_cs *engine) lockdep_assert_held(&engine->timeline.lock); - rb = execlists->first; - GEM_BUG_ON(rb_first(&execlists->queue) != rb); - if (port_isset(port)) { if (intel_engine_has_preemption(engine)) { struct guc_preempt_work *preempt_work = @@ -719,7 +716,7 @@ static bool __guc_dequeue(struct intel_engine_cs *engine) } GEM_BUG_ON(port_isset(port)); - while (rb) { + while ((rb = rb_first_cached(&execlists->queue))) { struct i915_priolist *p = to_priolist(rb); struct i915_request *rq, *rn; @@ -744,15 +741,13 @@ static bool __guc_dequeue(struct intel_engine_cs *engine) submit = true; } - rb = rb_next(rb); - rb_erase(&p->node, &execlists->queue); + rb_erase_cached(&p->node, &execlists->queue); INIT_LIST_HEAD(&p->requests); if (p->priority != I915_PRIORITY_NORMAL) kmem_cache_free(engine->i915->priorities, p); } done: execlists->queue_priority = rb ? to_priolist(rb)->priority : INT_MIN; - execlists->first = rb; if (submit) port_assign(port, last); if (last) @@ -761,7 +756,8 @@ done: /* We must always keep the beast fed if we have work piled up */ GEM_BUG_ON(port_isset(execlists->port) && !execlists_is_active(execlists, EXECLISTS_ACTIVE_USER)); - GEM_BUG_ON(execlists->first && !port_isset(execlists->port)); + GEM_BUG_ON(rb_first_cached(&execlists->queue) && + !port_isset(execlists->port)); return submit; } diff --git a/drivers/gpu/drm/i915/intel_lrc.c b/drivers/gpu/drm/i915/intel_lrc.c index d937a21da2d8..ad436d200758 100644 --- a/drivers/gpu/drm/i915/intel_lrc.c +++ b/drivers/gpu/drm/i915/intel_lrc.c @@ -273,7 +273,7 @@ lookup_priolist(struct intel_engine_cs *engine, int prio) find_priolist: /* most positive priority is scheduled first, equal priorities fifo */ rb = NULL; - parent = &execlists->queue.rb_node; + parent = &execlists->queue.rb_root.rb_node; while (*parent) { rb = *parent; p = to_priolist(rb); @@ -311,10 +311,7 @@ find_priolist: p->priority = prio; INIT_LIST_HEAD(&p->requests); rb_link_node(&p->node, rb, parent); - rb_insert_color(&p->node, &execlists->queue); - - if (first) - execlists->first = &p->node; + rb_insert_color_cached(&p->node, &execlists->queue, first); return p; } @@ -602,9 +599,6 @@ static void execlists_dequeue(struct intel_engine_cs *engine) * and context switches) submission. */ - rb = execlists->first; - GEM_BUG_ON(rb_first(&execlists->queue) != rb); - if (last) { /* * Don't resubmit or switch until all outstanding @@ -666,7 +660,7 @@ static void execlists_dequeue(struct intel_engine_cs *engine) last->tail = last->wa_tail; } - while (rb) { + while ((rb = rb_first_cached(&execlists->queue))) { struct i915_priolist *p = to_priolist(rb); struct i915_request *rq, *rn; @@ -725,8 +719,7 @@ static void execlists_dequeue(struct intel_engine_cs *engine) submit = true; } - rb = rb_next(rb); - rb_erase(&p->node, &execlists->queue); + rb_erase_cached(&p->node, &execlists->queue); INIT_LIST_HEAD(&p->requests); if (p->priority != I915_PRIORITY_NORMAL) kmem_cache_free(engine->i915->priorities, p); @@ -752,14 +745,14 @@ done: execlists->queue_priority = port != execlists->port ? rq_prio(last) : INT_MIN; - execlists->first = rb; if (submit) { port_assign(port, last); execlists_submit_ports(engine); } /* We must always keep the beast fed if we have work piled up */ - GEM_BUG_ON(execlists->first && !port_isset(execlists->port)); + GEM_BUG_ON(rb_first_cached(&execlists->queue) && + !port_isset(execlists->port)); /* Re-evaluate the executing context setup after each preemptive kick */ if (last) @@ -922,8 +915,7 @@ static void execlists_cancel_requests(struct intel_engine_cs *engine) } /* Flush the queued requests to the timeline list (for retiring). */ - rb = execlists->first; - while (rb) { + while ((rb = rb_first_cached(&execlists->queue))) { struct i915_priolist *p = to_priolist(rb); list_for_each_entry_safe(rq, rn, &p->requests, sched.link) { @@ -933,8 +925,7 @@ static void execlists_cancel_requests(struct intel_engine_cs *engine) __i915_request_submit(rq); } - rb = rb_next(rb); - rb_erase(&p->node, &execlists->queue); + rb_erase_cached(&p->node, &execlists->queue); INIT_LIST_HEAD(&p->requests); if (p->priority != I915_PRIORITY_NORMAL) kmem_cache_free(engine->i915->priorities, p); @@ -943,8 +934,7 @@ static void execlists_cancel_requests(struct intel_engine_cs *engine) /* Remaining _unready_ requests will be nop'ed when submitted */ execlists->queue_priority = INT_MIN; - execlists->queue = RB_ROOT; - execlists->first = NULL; + execlists->queue = RB_ROOT_CACHED; GEM_BUG_ON(port_isset(execlists->port)); spin_unlock_irqrestore(&engine->timeline.lock, flags); @@ -1192,7 +1182,7 @@ static void execlists_submit_request(struct i915_request *request) queue_request(engine, &request->sched, rq_prio(request)); - GEM_BUG_ON(!engine->execlists.first); + GEM_BUG_ON(RB_EMPTY_ROOT(&engine->execlists.queue.rb_root)); GEM_BUG_ON(list_empty(&request->sched.link)); submit_queue(engine, rq_prio(request)); @@ -2044,7 +2034,7 @@ static void execlists_reset_finish(struct intel_engine_cs *engine) struct intel_engine_execlists * const execlists = &engine->execlists; /* After a GPU reset, we may have requests to replay */ - if (execlists->first) + if (!RB_EMPTY_ROOT(&execlists->queue.rb_root)) tasklet_schedule(&execlists->tasklet); /* diff --git a/drivers/gpu/drm/i915/intel_ringbuffer.h b/drivers/gpu/drm/i915/intel_ringbuffer.h index ce6cc2a6cf7a..d1eee08e5f6b 100644 --- a/drivers/gpu/drm/i915/intel_ringbuffer.h +++ b/drivers/gpu/drm/i915/intel_ringbuffer.h @@ -292,12 +292,7 @@ struct intel_engine_execlists { /** * @queue: queue of requests, in priority lists */ - struct rb_root queue; - - /** - * @first: leftmost level in priority @queue - */ - struct rb_node *first; + struct rb_root_cached queue; /** * @csb_read: control register for Context Switch buffer -- cgit v1.2.3 From 9701975e851082d179faaab9c6306a7800d86c0f Mon Sep 17 00:00:00 2001 From: Chris Wilson Date: Fri, 13 Jul 2018 21:35:28 +0100 Subject: drm/i915: Do not short-circuit tasklets during reset Inside intel_engine_is_idle(), we flush the tasklet to ensure that is being run in a timely fashion (ksoftirqd has taught us to expect the worst). However, if we are in the middle of reset, the HW may not yet be ready to execute the submission tasklet and so we must respect the disable flag. Fixes: dd0cf235d81f ("drm/i915: Speed up idle detection by kicking the tasklets") Testcase: igt/drv_selftest/live_hangcheck Signed-off-by: Chris Wilson Cc: Tvrtko Ursulin Cc: Mika Kuoppala Reviewed-by: Michel Thierry Link: https://patchwork.freedesktop.org/patch/msgid/20180713203529.1973-2-chris@chris-wilson.co.uk --- drivers/gpu/drm/i915/intel_engine_cs.c | 12 +++++++----- 1 file changed, 7 insertions(+), 5 deletions(-) (limited to 'drivers/gpu/drm/i915/intel_engine_cs.c') diff --git a/drivers/gpu/drm/i915/intel_engine_cs.c b/drivers/gpu/drm/i915/intel_engine_cs.c index 220050107c48..2d1952849d69 100644 --- a/drivers/gpu/drm/i915/intel_engine_cs.c +++ b/drivers/gpu/drm/i915/intel_engine_cs.c @@ -989,16 +989,18 @@ bool intel_engine_is_idle(struct intel_engine_cs *engine) /* Waiting to drain ELSP? */ if (READ_ONCE(engine->execlists.active)) { - struct intel_engine_execlists *execlists = &engine->execlists; + struct tasklet_struct *t = &engine->execlists.tasklet; local_bh_disable(); - if (tasklet_trylock(&execlists->tasklet)) { - execlists->tasklet.func(execlists->tasklet.data); - tasklet_unlock(&execlists->tasklet); + if (tasklet_trylock(t)) { + /* Must wait for any GPU reset in progress. */ + if (__tasklet_is_enabled(t)) + t->func(t->data); + tasklet_unlock(t); } local_bh_enable(); - if (READ_ONCE(execlists->active)) + if (READ_ONCE(engine->execlists.active)) return false; } -- cgit v1.2.3