diff options
Diffstat (limited to 'drivers/gpu/drm/i915/intel_lrc.c')
-rw-r--r-- | drivers/gpu/drm/i915/intel_lrc.c | 261 |
1 files changed, 186 insertions, 75 deletions
diff --git a/drivers/gpu/drm/i915/intel_lrc.c b/drivers/gpu/drm/i915/intel_lrc.c index 697af5add78b..c7c85134a84a 100644 --- a/drivers/gpu/drm/i915/intel_lrc.c +++ b/drivers/gpu/drm/i915/intel_lrc.c @@ -139,6 +139,7 @@ #include "i915_gem_render_state.h" #include "intel_lrc_reg.h" #include "intel_mocs.h" +#include "intel_workarounds.h" #define RING_EXECLIST_QFULL (1 << 0x2) #define RING_EXECLIST1_VALID (1 << 0x3) @@ -183,7 +184,8 @@ static inline bool need_preempt(const struct intel_engine_cs *engine, const struct i915_request *last, int prio) { - return engine->i915->preempt_context && prio > max(rq_prio(last), 0); + return (intel_engine_has_preemption(engine) && + __execlists_need_preempt(prio, rq_prio(last))); } /** @@ -374,6 +376,19 @@ execlists_context_status_change(struct i915_request *rq, unsigned long status) status, rq); } +inline void +execlists_user_begin(struct intel_engine_execlists *execlists, + const struct execlist_port *port) +{ + execlists_set_active_once(execlists, EXECLISTS_ACTIVE_USER); +} + +inline void +execlists_user_end(struct intel_engine_execlists *execlists) +{ + execlists_clear_active(execlists, EXECLISTS_ACTIVE_USER); +} + static inline void execlists_context_schedule_in(struct i915_request *rq) { @@ -454,10 +469,12 @@ static void execlists_submit_ports(struct intel_engine_cs *engine) desc = execlists_update_context(rq); GEM_DEBUG_EXEC(port[n].context_id = upper_32_bits(desc)); - GEM_TRACE("%s in[%d]: ctx=%d.%d, seqno=%x, prio=%d\n", + GEM_TRACE("%s in[%d]: ctx=%d.%d, global=%d (fence %llx:%d) (current %d), prio=%d\n", engine->name, n, port[n].context_id, count, rq->global_seqno, + rq->fence.context, rq->fence.seqno, + intel_engine_get_seqno(engine), rq_prio(rq)); } else { GEM_BUG_ON(!n); @@ -577,6 +594,8 @@ static void execlists_dequeue(struct intel_engine_cs *engine) * know the next preemption status we see corresponds * to this ELSP update. */ + GEM_BUG_ON(!execlists_is_active(execlists, + EXECLISTS_ACTIVE_USER)); GEM_BUG_ON(!port_count(&port[0])); if (port_count(&port[0]) > 1) goto unlock; @@ -695,8 +714,27 @@ static void execlists_dequeue(struct intel_engine_cs *engine) if (p->priority != I915_PRIORITY_NORMAL) kmem_cache_free(engine->i915->priorities, p); } + done: - execlists->queue_priority = rb ? to_priolist(rb)->priority : INT_MIN; + /* + * Here be a bit of magic! Or sleight-of-hand, whichever you prefer. + * + * We choose queue_priority such that if we add a request of greater + * priority than this, we kick the submission tasklet to decide on + * the right order of submitting the requests to hardware. We must + * also be prepared to reorder requests as they are in-flight on the + * HW. We derive the queue_priority then as the first "hole" in + * the HW submission ports and if there are no available slots, + * the priority of the lowest executing request, i.e. last. + * + * When we do receive a higher priority request ready to run from the + * user, see queue_request(), the queue_priority is bumped to that + * request triggering preemption on the next dequeue (or subsequent + * interrupt for secondary ports). + */ + execlists->queue_priority = + port != execlists->port ? rq_prio(last) : INT_MIN; + execlists->first = rb; if (submit) port_assign(port, last); @@ -708,7 +746,7 @@ unlock: spin_unlock_irq(&engine->timeline->lock); if (submit) { - execlists_set_active(execlists, EXECLISTS_ACTIVE_USER); + execlists_user_begin(execlists, execlists->port); execlists_submit_ports(engine); } @@ -725,6 +763,13 @@ execlists_cancel_port_requests(struct intel_engine_execlists * const execlists) while (num_ports-- && port_isset(port)) { struct i915_request *rq = port_request(port); + GEM_TRACE("%s:port%u global=%d (fence %llx:%d), (current %d)\n", + rq->engine->name, + (unsigned int)(port - execlists->port), + rq->global_seqno, + rq->fence.context, rq->fence.seqno, + intel_engine_get_seqno(rq->engine)); + GEM_BUG_ON(!execlists->active); intel_engine_context_out(rq->engine); @@ -738,6 +783,59 @@ execlists_cancel_port_requests(struct intel_engine_execlists * const execlists) memset(port, 0, sizeof(*port)); port++; } + + execlists_user_end(execlists); +} + +static void clear_gtiir(struct intel_engine_cs *engine) +{ + static const u8 gtiir[] = { + [RCS] = 0, + [BCS] = 0, + [VCS] = 1, + [VCS2] = 1, + [VECS] = 3, + }; + struct drm_i915_private *dev_priv = engine->i915; + int i; + + /* TODO: correctly reset irqs for gen11 */ + if (WARN_ON_ONCE(INTEL_GEN(engine->i915) >= 11)) + return; + + GEM_BUG_ON(engine->id >= ARRAY_SIZE(gtiir)); + + /* + * Clear any pending interrupt state. + * + * We do it twice out of paranoia that some of the IIR are + * double buffered, and so if we only reset it once there may + * still be an interrupt pending. + */ + for (i = 0; i < 2; i++) { + I915_WRITE(GEN8_GT_IIR(gtiir[engine->id]), + engine->irq_keep_mask); + POSTING_READ(GEN8_GT_IIR(gtiir[engine->id])); + } + GEM_BUG_ON(I915_READ(GEN8_GT_IIR(gtiir[engine->id])) & + engine->irq_keep_mask); +} + +static void reset_irq(struct intel_engine_cs *engine) +{ + /* Mark all CS interrupts as complete */ + smp_store_mb(engine->execlists.active, 0); + synchronize_hardirq(engine->i915->drm.irq); + + clear_gtiir(engine); + + /* + * The port is checked prior to scheduling a tasklet, but + * just in case we have suspended the tasklet to do the + * wedging make sure that when it wakes, it decides there + * is no work to do by clearing the irq_posted bit. + */ + clear_bit(ENGINE_IRQ_EXECLIST, &engine->irq_posted); } static void execlists_cancel_requests(struct intel_engine_cs *engine) @@ -747,7 +845,8 @@ static void execlists_cancel_requests(struct intel_engine_cs *engine) struct rb_node *rb; unsigned long flags; - GEM_TRACE("%s\n", engine->name); + GEM_TRACE("%s current %d\n", + engine->name, intel_engine_get_seqno(engine)); /* * Before we call engine->cancel_requests(), we should have exclusive @@ -767,6 +866,7 @@ static void execlists_cancel_requests(struct intel_engine_cs *engine) /* Cancel the requests on the HW and clear the ELSP tracker. */ execlists_cancel_port_requests(execlists); + reset_irq(engine); spin_lock(&engine->timeline->lock); @@ -805,17 +905,6 @@ static void execlists_cancel_requests(struct intel_engine_cs *engine) spin_unlock(&engine->timeline->lock); - /* - * The port is checked prior to scheduling a tasklet, but - * just in case we have suspended the tasklet to do the - * wedging make sure that when it wakes, it decides there - * is no work to do by clearing the irq_posted bit. - */ - clear_bit(ENGINE_IRQ_EXECLIST, &engine->irq_posted); - - /* Mark all CS interrupts as complete */ - execlists->active = 0; - local_irq_restore(flags); } @@ -827,7 +916,7 @@ static void execlists_submission_tasklet(unsigned long data) { struct intel_engine_cs * const engine = (struct intel_engine_cs *)data; struct intel_engine_execlists * const execlists = &engine->execlists; - struct execlist_port * const port = execlists->port; + struct execlist_port *port = execlists->port; struct drm_i915_private *dev_priv = engine->i915; bool fw = false; @@ -954,10 +1043,13 @@ static void execlists_submission_tasklet(unsigned long data) EXECLISTS_ACTIVE_USER)); rq = port_unpack(port, &count); - GEM_TRACE("%s out[0]: ctx=%d.%d, seqno=%x, prio=%d\n", + GEM_TRACE("%s out[0]: ctx=%d.%d, global=%d (fence %llx:%d) (current %d), prio=%d\n", engine->name, port->context_id, count, rq ? rq->global_seqno : 0, + rq ? rq->fence.context : 0, + rq ? rq->fence.seqno : 0, + intel_engine_get_seqno(engine), rq ? rq_prio(rq) : 0); /* Check the context/desc id for this event matches */ @@ -965,10 +1057,28 @@ static void execlists_submission_tasklet(unsigned long data) GEM_BUG_ON(count == 0); if (--count == 0) { + /* + * On the final event corresponding to the + * submission of this context, we expect either + * an element-switch event or a completion + * event (and on completion, the active-idle + * marker). No more preemptions, lite-restore + * or otherwise. + */ GEM_BUG_ON(status & GEN8_CTX_STATUS_PREEMPTED); GEM_BUG_ON(port_isset(&port[1]) && !(status & GEN8_CTX_STATUS_ELEMENT_SWITCH)); + GEM_BUG_ON(!port_isset(&port[1]) && + !(status & GEN8_CTX_STATUS_ACTIVE_IDLE)); + + /* + * We rely on the hardware being strongly + * ordered, that the breadcrumb write is + * coherent (visible from the CPU) before the + * user interrupt and CSB is processed. + */ GEM_BUG_ON(!i915_request_completed(rq)); + execlists_context_schedule_out(rq); trace_i915_request_out(rq); i915_request_put(rq); @@ -976,17 +1086,14 @@ static void execlists_submission_tasklet(unsigned long data) GEM_TRACE("%s completed ctx=%d\n", engine->name, port->context_id); - execlists_port_complete(execlists, port); + port = execlists_port_complete(execlists, port); + if (port_isset(port)) + execlists_user_begin(execlists, port); + else + execlists_user_end(execlists); } else { port_set(port, port_pack(rq, count)); } - - /* After the final element, the hw should be idle */ - GEM_BUG_ON(port_count(port) == 0 && - !(status & GEN8_CTX_STATUS_ACTIVE_IDLE)); - if (port_count(port) == 0) - execlists_clear_active(execlists, - EXECLISTS_ACTIVE_USER); } if (head != execlists->csb_head) { @@ -1001,6 +1108,11 @@ static void execlists_submission_tasklet(unsigned long data) if (fw) intel_uncore_forcewake_put(dev_priv, execlists->fw_domains); + + /* If the engine is now idle, so should be the flag; and vice versa. */ + GEM_BUG_ON(execlists_is_active(&engine->execlists, + EXECLISTS_ACTIVE_USER) == + !port_isset(engine->execlists.port)); } static void queue_request(struct intel_engine_cs *engine, @@ -1010,12 +1122,16 @@ static void queue_request(struct intel_engine_cs *engine, list_add_tail(&pt->link, &lookup_priolist(engine, pt, prio)->requests); } +static void __submit_queue(struct intel_engine_cs *engine, int prio) +{ + engine->execlists.queue_priority = prio; + tasklet_hi_schedule(&engine->execlists.tasklet); +} + static void submit_queue(struct intel_engine_cs *engine, int prio) { - if (prio > engine->execlists.queue_priority) { - engine->execlists.queue_priority = prio; - tasklet_hi_schedule(&engine->execlists.tasklet); - } + if (prio > engine->execlists.queue_priority) + __submit_queue(engine, prio); } static void execlists_submit_request(struct i915_request *request) @@ -1148,7 +1264,10 @@ static void execlists_schedule(struct i915_request *request, int prio) __list_del_entry(&pt->link); queue_request(engine, pt, prio); } - submit_queue(engine, prio); + + if (prio > engine->execlists.queue_priority && + i915_sw_fence_done(&pt_to_request(pt)->submit)) + __submit_queue(engine, prio); } spin_unlock_irq(&engine->timeline->lock); @@ -1215,6 +1334,7 @@ execlists_context_pin(struct intel_engine_cs *engine, ce->lrc_reg_state = vaddr + LRC_STATE_PN * PAGE_SIZE; ce->lrc_reg_state[CTX_RING_BUFFER_START+1] = i915_ggtt_offset(ce->ring->vma); + ce->lrc_reg_state[CTX_RING_HEAD+1] = ce->ring->head; ce->state->obj->pin_global++; i915_gem_context_get(ctx); @@ -1565,14 +1685,6 @@ static int intel_init_workaround_bb(struct intel_engine_cs *engine) return ret; } -static u8 gtiir[] = { - [RCS] = 0, - [BCS] = 0, - [VCS] = 1, - [VCS2] = 1, - [VECS] = 3, -}; - static void enable_execlists(struct intel_engine_cs *engine) { struct drm_i915_private *dev_priv = engine->i915; @@ -1632,6 +1744,10 @@ static int gen8_init_render_ring(struct intel_engine_cs *engine) if (ret) return ret; + ret = intel_whitelist_workarounds_apply(engine); + if (ret) + return ret; + /* We need to disable the AsyncFlip performance optimisations in order * to use MI_WAIT_FOR_EVENT within the CS. It should already be * programmed to '1' on all products. @@ -1642,7 +1758,7 @@ static int gen8_init_render_ring(struct intel_engine_cs *engine) I915_WRITE(INSTPM, _MASKED_BIT_ENABLE(INSTPM_FORCE_ORDERING)); - return init_workarounds_ring(engine); + return 0; } static int gen9_init_render_ring(struct intel_engine_cs *engine) @@ -1653,32 +1769,11 @@ static int gen9_init_render_ring(struct intel_engine_cs *engine) if (ret) return ret; - return init_workarounds_ring(engine); -} - -static void reset_irq(struct intel_engine_cs *engine) -{ - struct drm_i915_private *dev_priv = engine->i915; - int i; - - GEM_BUG_ON(engine->id >= ARRAY_SIZE(gtiir)); - - /* - * Clear any pending interrupt state. - * - * We do it twice out of paranoia that some of the IIR are double - * buffered, and if we only reset it once there may still be - * an interrupt pending. - */ - for (i = 0; i < 2; i++) { - I915_WRITE(GEN8_GT_IIR(gtiir[engine->id]), - GT_CONTEXT_SWITCH_INTERRUPT << engine->irq_shift); - POSTING_READ(GEN8_GT_IIR(gtiir[engine->id])); - } - GEM_BUG_ON(I915_READ(GEN8_GT_IIR(gtiir[engine->id])) & - (GT_CONTEXT_SWITCH_INTERRUPT << engine->irq_shift)); + ret = intel_whitelist_workarounds_apply(engine); + if (ret) + return ret; - clear_bit(ENGINE_IRQ_EXECLIST, &engine->irq_posted); + return 0; } static void reset_common_ring(struct intel_engine_cs *engine, @@ -1688,14 +1783,13 @@ static void reset_common_ring(struct intel_engine_cs *engine, struct intel_context *ce; unsigned long flags; - GEM_TRACE("%s seqno=%x\n", - engine->name, request ? request->global_seqno : 0); + GEM_TRACE("%s request global=%x, current=%d\n", + engine->name, request ? request->global_seqno : 0, + intel_engine_get_seqno(engine)); /* See execlists_cancel_requests() for the irq/spinlock split. */ local_irq_save(flags); - reset_irq(engine); - /* * Catch up with any missed context-switch interrupts. * @@ -1706,15 +1800,13 @@ static void reset_common_ring(struct intel_engine_cs *engine, * requests were completed. */ execlists_cancel_port_requests(execlists); + reset_irq(engine); /* Push back any incomplete requests for replay after the reset. */ spin_lock(&engine->timeline->lock); __unwind_incomplete_requests(engine); spin_unlock(&engine->timeline->lock); - /* Mark all CS interrupts as complete */ - execlists->active = 0; - local_irq_restore(flags); /* @@ -2006,7 +2098,7 @@ static int gen8_init_rcs_context(struct i915_request *rq) { int ret; - ret = intel_ring_workarounds_emit(rq); + ret = intel_ctx_workarounds_emit(rq); if (ret) return ret; @@ -2066,11 +2158,13 @@ static void execlists_set_default_submission(struct intel_engine_cs *engine) engine->unpark = NULL; engine->flags |= I915_ENGINE_SUPPORTS_STATS; + if (engine->i915->preempt_context) + engine->flags |= I915_ENGINE_HAS_PREEMPTION; engine->i915->caps.scheduler = I915_SCHEDULER_CAP_ENABLED | I915_SCHEDULER_CAP_PRIORITY; - if (engine->i915->preempt_context) + if (intel_engine_has_preemption(engine)) engine->i915->caps.scheduler |= I915_SCHEDULER_CAP_PREEMPTION; } @@ -2109,7 +2203,20 @@ logical_ring_default_vfuncs(struct intel_engine_cs *engine) static inline void logical_ring_default_irqs(struct intel_engine_cs *engine) { - unsigned shift = engine->irq_shift; + unsigned int shift = 0; + + if (INTEL_GEN(engine->i915) < 11) { + const u8 irq_shifts[] = { + [RCS] = GEN8_RCS_IRQ_SHIFT, + [BCS] = GEN8_BCS_IRQ_SHIFT, + [VCS] = GEN8_VCS1_IRQ_SHIFT, + [VCS2] = GEN8_VCS2_IRQ_SHIFT, + [VECS] = GEN8_VECS_IRQ_SHIFT, + }; + + shift = irq_shifts[engine->id]; + } + engine->irq_enable_mask = GT_RENDER_USER_INTERRUPT << shift; engine->irq_keep_mask = GT_CONTEXT_SWITCH_INTERRUPT << shift; } @@ -2542,3 +2649,7 @@ void intel_lr_context_resume(struct drm_i915_private *dev_priv) } } } + +#if IS_ENABLED(CONFIG_DRM_I915_SELFTEST) +#include "selftests/intel_lrc.c" +#endif |