Diffstat (limited to 'drivers/gpu/drm/i915/gt')
25 files changed, 714 insertions, 504 deletions
diff --git a/drivers/gpu/drm/i915/gt/intel_context.c b/drivers/gpu/drm/i915/gt/intel_context.c
index 5b31e1e05ddd..2c454f227c2e 100644
--- a/drivers/gpu/drm/i915/gt/intel_context.c
+++ b/drivers/gpu/drm/i915/gt/intel_context.c
@@ -4,8 +4,10 @@
 * Copyright © 2019 Intel Corporation
 */

+#include "gem/i915_gem_context.h"
+#include "gem/i915_gem_pm.h"
+
#include "i915_drv.h"
-#include "i915_gem_context.h"
#include "i915_globals.h"

#include "intel_context.h"
@@ -52,14 +54,13 @@ int __intel_context_do_pin(struct intel_context *ce)
		intel_wakeref_t wakeref;

		err = 0;
-		with_intel_runtime_pm(ce->engine->i915, wakeref)
+		with_intel_runtime_pm(&ce->engine->i915->runtime_pm, wakeref)
			err = ce->ops->pin(ce);
		if (err)
			goto err;

		i915_gem_context_get(ce->gem_context); /* for ctx->ppgtt */
-		intel_context_get(ce);

		smp_mb__before_atomic(); /* flush pin before it is visible */
	}
@@ -87,20 +88,45 @@ void intel_context_unpin(struct intel_context *ce)
		ce->ops->unpin(ce);

		i915_gem_context_put(ce->gem_context);
-		intel_context_put(ce);
+		intel_context_active_release(ce);
	}

	mutex_unlock(&ce->pin_mutex);
	intel_context_put(ce);
}

-static void intel_context_retire(struct i915_active_request *active,
-				 struct i915_request *rq)
+static int __context_pin_state(struct i915_vma *vma, unsigned long flags)
{
-	struct intel_context *ce =
-		container_of(active, typeof(*ce), active_tracker);
+	int err;

-	intel_context_unpin(ce);
+	err = i915_vma_pin(vma, 0, 0, flags | PIN_GLOBAL);
+	if (err)
+		return err;
+
+	/*
+	 * And mark it as a globally pinned object to let the shrinker know
+	 * it cannot reclaim the object until we release it.
+	 */
+	vma->obj->pin_global++;
+	vma->obj->mm.dirty = true;
+
+	return 0;
+}
+
+static void __context_unpin_state(struct i915_vma *vma)
+{
+	vma->obj->pin_global--;
+	__i915_vma_unpin(vma);
+}
+
+static void intel_context_retire(struct i915_active *active)
+{
+	struct intel_context *ce = container_of(active, typeof(*ce), active);
+
+	if (ce->state)
+		__context_unpin_state(ce->state);
+
+	intel_context_put(ce);
}

void
@@ -116,15 +142,52 @@ intel_context_init(struct intel_context *ce,
	ce->engine = engine;
	ce->ops = engine->cops;
	ce->sseu = engine->sseu;
-	ce->saturated = 0;

	INIT_LIST_HEAD(&ce->signal_link);
	INIT_LIST_HEAD(&ce->signals);

	mutex_init(&ce->pin_mutex);

-	i915_active_request_init(&ce->active_tracker,
-				 NULL, intel_context_retire);
+	i915_active_init(ctx->i915, &ce->active, intel_context_retire);
+}
+
+int intel_context_active_acquire(struct intel_context *ce, unsigned long flags)
+{
+	int err;
+
+	if (!i915_active_acquire(&ce->active))
+		return 0;
+
+	intel_context_get(ce);
+
+	if (!ce->state)
+		return 0;
+
+	err = __context_pin_state(ce->state, flags);
+	if (err) {
+		i915_active_cancel(&ce->active);
+		intel_context_put(ce);
+		return err;
+	}
+
+	/* Preallocate tracking nodes */
+	if (!i915_gem_context_is_kernel(ce->gem_context)) {
+		err = i915_active_acquire_preallocate_barrier(&ce->active,
+							      ce->engine);
+		if (err) {
+			i915_active_release(&ce->active);
+			return err;
+		}
+	}
+
+	return 0;
+}
+
+void intel_context_active_release(struct intel_context *ce)
+{
+	/* Nodes preallocated in intel_context_active() */
+	i915_active_acquire_barrier(&ce->active);
+	i915_active_release(&ce->active);
}

static void i915_global_context_shrink(void)
@@ -159,7 +222,6 @@ void intel_context_enter_engine(struct intel_context *ce)

void intel_context_exit_engine(struct intel_context *ce)
{
-	ce->saturated = 0;
	intel_engine_pm_put(ce->engine);
}
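The intel_context.c changes above replace the per-request active tracker with an i915_active: the first acquire takes a self-reference and pins the context state, and the retire callback (run once the tracked activity is over) unpins and drops that reference. A standalone C toy of that acquire/retire pairing — all names here are invented for illustration, and the real driver fires retire on request completion rather than synchronously at the last release:

/* cc -o active_toy active_toy.c */
#include <assert.h>
#include <stdio.h>

struct toy_context {
	int ref;	/* stands in for the kref */
	int active;	/* stands in for the i915_active count */
	int pinned;	/* stands in for vma->obj->pin_global */
};

static void toy_retire(struct toy_context *ce)
{
	ce->pinned--;	/* cf. __context_unpin_state() */
	ce->ref--;	/* cf. intel_context_put() */
}

static void toy_active_acquire(struct toy_context *ce)
{
	if (ce->active++)	/* already active: nothing extra to do */
		return;

	ce->ref++;	/* cf. intel_context_get() */
	ce->pinned++;	/* cf. __context_pin_state() */
}

static void toy_active_release(struct toy_context *ce)
{
	if (!--ce->active)	/* last release: run the retirement */
		toy_retire(ce);
}

int main(void)
{
	struct toy_context ce = { .ref = 1 };

	toy_active_acquire(&ce);	/* first pin grabs ref + state */
	toy_active_acquire(&ce);	/* nested pin: no extra ref */
	toy_active_release(&ce);
	toy_active_release(&ce);	/* last unpin: retire fires */

	assert(ce.ref == 1 && !ce.pinned);
	printf("retired: ref=%d pinned=%d\n", ce.ref, ce.pinned);
	return 0;
}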
diff --git a/drivers/gpu/drm/i915/gt/intel_context.h b/drivers/gpu/drm/i915/gt/intel_context.h
index 63392c88cd98..a47275bc4f01 100644
--- a/drivers/gpu/drm/i915/gt/intel_context.h
+++ b/drivers/gpu/drm/i915/gt/intel_context.h
@@ -102,6 +102,9 @@ static inline void intel_context_exit(struct intel_context *ce)
		ce->ops->exit(ce);
}

+int intel_context_active_acquire(struct intel_context *ce, unsigned long flags);
+void intel_context_active_release(struct intel_context *ce);
+
static inline struct intel_context *intel_context_get(struct intel_context *ce)
{
	kref_get(&ce->ref);
@@ -113,10 +116,11 @@ static inline void intel_context_put(struct intel_context *ce)
	kref_put(&ce->ref, ce->ops->destroy);
}

-static inline void intel_context_timeline_lock(struct intel_context *ce)
+static inline int __must_check
+intel_context_timeline_lock(struct intel_context *ce)
	__acquires(&ce->ring->timeline->mutex)
{
-	mutex_lock(&ce->ring->timeline->mutex);
+	return mutex_lock_interruptible(&ce->ring->timeline->mutex);
}

static inline void intel_context_timeline_unlock(struct intel_context *ce)
diff --git a/drivers/gpu/drm/i915/gt/intel_context_types.h b/drivers/gpu/drm/i915/gt/intel_context_types.h
index 963a312430e6..08049ee91cee 100644
--- a/drivers/gpu/drm/i915/gt/intel_context_types.h
+++ b/drivers/gpu/drm/i915/gt/intel_context_types.h
@@ -37,7 +37,7 @@ struct intel_context {
	struct i915_gem_context *gem_context;
	struct intel_engine_cs *engine;
-	struct intel_engine_cs *active;
+	struct intel_engine_cs *inflight;

	struct list_head signal_link;
	struct list_head signals;
@@ -53,13 +53,11 @@ struct intel_context {
	atomic_t pin_count;
	struct mutex pin_mutex; /* guards pinning and associated on-gpuing */

-	intel_engine_mask_t saturated; /* submitting semaphores too late? */
-
	/**
-	 * active_tracker: Active tracker for the external rq activity
-	 * on this intel_context object.
+	 * active: Active tracker for the rq activity (inc. external) on this
+	 * intel_context object.
	 */
-	struct i915_active_request active_tracker;
+	struct i915_active active;

	const struct intel_context_ops *ops;
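In the intel_context.h hunks above, intel_context_timeline_lock() now returns int and is __must_check, so every caller has to handle -EINTR instead of assuming the mutex was taken. A userspace sketch of the same contract, with warn_unused_result standing in for the kernel's __must_check (the function and its body are illustrative, not the driver's):

/* cc -Wall -o lock_toy lock_toy.c */
#include <errno.h>
#include <stdio.h>

__attribute__((warn_unused_result))
static int timeline_lock_interruptible(int *signal_pending)
{
	if (*signal_pending)
		return -EINTR;	/* caller must unwind, not proceed */
	/* ... take the mutex here ... */
	return 0;
}

int main(void)
{
	int signalled = 1;
	int err = timeline_lock_interruptible(&signalled);

	if (err) {	/* the compiler warns if this check is dropped */
		fprintf(stderr, "lock interrupted: %d\n", err);
		return 1;
	}
	return 0;
}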
diff --git a/drivers/gpu/drm/i915/gt/intel_engine.h b/drivers/gpu/drm/i915/gt/intel_engine.h
index 9359b3a7ad9c..2f1c6871ee95 100644
--- a/drivers/gpu/drm/i915/gt/intel_engine.h
+++ b/drivers/gpu/drm/i915/gt/intel_engine.h
@@ -52,6 +52,7 @@ struct drm_printer;
#define ENGINE_READ(...) __ENGINE_READ_OP(read, __VA_ARGS__)
#define ENGINE_READ_FW(...) __ENGINE_READ_OP(read_fw, __VA_ARGS__)
#define ENGINE_POSTING_READ(...) __ENGINE_READ_OP(posting_read, __VA_ARGS__)
+#define ENGINE_POSTING_READ16(...) __ENGINE_READ_OP(posting_read16, __VA_ARGS__)

#define ENGINE_READ64(engine__, lower_reg__, upper_reg__) \
	__ENGINE_REG_OP(read64_2x32, (engine__), \
@@ -68,6 +69,24 @@ struct drm_printer;
#define ENGINE_WRITE(...) __ENGINE_WRITE_OP(write, __VA_ARGS__)
#define ENGINE_WRITE_FW(...) __ENGINE_WRITE_OP(write_fw, __VA_ARGS__)

+#define GEN6_RING_FAULT_REG_READ(engine__) \
+	intel_uncore_read((engine__)->uncore, RING_FAULT_REG(engine__))
+
+#define GEN6_RING_FAULT_REG_POSTING_READ(engine__) \
+	intel_uncore_posting_read((engine__)->uncore, RING_FAULT_REG(engine__))
+
+#define GEN6_RING_FAULT_REG_RMW(engine__, clear__, set__) \
+({ \
+	u32 __val; \
+\
+	__val = intel_uncore_read((engine__)->uncore, \
+				  RING_FAULT_REG(engine__)); \
+	__val &= ~(clear__); \
+	__val |= (set__); \
+	intel_uncore_write((engine__)->uncore, RING_FAULT_REG(engine__), \
+			   __val); \
+})
+
/* seqno size is actually only a uint32, but since we plan to use MI_FLUSH_DW to
 * do the writes, and that must have qw aligned offsets, simply pretend it's 8b.
 */
@@ -448,8 +467,6 @@ static inline void intel_engine_reset(struct intel_engine_cs *engine,
bool intel_engine_is_idle(struct intel_engine_cs *engine);
bool intel_engines_are_idle(struct drm_i915_private *dev_priv);

-void intel_engine_lost_context(struct intel_engine_cs *engine);
-
void intel_engines_reset_default_submission(struct drm_i915_private *i915);
unsigned int intel_engines_has_context_isolation(struct drm_i915_private *i915);

@@ -526,6 +543,8 @@ ktime_t intel_engine_get_busy_time(struct intel_engine_cs *engine);
struct i915_request *
intel_engine_find_active_request(struct intel_engine_cs *engine);

+u32 intel_engine_context_size(struct drm_i915_private *i915, u8 class);
+
#if IS_ENABLED(CONFIG_DRM_I915_SELFTEST)

static inline bool inject_preempt_hang(struct intel_engine_execlists *execlists)
@@ -546,4 +565,10 @@ static inline bool inject_preempt_hang(struct intel_engine_execlists *execlists)

#endif

+void intel_engine_init_active(struct intel_engine_cs *engine,
+			      unsigned int subclass);
+#define ENGINE_PHYSICAL 0
+#define ENGINE_MOCK 1
+#define ENGINE_VIRTUAL 2
+
#endif /* _INTEL_RINGBUFFER_H_ */
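GEN6_RING_FAULT_REG_RMW() above is a statement-expression that reads the fault register, clears and sets the requested bits, and writes the result back; the intel_reset.c hunks below use it as GEN6_RING_FAULT_REG_RMW(engine, RING_FAULT_VALID, 0) to acknowledge a fault. A self-contained model of that read-modify-write shape, with a plain variable in place of the uncore access (statement expressions are a GCC/clang extension, as in the kernel):

/* cc -o rmw_toy rmw_toy.c */
#include <assert.h>
#include <stdint.h>

#define REG_RMW(reg, clear, set) \
({ \
	uint32_t __val = (reg); \
	__val &= ~(clear); \
	__val |= (set); \
	(reg) = __val; \
})

int main(void)
{
	uint32_t fault = 0x13;	/* pretend bit 0 is RING_FAULT_VALID */

	REG_RMW(fault, 0x1, 0);	/* ack the fault, keep the other bits */
	assert(fault == 0x12);
	return 0;
}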
diff --git a/drivers/gpu/drm/i915/gt/intel_engine_cs.c b/drivers/gpu/drm/i915/gt/intel_engine_cs.c
index 2590f5904b67..7fd33e81c2d9 100644
--- a/drivers/gpu/drm/i915/gt/intel_engine_cs.c
+++ b/drivers/gpu/drm/i915/gt/intel_engine_cs.c
@@ -24,10 +24,13 @@

#include <drm/drm_print.h>

+#include "gem/i915_gem_context.h"
+
#include "i915_drv.h"

#include "intel_engine.h"
#include "intel_engine_pm.h"
+#include "intel_context.h"
#include "intel_lrc.h"
#include "intel_reset.h"

@@ -156,7 +159,7 @@ static const struct engine_info intel_engines[] = {
};

/**
- * ___intel_engine_context_size() - return the size of the context for an engine
+ * intel_engine_context_size() - return the size of the context for an engine
 * @dev_priv: i915 device private
 * @class: engine class
 *
@@ -169,8 +172,7 @@ static const struct engine_info intel_engines[] = {
 * in LRC mode, but does not include the "shared data page" used with
 * GuC submission. The caller should account for this if using the GuC.
 */
-static u32
-__intel_engine_context_size(struct drm_i915_private *dev_priv, u8 class)
+u32 intel_engine_context_size(struct drm_i915_private *dev_priv, u8 class)
{
	u32 cxt_size;

@@ -327,8 +329,8 @@ intel_engine_setup(struct drm_i915_private *dev_priv,

	engine->uabi_class = intel_engine_classes[info->class].uabi_class;

-	engine->context_size = __intel_engine_context_size(dev_priv,
-							   engine->class);
+	engine->context_size = intel_engine_context_size(dev_priv,
+							 engine->class);
	if (WARN_ON(engine->context_size > BIT(20)))
		engine->context_size = 0;
	if (engine->context_size)
@@ -525,7 +527,7 @@ static void cleanup_status_page(struct intel_engine_cs *engine)
		i915_vma_unpin(vma);

	i915_gem_object_unpin_map(vma->obj);
-	__i915_gem_object_release_unless_active(vma->obj);
+	i915_gem_object_put(vma->obj);
}

static int pin_ggtt_status_page(struct intel_engine_cs *engine,
@@ -609,18 +611,13 @@ static int intel_engine_setup_common(struct intel_engine_cs *engine)
{
	int err;

+	init_llist_head(&engine->barrier_tasks);
+
	err = init_status_page(engine);
	if (err)
		return err;

-	err = i915_timeline_init(engine->i915,
-				 &engine->timeline,
-				 engine->status_page.vma);
-	if (err)
-		goto err_hwsp;
-
-	i915_timeline_set_subclass(&engine->timeline, TIMELINE_ENGINE);
-
+	intel_engine_init_active(engine, ENGINE_PHYSICAL);
	intel_engine_init_breadcrumbs(engine);
	intel_engine_init_execlists(engine);
	intel_engine_init_hangcheck(engine);
@@ -633,10 +630,6 @@ static int intel_engine_setup_common(struct intel_engine_cs *engine)
		intel_sseu_from_device_info(&RUNTIME_INFO(engine->i915)->sseu);

	return 0;
-
-err_hwsp:
-	cleanup_status_page(engine);
-	return err;
}

/**
@@ -793,6 +786,27 @@ static int pin_context(struct i915_gem_context *ctx,
	return 0;
}

+void
+intel_engine_init_active(struct intel_engine_cs *engine, unsigned int subclass)
+{
+	INIT_LIST_HEAD(&engine->active.requests);
+
+	spin_lock_init(&engine->active.lock);
+	lockdep_set_subclass(&engine->active.lock, subclass);
+
+	/*
+	 * Due to an interesting quirk in lockdep's internal debug tracking,
+	 * after setting a subclass we must ensure the lock is used. Otherwise,
+	 * nr_unused_locks is incremented once too often.
+	 */
+#ifdef CONFIG_DEBUG_LOCK_ALLOC
+	local_irq_disable();
+	lock_map_acquire(&engine->active.lock.dep_map);
+	lock_map_release(&engine->active.lock.dep_map);
+	local_irq_enable();
+#endif
+}
+
/**
 * intel_engines_init_common - initialize cengine state which might require hw access
 * @engine: Engine to initialize.
@@ -856,6 +870,8 @@ err_unpin:
 */
void intel_engine_cleanup_common(struct intel_engine_cs *engine)
{
+	GEM_BUG_ON(!list_empty(&engine->active.requests));
+
	cleanup_status_page(engine);

	intel_engine_fini_breadcrumbs(engine);
@@ -868,8 +884,7 @@ void intel_engine_cleanup_common(struct intel_engine_cs *engine)
	if (engine->preempt_context)
		intel_context_unpin(engine->preempt_context);
	intel_context_unpin(engine->kernel_context);
-
-	i915_timeline_fini(&engine->timeline);
+	GEM_BUG_ON(!llist_empty(&engine->barrier_tasks));

	intel_wa_list_free(&engine->ctx_wa_list);
	intel_wa_list_free(&engine->wa_list);
@@ -970,11 +985,12 @@ u32 intel_calculate_mcr_s_ss_select(struct drm_i915_private *dev_priv)
	return mcr_s_ss_select;
}

-static inline u32
-read_subslice_reg(struct drm_i915_private *dev_priv, int slice,
-		  int subslice, i915_reg_t reg)
+static u32
+read_subslice_reg(struct intel_engine_cs *engine, int slice, int subslice,
+		  i915_reg_t reg)
{
-	struct intel_uncore *uncore = &dev_priv->uncore;
+	struct drm_i915_private *i915 = engine->i915;
+	struct intel_uncore *uncore = engine->uncore;
	u32 mcr_slice_subslice_mask;
	u32 mcr_slice_subslice_select;
	u32 default_mcr_s_ss_select;
@@ -982,7 +998,7 @@ read_subslice_reg(struct drm_i915_private *dev_priv, int slice,
	u32 ret;
	enum forcewake_domains fw_domains;

-	if (INTEL_GEN(dev_priv) >= 11) {
+	if (INTEL_GEN(i915) >= 11) {
		mcr_slice_subslice_mask = GEN11_MCR_SLICE_MASK |
					  GEN11_MCR_SUBSLICE_MASK;
		mcr_slice_subslice_select = GEN11_MCR_SLICE(slice) |
@@ -994,7 +1010,7 @@ read_subslice_reg(struct drm_i915_private *dev_priv, int slice,
					    GEN8_MCR_SUBSLICE(subslice);
	}

-	default_mcr_s_ss_select = intel_calculate_mcr_s_ss_select(dev_priv);
+	default_mcr_s_ss_select = intel_calculate_mcr_s_ss_select(i915);

	fw_domains = intel_uncore_forcewake_for_reg(uncore, reg,
						    FW_REG_READ);
@@ -1031,7 +1047,7 @@ read_subslice_reg(struct drm_i915_private *dev_priv, int slice,
void intel_engine_get_instdone(struct intel_engine_cs *engine,
			       struct intel_instdone *instdone)
{
-	struct drm_i915_private *dev_priv = engine->i915;
+	struct drm_i915_private *i915 = engine->i915;
	struct intel_uncore *uncore = engine->uncore;
	u32 mmio_base = engine->mmio_base;
	int slice;
@@ -1039,7 +1055,7 @@ void intel_engine_get_instdone(struct intel_engine_cs *engine,

	memset(instdone, 0, sizeof(*instdone));

-	switch (INTEL_GEN(dev_priv)) {
+	switch (INTEL_GEN(i915)) {
	default:
		instdone->instdone =
			intel_uncore_read(uncore, RING_INSTDONE(mmio_base));
@@ -1049,12 +1065,12 @@ void intel_engine_get_instdone(struct intel_engine_cs *engine,

		instdone->slice_common =
			intel_uncore_read(uncore, GEN7_SC_INSTDONE);
-		for_each_instdone_slice_subslice(dev_priv, slice, subslice) {
+		for_each_instdone_slice_subslice(i915, slice, subslice) {
			instdone->sampler[slice][subslice] =
-				read_subslice_reg(dev_priv, slice, subslice,
+				read_subslice_reg(engine, slice, subslice,
						  GEN7_SAMPLER_INSTDONE);
			instdone->row[slice][subslice] =
-				read_subslice_reg(dev_priv, slice, subslice,
+				read_subslice_reg(engine, slice, subslice,
						  GEN7_ROW_INSTDONE);
		}
		break;
@@ -1100,7 +1116,7 @@ static bool ring_is_idle(struct intel_engine_cs *engine)
		return true;

	/* If the whole device is asleep, the engine must be idle */
-	wakeref = intel_runtime_pm_get_if_in_use(dev_priv);
+	wakeref = intel_runtime_pm_get_if_in_use(&dev_priv->runtime_pm);
	if (!wakeref)
		return true;

@@ -1114,7 +1130,7 @@ static bool ring_is_idle(struct intel_engine_cs *engine)
	    !(ENGINE_READ(engine, RING_MI_MODE) & MODE_IDLE))
		idle = false;

-	intel_runtime_pm_put(dev_priv, wakeref);
+	intel_runtime_pm_put(&dev_priv->runtime_pm, wakeref);

	return idle;
}
@@ -1198,26 +1214,6 @@ void intel_engines_reset_default_submission(struct drm_i915_private *i915)
		engine->set_default_submission(engine);
}

-/**
- * intel_engine_lost_context: called when the GPU is reset into unknown state
- * @engine: the engine
- *
- * We have either reset the GPU or otherwise about to lose state tracking of
- * the current GPU logical state (e.g. suspend). On next use, it is therefore
- * imperative that we make no presumptions about the current state and load
- * from scratch.
- */
-void intel_engine_lost_context(struct intel_engine_cs *engine)
-{
-	struct intel_context *ce;
-
-	lockdep_assert_held(&engine->i915->drm.struct_mutex);
-
-	ce = fetch_and_zero(&engine->last_retired_context);
-	if (ce)
-		intel_context_unpin(ce);
-}
-
bool intel_engine_can_store_dword(struct intel_engine_cs *engine)
{
	switch (INTEL_GEN(engine->i915)) {
@@ -1315,12 +1311,13 @@ static void hexdump(struct drm_printer *m, const void *buf, size_t len)
	}
}

-static void intel_engine_print_registers(const struct intel_engine_cs *engine,
+static void intel_engine_print_registers(struct intel_engine_cs *engine,
					 struct drm_printer *m)
{
	struct drm_i915_private *dev_priv = engine->i915;
	const struct intel_engine_execlists * const execlists =
		&engine->execlists;
+	unsigned long flags;
	u64 addr;

	if (engine->id == RCS0 && IS_GEN_RANGE(dev_priv, 4, 7))
@@ -1401,15 +1398,16 @@ static void intel_engine_print_registers(const struct intel_engine_cs *engine,
				   idx, hws[idx * 2], hws[idx * 2 + 1]);
		}

-		rcu_read_lock();
+		spin_lock_irqsave(&engine->active.lock, flags);
		for (idx = 0; idx < execlists_num_ports(execlists); idx++) {
			struct i915_request *rq;
			unsigned int count;
+			char hdr[80];

			rq = port_unpack(&execlists->port[idx], &count);
-			if (rq) {
-				char hdr[80];
-
+			if (!rq) {
+				drm_printf(m, "\t\tELSP[%d] idle\n", idx);
+			} else if (!i915_request_signaled(rq)) {
				snprintf(hdr, sizeof(hdr),
					 "\t\tELSP[%d] count=%d, ring:{start:%08x, hwsp:%08x, seqno:%08x}, rq: ",
					 idx, count,
@@ -1418,11 +1416,11 @@ static void intel_engine_print_registers(const struct intel_engine_cs *engine,
					 hwsp_seqno(rq));
				print_request(m, rq, hdr);
			} else {
-				drm_printf(m, "\t\tELSP[%d] idle\n", idx);
+				print_request(m, rq, "\t\tELSP[%d] rq: ");
			}
		}
		drm_printf(m, "\t\tHW active? 0x%x\n", execlists->active);
-		rcu_read_unlock();
+		spin_unlock_irqrestore(&engine->active.lock, flags);
	} else if (INTEL_GEN(dev_priv) > 6) {
		drm_printf(m, "\tPP_DIR_BASE: 0x%08x\n",
			   ENGINE_READ(engine, RING_PP_DIR_BASE));
@@ -1496,16 +1494,6 @@ void intel_engine_dump(struct intel_engine_cs *engine,

	drm_printf(m, "\tRequests:\n");

-	rq = list_first_entry(&engine->timeline.requests,
-			      struct i915_request, link);
-	if (&rq->link != &engine->timeline.requests)
-		print_request(m, rq, "\t\tfirst ");
-
-	rq = list_last_entry(&engine->timeline.requests,
-			     struct i915_request, link);
-	if (&rq->link != &engine->timeline.requests)
-		print_request(m, rq, "\t\tlast ");
-
	rq = intel_engine_find_active_request(engine);
	if (rq) {
		print_request(m, rq, "\t\tactive ");
@@ -1528,10 +1516,10 @@ void intel_engine_dump(struct intel_engine_cs *engine,

	rcu_read_unlock();

-	wakeref = intel_runtime_pm_get_if_in_use(engine->i915);
+	wakeref = intel_runtime_pm_get_if_in_use(&engine->i915->runtime_pm);
	if (wakeref) {
		intel_engine_print_registers(engine, m);
-		intel_runtime_pm_put(engine->i915, wakeref);
+		intel_runtime_pm_put(&engine->i915->runtime_pm, wakeref);
	} else {
		drm_printf(m, "\tDevice is asleep; skipping register dump\n");
	}
@@ -1586,7 +1574,7 @@ int intel_enable_engine_stats(struct intel_engine_cs *engine)
	if (!intel_engine_supports_stats(engine))
		return -ENODEV;

-	spin_lock_irqsave(&engine->timeline.lock, flags);
+	spin_lock_irqsave(&engine->active.lock, flags);
	write_seqlock(&engine->stats.lock);

	if (unlikely(engine->stats.enabled == ~0)) {
@@ -1612,7 +1600,7 @@ int intel_enable_engine_stats(struct intel_engine_cs *engine)

unlock:
	write_sequnlock(&engine->stats.lock);
-	spin_unlock_irqrestore(&engine->timeline.lock, flags);
+	spin_unlock_irqrestore(&engine->active.lock, flags);

	return err;
}
@@ -1697,22 +1685,22 @@ intel_engine_find_active_request(struct intel_engine_cs *engine)
	 * At all other times, we must assume the GPU is still running, but
	 * we only care about the snapshot of this moment.
	 */
-	spin_lock_irqsave(&engine->timeline.lock, flags);
-	list_for_each_entry(request, &engine->timeline.requests, link) {
+	spin_lock_irqsave(&engine->active.lock, flags);
+	list_for_each_entry(request, &engine->active.requests, sched.link) {
		if (i915_request_completed(request))
			continue;

		if (!i915_request_started(request))
-			break;
+			continue;

		/* More than one preemptible request may match! */
		if (!match_ring(request))
-			break;
+			continue;

		active = request;
		break;
	}
-	spin_unlock_irqrestore(&engine->timeline.lock, flags);
+	spin_unlock_irqrestore(&engine->active.lock, flags);

	return active;
}
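intel_engine_find_active_request() above now walks engine->active.requests, where requests from many timelines are interleaved, so an entry that fails a check is skipped (continue) rather than terminating the walk (break). A minimal standalone model of that scan — the struct fields are stand-ins for the request checks, not driver types:

/* cc -o scan_toy scan_toy.c */
#include <stdbool.h>
#include <stdio.h>

struct req {
	bool completed;
	bool started;
	bool on_ring;	/* stands in for match_ring() */
	int id;
};

static const struct req *find_active(const struct req *r, int n)
{
	for (int i = 0; i < n; i++) {
		if (r[i].completed)
			continue;
		if (!r[i].started)
			continue;	/* was "break" on the old per-engine timeline */
		if (!r[i].on_ring)
			continue;	/* more than one preemptible request may match */
		return &r[i];
	}
	return NULL;
}

int main(void)
{
	const struct req reqs[] = {
		{ .completed = true,  .started = true,  .on_ring = true,  .id = 1 },
		{ .completed = false, .started = true,  .on_ring = false, .id = 2 },
		{ .completed = false, .started = true,  .on_ring = true,  .id = 3 },
	};
	const struct req *rq = find_active(reqs, 3);

	printf("active request: %d\n", rq ? rq->id : -1);	/* 3, not -1 */
	return 0;
}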
diff --git a/drivers/gpu/drm/i915/gt/intel_engine_pm.c b/drivers/gpu/drm/i915/gt/intel_engine_pm.c
index ccf034764741..2ce00d3dc42a 100644
--- a/drivers/gpu/drm/i915/gt/intel_engine_pm.c
+++ b/drivers/gpu/drm/i915/gt/intel_engine_pm.c
@@ -37,7 +37,7 @@ static int __engine_unpark(struct intel_wakeref *wf)

void intel_engine_pm_get(struct intel_engine_cs *engine)
{
-	intel_wakeref_get(engine->i915, &engine->wakeref, __engine_unpark);
+	intel_wakeref_get(&engine->i915->runtime_pm, &engine->wakeref, __engine_unpark);
}

void intel_engine_park(struct intel_engine_cs *engine)
@@ -88,6 +88,8 @@ static bool switch_to_kernel_context(struct intel_engine_cs *engine)
	/* Check again on the next retirement. */
	engine->wakeref_serial = engine->serial + 1;
+
+	i915_request_add_barriers(rq);
	__i915_request_commit(rq);

	return false;
@@ -98,6 +100,8 @@ static int __engine_park(struct intel_wakeref *wf)
	struct intel_engine_cs *engine =
		container_of(wf, typeof(*engine), wakeref);

+	engine->saturated = 0;
+
	/*
	 * If one and only one request is completed between pm events,
	 * we know that we are inside the kernel context and it is
@@ -131,7 +135,7 @@ static int __engine_park(struct intel_wakeref *wf)

void intel_engine_pm_put(struct intel_engine_cs *engine)
{
-	intel_wakeref_put(engine->i915, &engine->wakeref, __engine_park);
+	intel_wakeref_put(&engine->i915->runtime_pm, &engine->wakeref, __engine_park);
}

void intel_engine_init__pm(struct intel_engine_cs *engine)
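The engine PM hooks above wrap an intel_wakeref: the first get runs __engine_unpark() and the last put runs __engine_park(), which is where engine->saturated is now cleared. A toy of that first/last transition — the callback names echo the driver's, but the code is illustrative only:

/* cc -o wakeref_toy wakeref_toy.c */
#include <stdio.h>

struct wakeref {
	int count;
	int (*unpark)(void);
	int (*park)(void);
};

static int wakeref_get(struct wakeref *wf)
{
	if (!wf->count++)	/* 0 -> 1: power up */
		return wf->unpark();
	return 0;
}

static int wakeref_put(struct wakeref *wf)
{
	if (!--wf->count)	/* 1 -> 0: power down */
		return wf->park();
	return 0;
}

static int engine_unpark(void) { puts("unpark"); return 0; }
static int engine_park(void)   { puts("park: saturated = 0"); return 0; }

int main(void)
{
	struct wakeref wf = { .unpark = engine_unpark, .park = engine_park };

	wakeref_get(&wf);
	wakeref_get(&wf);	/* nested: no callback */
	wakeref_put(&wf);
	wakeref_put(&wf);	/* engine parks here */
	return 0;
}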
-	 */
-	struct intel_context *last_retired_context;
-
	/* status_notifier: list of callbacks for context-switch changes */
	struct atomic_notifier_head context_status_notifier;
diff --git a/drivers/gpu/drm/i915/gt/intel_gpu_commands.h b/drivers/gpu/drm/i915/gt/intel_gpu_commands.h
index a34ece53a771..eec31e36aca7 100644
--- a/drivers/gpu/drm/i915/gt/intel_gpu_commands.h
+++ b/drivers/gpu/drm/i915/gt/intel_gpu_commands.h
@@ -180,6 +180,7 @@
#define GFX_OP_DRAWRECT_INFO_I965 ((0x7900<<16)|0x2)

#define COLOR_BLT_CMD (2<<29 | 0x40<<22 | (5-2))
+#define XY_COLOR_BLT_CMD (2 << 29 | 0x50 << 22)
#define SRC_COPY_BLT_CMD ((2<<29)|(0x43<<22)|4)
#define XY_SRC_COPY_BLT_CMD ((2<<29)|(0x53<<22)|6)
#define XY_MONO_SRC_COPY_IMM_BLT ((2<<29)|(0x71<<22)|5)
diff --git a/drivers/gpu/drm/i915/gt/intel_gt_pm.c b/drivers/gpu/drm/i915/gt/intel_gt_pm.c
index ae7155f0e063..7b5967751762 100644
--- a/drivers/gpu/drm/i915/gt/intel_gt_pm.c
+++ b/drivers/gpu/drm/i915/gt/intel_gt_pm.c
@@ -52,7 +52,7 @@ static int intel_gt_unpark(struct intel_wakeref *wf)

void intel_gt_pm_get(struct drm_i915_private *i915)
{
-	intel_wakeref_get(i915, &i915->gt.wakeref, intel_gt_unpark);
+	intel_wakeref_get(&i915->runtime_pm, &i915->gt.wakeref, intel_gt_unpark);
}

static int intel_gt_park(struct intel_wakeref *wf)
@@ -77,7 +77,7 @@ static int intel_gt_park(struct intel_wakeref *wf)

void intel_gt_pm_put(struct drm_i915_private *i915)
{
-	intel_wakeref_put(i915, &i915->gt.wakeref, intel_gt_park);
+	intel_wakeref_put(&i915->runtime_pm, &i915->gt.wakeref, intel_gt_park);
}

void intel_gt_pm_init(struct drm_i915_private *i915)
diff --git a/drivers/gpu/drm/i915/gt/intel_hangcheck.c b/drivers/gpu/drm/i915/gt/intel_hangcheck.c
index 3a4d09b80fa0..6bcfa6456c45 100644
--- a/drivers/gpu/drm/i915/gt/intel_hangcheck.c
+++ b/drivers/gpu/drm/i915/gt/intel_hangcheck.c
@@ -223,8 +223,8 @@ static void hangcheck_accumulate_sample(struct intel_engine_cs *engine,
}

static void hangcheck_declare_hang(struct drm_i915_private *i915,
-				   unsigned int hung,
-				   unsigned int stuck)
+				   intel_engine_mask_t hung,
+				   intel_engine_mask_t stuck)
{
	struct intel_engine_cs *engine;
	intel_engine_mask_t tmp;
@@ -259,9 +259,9 @@ static void i915_hangcheck_elapsed(struct work_struct *work)
	struct drm_i915_private *dev_priv =
		container_of(work, typeof(*dev_priv),
			     gpu_error.hangcheck_work.work);
+	intel_engine_mask_t hung = 0, stuck = 0, wedged = 0;
	struct intel_engine_cs *engine;
	enum intel_engine_id id;
-	unsigned int hung = 0, stuck = 0, wedged = 0;
	intel_wakeref_t wakeref;

	if (!i915_modparams.enable_hangcheck)
@@ -273,7 +273,7 @@ static void i915_hangcheck_elapsed(struct work_struct *work)
	if (i915_terminally_wedged(dev_priv))
		return;

-	wakeref = intel_runtime_pm_get_if_in_use(dev_priv);
+	wakeref = intel_runtime_pm_get_if_in_use(&dev_priv->runtime_pm);
	if (!wakeref)
		return;

@@ -324,7 +324,7 @@ static void i915_hangcheck_elapsed(struct work_struct *work)
	if (hung)
		hangcheck_declare_hang(dev_priv, hung, stuck);

-	intel_runtime_pm_put(dev_priv, wakeref);
+	intel_runtime_pm_put(&dev_priv->runtime_pm, wakeref);

	/* Reset timer in case GPU hangs without another request being added */
	i915_queue_hangcheck(dev_priv);
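The hangcheck hunks above switch hung/stuck/wedged from a bare unsigned int to intel_engine_mask_t, so every engine bitmask in the driver shares one well-defined type. A small standalone demo of accumulating and walking such a mask — the typedef name mirrors the driver's, but its width here is an assumption for the sketch:

/* cc -o mask_toy mask_toy.c */
#include <stdint.h>
#include <stdio.h>

typedef uint32_t intel_engine_mask_t;

#define ENGINE_BIT(id) ((intel_engine_mask_t)1 << (id))

int main(void)
{
	intel_engine_mask_t hung = 0, stuck = 0;

	hung |= ENGINE_BIT(0);	/* engine 0 hung */
	stuck |= ENGINE_BIT(2);	/* engine 2 stuck */
	printf("hung=%#x stuck=%#x\n", hung, stuck);

	/* iterate set bits, clearing the lowest each pass */
	for (intel_engine_mask_t tmp = hung; tmp; tmp &= tmp - 1)
		printf("reset engine %d\n", __builtin_ctz(tmp));
	return 0;
}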
diff --git a/drivers/gpu/drm/i915/gt/intel_lrc.c b/drivers/gpu/drm/i915/gt/intel_lrc.c
index 1f7bee0cae0c..b42b5f158295 100644
--- a/drivers/gpu/drm/i915/gt/intel_lrc.c
+++ b/drivers/gpu/drm/i915/gt/intel_lrc.c
@@ -133,6 +133,8 @@
 */
#include <linux/interrupt.h>

+#include "gem/i915_gem_context.h"
+
#include "i915_drv.h"
#include "i915_gem_render_state.h"
#include "i915_vgpu.h"
@@ -296,8 +298,8 @@ static inline bool need_preempt(const struct intel_engine_cs *engine,
	 * Check against the first request in ELSP[1], it will, thanks to the
	 * power of PI, be the highest priority of that context.
	 */
-	if (!list_is_last(&rq->link, &engine->timeline.requests) &&
-	    rq_prio(list_next_entry(rq, link)) > last_prio)
+	if (!list_is_last(&rq->sched.link, &engine->active.requests) &&
+	    rq_prio(list_next_entry(rq, sched.link)) > last_prio)
		return true;

	if (rb) {
@@ -432,11 +434,11 @@ __unwind_incomplete_requests(struct intel_engine_cs *engine)
	struct list_head *uninitialized_var(pl);
	int prio = I915_PRIORITY_INVALID;

-	lockdep_assert_held(&engine->timeline.lock);
+	lockdep_assert_held(&engine->active.lock);

	list_for_each_entry_safe_reverse(rq, rn,
-					 &engine->timeline.requests,
-					 link) {
+					 &engine->active.requests,
+					 sched.link) {
		struct intel_engine_cs *owner;

		if (i915_request_completed(rq))
@@ -445,7 +447,7 @@ __unwind_incomplete_requests(struct intel_engine_cs *engine)
		__i915_request_unsubmit(rq);
		unwind_wa_tail(rq);

-		GEM_BUG_ON(rq->hw_context->active);
+		GEM_BUG_ON(rq->hw_context->inflight);

		/*
		 * Push the request back into the queue for later resubmission.
@@ -463,7 +465,7 @@ __unwind_incomplete_requests(struct intel_engine_cs *engine)
			}
			GEM_BUG_ON(RB_EMPTY_ROOT(&engine->execlists.queue.rb_root));

-			list_add(&rq->sched.link, pl);
+			list_move(&rq->sched.link, pl);
			active = rq;
		} else {
			rq->engine = owner;
@@ -514,11 +516,11 @@ execlists_user_end(struct intel_engine_execlists *execlists)
static inline void
execlists_context_schedule_in(struct i915_request *rq)
{
-	GEM_BUG_ON(rq->hw_context->active);
+	GEM_BUG_ON(rq->hw_context->inflight);

	execlists_context_status_change(rq, INTEL_CONTEXT_SCHEDULE_IN);
	intel_engine_context_in(rq->engine);
-	rq->hw_context->active = rq->engine;
+	rq->hw_context->inflight = rq->engine;
}

static void kick_siblings(struct i915_request *rq)
@@ -533,7 +535,7 @@ static void kick_siblings(struct i915_request *rq)
static inline void
execlists_context_schedule_out(struct i915_request *rq, unsigned long status)
{
-	rq->hw_context->active = NULL;
+	rq->hw_context->inflight = NULL;
	intel_engine_context_out(rq->engine);
	execlists_context_status_change(rq, status);
	trace_i915_request_out(rq);
@@ -776,7 +778,7 @@ static bool virtual_matches(const struct virtual_engine *ve,
			    const struct i915_request *rq,
			    const struct intel_engine_cs *engine)
{
-	const struct intel_engine_cs *active;
+	const struct intel_engine_cs *inflight;

	if (!(rq->execution_mask & engine->mask)) /* We peeked too soon! */
		return false;
@@ -790,8 +792,8 @@ static bool virtual_matches(const struct virtual_engine *ve,
	 * we reuse the register offsets). This is a very small
	 * hystersis on the greedy seelction algorithm.
	 */
-	active = READ_ONCE(ve->context.active);
-	if (active && active != engine)
+	inflight = READ_ONCE(ve->context.inflight);
+	if (inflight && inflight != engine)
		return false;

	return true;
@@ -931,11 +933,11 @@ static void execlists_dequeue(struct intel_engine_cs *engine)
			rb_entry(rb, typeof(*ve), nodes[engine->id].rb);
		struct i915_request *rq;

-		spin_lock(&ve->base.timeline.lock);
+		spin_lock(&ve->base.active.lock);

		rq = ve->request;
		if (unlikely(!rq)) { /* lost the race to a sibling */
-			spin_unlock(&ve->base.timeline.lock);
+			spin_unlock(&ve->base.active.lock);
			rb_erase_cached(rb, &execlists->virtual);
			RB_CLEAR_NODE(rb);
			rb = rb_first_cached(&execlists->virtual);
@@ -948,13 +950,13 @@ static void execlists_dequeue(struct intel_engine_cs *engine)

		if (rq_prio(rq) >= queue_prio(execlists)) {
			if (!virtual_matches(ve, rq, engine)) {
-				spin_unlock(&ve->base.timeline.lock);
+				spin_unlock(&ve->base.active.lock);
				rb = rb_next(rb);
				continue;
			}

			if (last && !can_merge_rq(last, rq)) {
-				spin_unlock(&ve->base.timeline.lock);
+				spin_unlock(&ve->base.active.lock);
				return; /* leave this rq for another engine */
			}

@@ -979,7 +981,7 @@ static void execlists_dequeue(struct intel_engine_cs *engine)
				u32 *regs = ve->context.lrc_reg_state;
				unsigned int n;

-				GEM_BUG_ON(READ_ONCE(ve->context.active));
+				GEM_BUG_ON(READ_ONCE(ve->context.inflight));
				virtual_update_register_offsets(regs, engine);

				if (!list_empty(&ve->context.signals))
@@ -1009,7 +1011,7 @@ static void execlists_dequeue(struct intel_engine_cs *engine)
				last = rq;
			}

-			spin_unlock(&ve->base.timeline.lock);
+			spin_unlock(&ve->base.active.lock);
			break;
		}
	}
@@ -1066,8 +1068,6 @@
		GEM_BUG_ON(port_isset(port));
	}

-	list_del_init(&rq->sched.link);
-
	__i915_request_submit(rq);
	trace_i915_request_in(rq, port_index(port, execlists));
@@ -1168,7 +1168,8 @@ static void process_csb(struct intel_engine_cs *engine)
	const u8 num_entries = execlists->csb_size;
	u8 head, tail;

-	lockdep_assert_held(&engine->timeline.lock);
+	lockdep_assert_held(&engine->active.lock);
+	GEM_BUG_ON(USES_GUC_SUBMISSION(engine->i915));

	/*
	 * Note that csb_write, csb_status may be either in HWSP or mmio.
@@ -1328,7 +1329,7 @@ static void process_csb(struct intel_engine_cs *engine)

static void __execlists_submission_tasklet(struct intel_engine_cs *const engine)
{
-	lockdep_assert_held(&engine->timeline.lock);
+	lockdep_assert_held(&engine->active.lock);

	process_csb(engine);
	if (!execlists_is_active(&engine->execlists, EXECLISTS_ACTIVE_PREEMPT))
@@ -1349,15 +1350,16 @@ static void execlists_submission_tasklet(unsigned long data)
		  !!intel_wakeref_active(&engine->wakeref),
		  engine->execlists.active);

-	spin_lock_irqsave(&engine->timeline.lock, flags);
+	spin_lock_irqsave(&engine->active.lock, flags);
	__execlists_submission_tasklet(engine);
-	spin_unlock_irqrestore(&engine->timeline.lock, flags);
+	spin_unlock_irqrestore(&engine->active.lock, flags);
}

static void queue_request(struct intel_engine_cs *engine,
			  struct i915_sched_node *node,
			  int prio)
{
+	GEM_BUG_ON(!list_empty(&node->link));
	list_add_tail(&node->link, i915_sched_lookup_priolist(engine, prio));
}

@@ -1388,7 +1390,7 @@ static void execlists_submit_request(struct i915_request *request)
	unsigned long flags;

	/* Will be called from irq-context when using foreign fences. */
-	spin_lock_irqsave(&engine->timeline.lock, flags);
+	spin_lock_irqsave(&engine->active.lock, flags);

	queue_request(engine, &request->sched, rq_prio(request));

@@ -1397,7 +1399,7 @@ static void execlists_submit_request(struct i915_request *request)

	submit_queue(engine, rq_prio(request));

-	spin_unlock_irqrestore(&engine->timeline.lock, flags);
+	spin_unlock_irqrestore(&engine->active.lock, flags);
}

static void __execlists_context_fini(struct intel_context *ce)
@@ -1420,60 +1422,11 @@ static void execlists_context_destroy(struct kref *kref)
	intel_context_free(ce);
}

-static int __context_pin(struct i915_vma *vma)
-{
-	unsigned int flags;
-	int err;
-
-	flags = PIN_GLOBAL | PIN_HIGH;
-	flags |= PIN_OFFSET_BIAS | i915_ggtt_pin_bias(vma);
-
-	err = i915_vma_pin(vma, 0, 0, flags);
-	if (err)
-		return err;
-
-	vma->obj->pin_global++;
-	vma->obj->mm.dirty = true;
-
-	return 0;
-}
-
-static void __context_unpin(struct i915_vma *vma)
-{
-	vma->obj->pin_global--;
-	__i915_vma_unpin(vma);
-}
-
static void execlists_context_unpin(struct intel_context *ce)
{
-	struct intel_engine_cs *engine;
-
-	/*
-	 * The tasklet may still be using a pointer to our state, via an
-	 * old request. However, since we know we only unpin the context
-	 * on retirement of the following request, we know that the last
-	 * request referencing us will have had a completion CS interrupt.
-	 * If we see that it is still active, it means that the tasklet hasn't
-	 * had the chance to run yet; let it run before we teardown the
-	 * reference it may use.
-	 */
-	engine = READ_ONCE(ce->active);
-	if (unlikely(engine)) {
-		unsigned long flags;
-
-		spin_lock_irqsave(&engine->timeline.lock, flags);
-		process_csb(engine);
-		spin_unlock_irqrestore(&engine->timeline.lock, flags);
-
-		GEM_BUG_ON(READ_ONCE(ce->active));
-	}
-
	i915_gem_context_unpin_hw_id(ce->gem_context);
-
-	intel_ring_unpin(ce->ring);
	i915_gem_object_unpin_map(ce->state->obj);
-	__context_unpin(ce->state);
+	intel_ring_unpin(ce->ring);
}

static void
@@ -1503,14 +1456,17 @@ __execlists_context_pin(struct intel_context *ce,
	void *vaddr;
	int ret;

-	GEM_BUG_ON(!ce->gem_context->ppgtt);
+	GEM_BUG_ON(!ce->gem_context->vm);

	ret = execlists_context_deferred_alloc(ce, engine);
	if (ret)
		goto err;
	GEM_BUG_ON(!ce->state);

-	ret = __context_pin(ce->state);
+	ret = intel_context_active_acquire(ce,
+					   engine->i915->ggtt.pin_bias |
+					   PIN_OFFSET_BIAS |
+					   PIN_HIGH);
	if (ret)
		goto err;

@@ -1519,7 +1475,7 @@ __execlists_context_pin(struct intel_context *ce,
				    I915_MAP_OVERRIDE);
	if (IS_ERR(vaddr)) {
		ret = PTR_ERR(vaddr);
-		goto unpin_vma;
+		goto unpin_active;
	}

	ret = intel_ring_pin(ce->ring);
@@ -1540,8 +1496,8 @@ unpin_ring:
	intel_ring_unpin(ce->ring);
unpin_map:
	i915_gem_object_unpin_map(ce->state->obj);
-unpin_vma:
-	__context_unpin(ce->state);
+unpin_active:
+	intel_context_active_release(ce);
err:
	return ret;
}
@@ -1619,7 +1575,8 @@ static int gen8_emit_init_breadcrumb(struct i915_request *rq)

static int emit_pdps(struct i915_request *rq)
{
	const struct intel_engine_cs * const engine = rq->engine;
-	struct i915_hw_ppgtt * const ppgtt = rq->gem_context->ppgtt;
+	struct i915_ppgtt * const ppgtt =
+		i915_vm_to_ppgtt(rq->gem_context->vm);
	int err, i;
	u32 *cs;

@@ -1692,7 +1649,7 @@ static int execlists_request_alloc(struct i915_request *request)
	 */

	/* Unconditionally invalidate GPU caches and TLBs. */
-	if (i915_vm_is_4lvl(&request->gem_context->ppgtt->vm))
+	if (i915_vm_is_4lvl(request->gem_context->vm))
		ret = request->engine->emit_flush(request, EMIT_INVALIDATE);
	else
		ret = emit_pdps(request);
@@ -1919,7 +1876,7 @@ static int lrc_setup_wa_ctx(struct intel_engine_cs *engine)
	struct i915_vma *vma;
	int err;

-	obj = i915_gem_object_create(engine->i915, CTX_WA_BB_OBJ_SIZE);
+	obj = i915_gem_object_create_shmem(engine->i915, CTX_WA_BB_OBJ_SIZE);
	if (IS_ERR(obj))
		return PTR_ERR(obj);

@@ -2019,31 +1976,30 @@ static int intel_init_workaround_bb(struct intel_engine_cs *engine)

static void enable_execlists(struct intel_engine_cs *engine)
{
-	struct drm_i915_private *dev_priv = engine->i915;
-
	intel_engine_set_hwsp_writemask(engine, ~0u); /* HWSTAM */

-	if (INTEL_GEN(dev_priv) >= 11)
-		I915_WRITE(RING_MODE_GEN7(engine),
-			   _MASKED_BIT_ENABLE(GEN11_GFX_DISABLE_LEGACY_MODE));
+	if (INTEL_GEN(engine->i915) >= 11)
+		ENGINE_WRITE(engine,
+			     RING_MODE_GEN7,
+			     _MASKED_BIT_ENABLE(GEN11_GFX_DISABLE_LEGACY_MODE));
	else
-		I915_WRITE(RING_MODE_GEN7(engine),
-			   _MASKED_BIT_ENABLE(GFX_RUN_LIST_ENABLE));
+		ENGINE_WRITE(engine,
+			     RING_MODE_GEN7,
+			     _MASKED_BIT_ENABLE(GFX_RUN_LIST_ENABLE));

-	I915_WRITE(RING_MI_MODE(engine->mmio_base),
-		   _MASKED_BIT_DISABLE(STOP_RING));
+	ENGINE_WRITE(engine, RING_MI_MODE, _MASKED_BIT_DISABLE(STOP_RING));

-	I915_WRITE(RING_HWS_PGA(engine->mmio_base),
-		   i915_ggtt_offset(engine->status_page.vma));
-	POSTING_READ(RING_HWS_PGA(engine->mmio_base));
+	ENGINE_WRITE(engine,
+		     RING_HWS_PGA,
+		     i915_ggtt_offset(engine->status_page.vma));
+	ENGINE_POSTING_READ(engine, RING_HWS_PGA);
}

static bool unexpected_starting_state(struct intel_engine_cs *engine)
{
-	struct drm_i915_private *dev_priv = engine->i915;
	bool unexpected = false;

-	if (I915_READ(RING_MI_MODE(engine->mmio_base)) & STOP_RING) {
+	if (ENGINE_READ(engine, RING_MI_MODE) & STOP_RING) {
		DRM_DEBUG_DRIVER("STOP_RING still set in RING_MI_MODE\n");
		unexpected = true;
	}
@@ -2094,8 +2050,8 @@ static void execlists_reset_prepare(struct intel_engine_cs *engine)
	intel_engine_stop_cs(engine);

	/* And flush any current direct submission. */
-	spin_lock_irqsave(&engine->timeline.lock, flags);
-	spin_unlock_irqrestore(&engine->timeline.lock, flags);
+	spin_lock_irqsave(&engine->active.lock, flags);
+	spin_unlock_irqrestore(&engine->active.lock, flags);
}

static bool lrc_regs_ok(const struct i915_request *rq)
@@ -2138,11 +2094,11 @@ static void reset_csb_pointers(struct intel_engine_execlists *execlists)

static struct i915_request *active_request(struct i915_request *rq)
{
-	const struct list_head * const list = &rq->engine->timeline.requests;
+	const struct list_head * const list = &rq->engine->active.requests;
	const struct intel_context * const context = rq->hw_context;
	struct i915_request *active = NULL;

-	list_for_each_entry_from_reverse(rq, list, link) {
+	list_for_each_entry_from_reverse(rq, list, sched.link) {
		if (i915_request_completed(rq))
			break;

@@ -2259,11 +2215,11 @@ static void execlists_reset(struct intel_engine_cs *engine, bool stalled)

	GEM_TRACE("%s\n", engine->name);

-	spin_lock_irqsave(&engine->timeline.lock, flags);
+	spin_lock_irqsave(&engine->active.lock, flags);

	__execlists_reset(engine, stalled);

-	spin_unlock_irqrestore(&engine->timeline.lock, flags);
+	spin_unlock_irqrestore(&engine->active.lock, flags);
}

static void nop_submission_tasklet(unsigned long data)
@@ -2294,12 +2250,12 @@ static void execlists_cancel_requests(struct intel_engine_cs *engine)
	 * submission's irq state, we also wish to remind ourselves that
	 * it is irq state.)
	 */
-	spin_lock_irqsave(&engine->timeline.lock, flags);
+	spin_lock_irqsave(&engine->active.lock, flags);

	__execlists_reset(engine, true);

	/* Mark all executing requests as skipped. */
-	list_for_each_entry(rq, &engine->timeline.requests, link) {
+	list_for_each_entry(rq, &engine->active.requests, sched.link) {
		if (!i915_request_signaled(rq))
			dma_fence_set_error(&rq->fence, -EIO);

@@ -2330,7 +2286,7 @@ static void execlists_cancel_requests(struct intel_engine_cs *engine)
		rb_erase_cached(rb, &execlists->virtual);
		RB_CLEAR_NODE(rb);

-		spin_lock(&ve->base.timeline.lock);
+		spin_lock(&ve->base.active.lock);
		if (ve->request) {
			ve->request->engine = engine;
			__i915_request_submit(ve->request);
@@ -2339,7 +2295,7 @@ static void execlists_cancel_requests(struct intel_engine_cs *engine)
			ve->base.execlists.queue_priority_hint = INT_MIN;
			ve->request = NULL;
		}
-		spin_unlock(&ve->base.timeline.lock);
+		spin_unlock(&ve->base.active.lock);
	}

	/* Remaining _unready_ requests will be nop'ed when submitted */
@@ -2351,7 +2307,7 @@ static void execlists_cancel_requests(struct intel_engine_cs *engine)
	GEM_BUG_ON(__tasklet_is_enabled(&execlists->tasklet));
	execlists->tasklet.func = nop_submission_tasklet;

-	spin_unlock_irqrestore(&engine->timeline.lock, flags);
+	spin_unlock_irqrestore(&engine->active.lock, flags);
}

static void execlists_reset_finish(struct intel_engine_cs *engine)
@@ -2737,8 +2693,9 @@ int intel_execlists_submission_setup(struct intel_engine_cs *engine)

int intel_execlists_submission_init(struct intel_engine_cs *engine)
{
-	struct drm_i915_private *i915 = engine->i915;
	struct intel_engine_execlists * const execlists = &engine->execlists;
+	struct drm_i915_private *i915 = engine->i915;
+	struct intel_uncore *uncore = engine->uncore;
	u32 base = engine->mmio_base;
	int ret;

@@ -2758,12 +2715,12 @@ int intel_execlists_submission_init(struct intel_engine_cs *engine)
		DRM_ERROR("WA batch buffer initialization failed\n");

	if (HAS_LOGICAL_RING_ELSQ(i915)) {
-		execlists->submit_reg = i915->uncore.regs +
+		execlists->submit_reg = uncore->regs +
			i915_mmio_reg_offset(RING_EXECLIST_SQ_CONTENTS(base));
-		execlists->ctrl_reg = i915->uncore.regs +
+		execlists->ctrl_reg = uncore->regs +
			i915_mmio_reg_offset(RING_EXECLIST_CONTROL(base));
	} else {
-		execlists->submit_reg = i915->uncore.regs +
+		execlists->submit_reg = uncore->regs +
			i915_mmio_reg_offset(RING_ELSP(base));
	}

@@ -2778,7 +2735,7 @@ int intel_execlists_submission_init(struct intel_engine_cs *engine)
	execlists->csb_write =
		&engine->status_page.addr[intel_hws_csb_write_index(i915)];

-	if (INTEL_GEN(engine->i915) < 11)
+	if (INTEL_GEN(i915) < 11)
		execlists->csb_size = GEN8_CSB_ENTRIES;
	else
		execlists->csb_size = GEN11_CSB_ENTRIES;
@@ -2822,7 +2779,7 @@ static void execlists_init_reg_state(u32 *regs,
				     struct intel_engine_cs *engine,
				     struct intel_ring *ring)
{
-	struct i915_hw_ppgtt *ppgtt = ce->gem_context->ppgtt;
+	struct i915_ppgtt *ppgtt = i915_vm_to_ppgtt(ce->gem_context->vm);
	bool rcs = engine->class == RENDER_CLASS;
	u32 base = engine->mmio_base;

@@ -3010,7 +2967,7 @@ static int execlists_context_deferred_alloc(struct intel_context *ce,
	 */
	context_size += LRC_HEADER_PAGES * PAGE_SIZE;

-	ctx_obj = i915_gem_object_create(engine->i915, context_size);
+	ctx_obj = i915_gem_object_create_shmem(engine->i915, context_size);
	if (IS_ERR(ctx_obj))
		return PTR_ERR(ctx_obj);

@@ -3053,14 +3010,20 @@ error_deref_obj:
	return ret;
}

+static struct list_head *virtual_queue(struct virtual_engine *ve)
+{
+	return &ve->base.execlists.default_priolist.requests[0];
+}
+
static void virtual_context_destroy(struct kref *kref)
{
	struct virtual_engine *ve =
		container_of(kref, typeof(*ve), context.ref);
	unsigned int n;

+	GEM_BUG_ON(!list_empty(virtual_queue(ve)));
	GEM_BUG_ON(ve->request);
-	GEM_BUG_ON(ve->context.active);
+	GEM_BUG_ON(ve->context.inflight);

	for (n = 0; n < ve->num_siblings; n++) {
		struct intel_engine_cs *sibling = ve->siblings[n];
@@ -3069,13 +3032,13 @@ static void virtual_context_destroy(struct kref *kref)
		if (RB_EMPTY_NODE(node))
			continue;

-		spin_lock_irq(&sibling->timeline.lock);
+		spin_lock_irq(&sibling->active.lock);

		/* Detachment is lazily performed in the execlists tasklet */
		if (!RB_EMPTY_NODE(node))
			rb_erase_cached(node, &sibling->execlists.virtual);

-		spin_unlock_irq(&sibling->timeline.lock);
+		spin_unlock_irq(&sibling->active.lock);
	}
	GEM_BUG_ON(__tasklet_is_scheduled(&ve->base.execlists.tasklet));

@@ -3083,8 +3046,6 @@ static void virtual_context_destroy(struct kref *kref)
	__execlists_context_fini(&ve->context);

	kfree(ve->bonds);
-
-	i915_timeline_fini(&ve->base.timeline);
	kfree(ve);
}

@@ -3142,7 +3103,6 @@ static void virtual_context_exit(struct intel_context *ce)
	struct virtual_engine *ve = container_of(ce, typeof(*ve), context);
	unsigned int n;

-	ce->saturated = 0;
	for (n = 0; n < ve->num_siblings; n++)
		intel_engine_pm_put(ve->siblings[n]);
}
@@ -3204,16 +3164,16 @@ static void virtual_submission_tasklet(unsigned long data)

		if (unlikely(!(mask & sibling->mask))) {
			if (!RB_EMPTY_NODE(&node->rb)) {
-				spin_lock(&sibling->timeline.lock);
+				spin_lock(&sibling->active.lock);
				rb_erase_cached(&node->rb,
						&sibling->execlists.virtual);
				RB_CLEAR_NODE(&node->rb);
-				spin_unlock(&sibling->timeline.lock);
+				spin_unlock(&sibling->active.lock);
			}
			continue;
		}

-		spin_lock(&sibling->timeline.lock);
+		spin_lock(&sibling->active.lock);

		if (!RB_EMPTY_NODE(&node->rb)) {
			/*
@@ -3257,7 +3217,7 @@ submit_engine:
			tasklet_hi_schedule(&sibling->execlists.tasklet);
		}

-		spin_unlock(&sibling->timeline.lock);
+		spin_unlock(&sibling->active.lock);
	}
	local_irq_enable();
}

@@ -3274,9 +3234,13 @@ static void virtual_submit_request(struct i915_request *rq)

	GEM_BUG_ON(ve->base.submit_request != virtual_submit_request);

	GEM_BUG_ON(ve->request);
+	GEM_BUG_ON(!list_empty(virtual_queue(ve)));
+
	ve->base.execlists.queue_priority_hint = rq_prio(rq);
	WRITE_ONCE(ve->request, rq);

+	list_move_tail(&rq->sched.link, virtual_queue(ve));
+
	tasklet_schedule(&ve->base.execlists.tasklet);
}

@@ -3338,12 +3302,24 @@ intel_execlists_create_virtual(struct i915_gem_context *ctx,
	ve->base.instance = I915_ENGINE_CLASS_INVALID_VIRTUAL;
	ve->base.flags = I915_ENGINE_IS_VIRTUAL;

+	/*
+	 * The decision on whether to submit a request using semaphores
+	 * depends on the saturated state of the engine. We only compute
+	 * this during HW submission of the request, and we need for this
+	 * state to be globally applied to all requests being submitted
+	 * to this engine. Virtual engines encompass more than one physical
+	 * engine and so we cannot accurately tell in advance if one of those
+	 * engines is already saturated and so cannot afford to use a semaphore
+	 * and be pessimized in priority for doing so -- if we are the only
+	 * context using semaphores after all other clients have stopped, we
+	 * will be starved on the saturated system. Such a global switch for
+	 * semaphores is less than ideal, but alas is the current compromise.
+	 */
+	ve->base.saturated = ALL_ENGINES;
+
	snprintf(ve->base.name, sizeof(ve->base.name), "virtual");

-	err = i915_timeline_init(ctx->i915, &ve->base.timeline, NULL);
-	if (err)
-		goto err_put;
-	i915_timeline_set_subclass(&ve->base.timeline, TIMELINE_VIRTUAL);
+	intel_engine_init_active(&ve->base, ENGINE_VIRTUAL);

	intel_engine_init_execlists(&ve->base);

@@ -3354,6 +3330,7 @@ intel_execlists_create_virtual(struct i915_gem_context *ctx,
	ve->base.submit_request = virtual_submit_request;
	ve->base.bond_execute = virtual_bond_execute;

+	INIT_LIST_HEAD(virtual_queue(ve));
	ve->base.execlists.queue_priority_hint = INT_MIN;
	tasklet_init(&ve->base.execlists.tasklet,
		     virtual_submission_tasklet,
@@ -3508,11 +3485,11 @@ void intel_execlists_show_requests(struct intel_engine_cs *engine,
	unsigned int count;
	struct rb_node *rb;

-	spin_lock_irqsave(&engine->timeline.lock, flags);
+	spin_lock_irqsave(&engine->active.lock, flags);

	last = NULL;
	count = 0;
-	list_for_each_entry(rq, &engine->timeline.requests, link) {
+	list_for_each_entry(rq, &engine->active.requests, sched.link) {
		if (count++ < max - 1)
			show_request(m, rq, "\t\tE ");
		else
@@ -3575,7 +3552,7 @@ void intel_execlists_show_requests(struct intel_engine_cs *engine,
			show_request(m, last, "\t\tV ");
	}

-	spin_unlock_irqrestore(&engine->timeline.lock, flags);
+	spin_unlock_irqrestore(&engine->active.lock, flags);
}

void intel_lr_context_reset(struct intel_engine_cs *engine,
diff --git a/drivers/gpu/drm/i915/gt/intel_lrc.h b/drivers/gpu/drm/i915/gt/intel_lrc.h
index e029aee87adf..c2bba82bcc16 100644
--- a/drivers/gpu/drm/i915/gt/intel_lrc.h
+++ b/drivers/gpu/drm/i915/gt/intel_lrc.h
@@ -24,7 +24,15 @@
#ifndef _INTEL_LRC_H_
#define _INTEL_LRC_H_

-#include "intel_engine.h"
+#include <linux/types.h>
+
+struct drm_printer;
+
+struct drm_i915_private;
+struct i915_gem_context;
+struct i915_request;
+struct intel_context;
+struct intel_engine_cs;

/* Execlists regs */
#define RING_ELSP(base) _MMIO((base) + 0x230)
@@ -96,10 +104,6 @@ int intel_execlists_submission_init(struct intel_engine_cs *engine);
 */
#define LRC_HEADER_PAGES LRC_PPHWSP_PN

-struct drm_printer;
-
-struct drm_i915_private;
-
void intel_execlists_set_default_submission(struct intel_engine_cs *engine);

void
intel_lr_context_reset(struct intel_engine_cs *engine,
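In __unwind_incomplete_requests() above, list_add() became list_move() because rq->sched.link now doubles as the request's link on engine->active.requests: the node must be detached from whatever list it is on before being requeued. A freestanding list implementation showing the difference, mirroring (in simplified form) the kernel's list_head semantics:

/* cc -o list_toy list_toy.c */
#include <stdio.h>

struct list_head { struct list_head *prev, *next; };

static void list_init(struct list_head *h) { h->prev = h->next = h; }

static void list_del(struct list_head *n)
{
	n->prev->next = n->next;
	n->next->prev = n->prev;
}

static void list_add(struct list_head *n, struct list_head *h)
{
	n->next = h->next;
	n->prev = h;
	h->next->prev = n;
	h->next = n;
}

static void list_move(struct list_head *n, struct list_head *h)
{
	list_del(n);	/* unlike list_add(): detach first */
	list_add(n, h);
}

int main(void)
{
	struct list_head active, prio, rq;

	list_init(&active);
	list_init(&prio);
	list_add(&rq, &active);	/* request sits on the engine's active list */

	list_move(&rq, &prio);	/* unwind: requeue for later resubmission */
	printf("active list empty: %d\n", active.next == &active);	/* 1 */
	return 0;
}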
diff --git a/drivers/gpu/drm/i915/gt/intel_lrc_reg.h b/drivers/gpu/drm/i915/gt/intel_lrc_reg.h
index 5ef932d810a7..6bf34738b4e5 100644
--- a/drivers/gpu/drm/i915/gt/intel_lrc_reg.h
+++ b/drivers/gpu/drm/i915/gt/intel_lrc_reg.h
@@ -55,7 +55,7 @@

#define ASSIGN_CTX_PML4(ppgtt, reg_state) do { \
	u32 *reg_state__ = (reg_state); \
-	const u64 addr__ = px_dma(&ppgtt->pml4); \
+	const u64 addr__ = px_dma(ppgtt->pd); \
	(reg_state__)[CTX_PDP0_UDW + 1] = upper_32_bits(addr__); \
	(reg_state__)[CTX_PDP0_LDW + 1] = lower_32_bits(addr__); \
} while (0)
diff --git a/drivers/gpu/drm/i915/gt/intel_mocs.c b/drivers/gpu/drm/i915/gt/intel_mocs.c
index 79df66022d3a..1f9db50b1869 100644
--- a/drivers/gpu/drm/i915/gt/intel_mocs.c
+++ b/drivers/gpu/drm/i915/gt/intel_mocs.c
@@ -200,6 +200,14 @@ static const struct drm_i915_mocs_entry broxton_mocs_table[] = {
	MOCS_ENTRY(15, \
		   LE_3_WB | LE_TC_1_LLC | LE_LRUM(2) | LE_AOM(1), \
		   L3_3_WB), \
+	/* Bypass LLC - Uncached (EHL+) */ \
+	MOCS_ENTRY(16, \
+		   LE_1_UC | LE_TC_1_LLC | LE_SCF(1), \
+		   L3_1_UC), \
+	/* Bypass LLC - L3 (Read-Only) (EHL+) */ \
+	MOCS_ENTRY(17, \
+		   LE_1_UC | LE_TC_1_LLC | LE_SCF(1), \
+		   L3_3_WB), \
	/* Self-Snoop - L3 + LLC */ \
	MOCS_ENTRY(18, \
		   LE_3_WB | LE_TC_1_LLC | LE_LRUM(3) | LE_SSE(3), \
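gen8_check_faults() in the intel_reset.c hunks below assembles a 64-bit fault address from the two TLB-data registers: DATA0 supplies the page-aligned low bits shifted into place, DATA1 the bits above bit 43. A worked standalone example of that assembly — the register values and the mask width here are made up for the sketch; only the shift expression is taken from the patch:

/* cc -o fault_toy fault_toy.c */
#include <inttypes.h>
#include <stdint.h>
#include <stdio.h>

#define FAULT_VA_HIGH_BITS 0xf	/* assumed width for this sketch */

int main(void)
{
	uint32_t fault_data0 = 0x00abcdef;	/* VA bits 43:12, as a page index */
	uint32_t fault_data1 = 0x3;		/* VA bits above 43 */
	uint64_t fault_addr =
		((uint64_t)(fault_data1 & FAULT_VA_HIGH_BITS) << 44) |
		((uint64_t)fault_data0 << 12);

	printf("fault VA: 0x%016" PRIx64 "\n", fault_addr);	/* 0x300abcdef000 */
	return 0;
}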
diff --git a/drivers/gpu/drm/i915/gt/intel_reset.c b/drivers/gpu/drm/i915/gt/intel_reset.c
index 8c60f7550f9c..4c478b38e420 100644
--- a/drivers/gpu/drm/i915/gt/intel_reset.c
+++ b/drivers/gpu/drm/i915/gt/intel_reset.c
@@ -7,6 +7,10 @@
#include <linux/sched/mm.h>
#include <linux/stop_machine.h>

+#include "display/intel_overlay.h"
+
+#include "gem/i915_gem_context.h"
+
#include "i915_drv.h"
#include "i915_gpu_error.h"
#include "i915_irq.h"
@@ -15,7 +19,6 @@
#include "intel_reset.h"

#include "intel_guc.h"
-#include "intel_overlay.h"

#define RESET_MAX_RETRIES 3

@@ -47,12 +50,12 @@ static void engine_skip_context(struct i915_request *rq)
	struct intel_engine_cs *engine = rq->engine;
	struct i915_gem_context *hung_ctx = rq->gem_context;

-	lockdep_assert_held(&engine->timeline.lock);
+	lockdep_assert_held(&engine->active.lock);

	if (!i915_request_is_active(rq))
		return;

-	list_for_each_entry_continue(rq, &engine->timeline.requests, link)
+	list_for_each_entry_continue(rq, &engine->active.requests, sched.link)
		if (rq->gem_context == hung_ctx)
			i915_request_skip(rq, -EIO);
}
@@ -128,7 +131,7 @@ void i915_reset_request(struct i915_request *rq, bool guilty)
		  rq->fence.seqno,
		  yesno(guilty));

-	lockdep_assert_held(&rq->engine->timeline.lock);
+	lockdep_assert_held(&rq->engine->active.lock);
	GEM_BUG_ON(i915_request_completed(rq));

	if (guilty) {
@@ -693,19 +696,19 @@ static void revoke_mmaps(struct drm_i915_private *i915)
{
	int i;

-	for (i = 0; i < i915->num_fence_regs; i++) {
+	for (i = 0; i < i915->ggtt.num_fences; i++) {
		struct drm_vma_offset_node *node;
		struct i915_vma *vma;
		u64 vma_offset;

-		vma = READ_ONCE(i915->fence_regs[i].vma);
+		vma = READ_ONCE(i915->ggtt.fence_regs[i].vma);
		if (!vma)
			continue;

		if (!i915_vma_has_userfault(vma))
			continue;

-		GEM_BUG_ON(vma->fence != &i915->fence_regs[i]);
+		GEM_BUG_ON(vma->fence != &i915->ggtt.fence_regs[i]);
		node = &vma->obj->base.vma_node;
		vma_offset = vma->ggtt_view.partial.offset << PAGE_SHIFT;
		unmap_mapping_range(i915->drm.anon_inode->i_mapping,
@@ -783,10 +786,10 @@ static void nop_submit_request(struct i915_request *request)
		  engine->name, request->fence.context, request->fence.seqno);
	dma_fence_set_error(&request->fence, -EIO);

-	spin_lock_irqsave(&engine->timeline.lock, flags);
+	spin_lock_irqsave(&engine->active.lock, flags);
	__i915_request_submit(request);
	i915_request_mark_complete(request);
-	spin_unlock_irqrestore(&engine->timeline.lock, flags);
+	spin_unlock_irqrestore(&engine->active.lock, flags);

	intel_engine_queue_breadcrumbs(engine);
}
@@ -849,7 +852,7 @@ void i915_gem_set_wedged(struct drm_i915_private *i915)
	intel_wakeref_t wakeref;

	mutex_lock(&error->wedge_mutex);
-	with_intel_runtime_pm(i915, wakeref)
+	with_intel_runtime_pm(&i915->runtime_pm, wakeref)
		__i915_gem_set_wedged(i915);
	mutex_unlock(&error->wedge_mutex);
}
@@ -976,10 +979,11 @@ void i915_reset(struct drm_i915_private *i915,
	might_sleep();
	GEM_BUG_ON(!test_bit(I915_RESET_BACKOFF, &error->flags));
+	mutex_lock(&error->wedge_mutex);

	/* Clear any previous failed attempts at recovery. Time to try again. */
	if (!__i915_gem_unset_wedged(i915))
-		return;
+		goto unlock;

	if (reason)
		dev_notice(i915->drm.dev, "Resetting chip for %s\n", reason);
@@ -1027,6 +1031,8 @@ void i915_reset(struct drm_i915_private *i915,

finish:
	reset_finish(i915);
+unlock:
+	mutex_unlock(&error->wedge_mutex);
	return;

taint:
@@ -1142,9 +1148,7 @@ static void i915_reset_device(struct drm_i915_private *i915,
		/* Flush everyone using a resource about to be clobbered */
		synchronize_srcu_expedited(&error->reset_backoff_srcu);

-		mutex_lock(&error->wedge_mutex);
		i915_reset(i915, engine_mask, reason);
-		mutex_unlock(&error->wedge_mutex);

		intel_finish_reset(i915);
	}
@@ -1158,7 +1162,14 @@ static void clear_register(struct intel_uncore *uncore, i915_reg_t reg)
	intel_uncore_rmw(uncore, reg, 0, 0);
}

-void i915_clear_error_registers(struct drm_i915_private *i915)
+static void gen8_clear_engine_error_register(struct intel_engine_cs *engine)
+{
+	GEN6_RING_FAULT_REG_RMW(engine, RING_FAULT_VALID, 0);
+	GEN6_RING_FAULT_REG_POSTING_READ(engine);
+}
+
+static void clear_error_registers(struct drm_i915_private *i915,
+				  intel_engine_mask_t engine_mask)
{
	struct intel_uncore *uncore = &i915->uncore;
	u32 eir;
@@ -1191,15 +1202,74 @@
		struct intel_engine_cs *engine;
		enum intel_engine_id id;

-		for_each_engine(engine, i915, id) {
-			rmw_clear(uncore,
-				  RING_FAULT_REG(engine), RING_FAULT_VALID);
-			intel_uncore_posting_read(uncore,
-						  RING_FAULT_REG(engine));
-		}
+		for_each_engine_masked(engine, i915, engine_mask, id)
+			gen8_clear_engine_error_register(engine);
	}
}

+static void gen6_check_faults(struct drm_i915_private *dev_priv)
+{
+	struct intel_engine_cs *engine;
+	enum intel_engine_id id;
+	u32 fault;
+
+	for_each_engine(engine, dev_priv, id) {
+		fault = GEN6_RING_FAULT_REG_READ(engine);
+		if (fault & RING_FAULT_VALID) {
+			DRM_DEBUG_DRIVER("Unexpected fault\n"
+					 "\tAddr: 0x%08lx\n"
+					 "\tAddress space: %s\n"
+					 "\tSource ID: %d\n"
+					 "\tType: %d\n",
+					 fault & PAGE_MASK,
+					 fault & RING_FAULT_GTTSEL_MASK ? "GGTT" : "PPGTT",
+					 RING_FAULT_SRCID(fault),
+					 RING_FAULT_FAULT_TYPE(fault));
+		}
+	}
+}
+
+static void gen8_check_faults(struct drm_i915_private *dev_priv)
+{
+	u32 fault = I915_READ(GEN8_RING_FAULT_REG);
+
+	if (fault & RING_FAULT_VALID) {
+		u32 fault_data0, fault_data1;
+		u64 fault_addr;
+
+		fault_data0 = I915_READ(GEN8_FAULT_TLB_DATA0);
+		fault_data1 = I915_READ(GEN8_FAULT_TLB_DATA1);
+		fault_addr = ((u64)(fault_data1 & FAULT_VA_HIGH_BITS) << 44) |
+			     ((u64)fault_data0 << 12);
+
+		DRM_DEBUG_DRIVER("Unexpected fault\n"
+				 "\tAddr: 0x%08x_%08x\n"
+				 "\tAddress space: %s\n"
+				 "\tEngine ID: %d\n"
+				 "\tSource ID: %d\n"
+				 "\tType: %d\n",
+				 upper_32_bits(fault_addr),
+				 lower_32_bits(fault_addr),
+				 fault_data1 & FAULT_GTT_SEL ? "GGTT" : "PPGTT",
+				 GEN8_RING_FAULT_ENGINE_ID(fault),
+				 RING_FAULT_SRCID(fault),
+				 RING_FAULT_FAULT_TYPE(fault));
+	}
+}
+
+void i915_check_and_clear_faults(struct drm_i915_private *i915)
+{
+	/* From GEN8 onwards we only have one 'All Engine Fault Register' */
+	if (INTEL_GEN(i915) >= 8)
+		gen8_check_faults(i915);
+	else if (INTEL_GEN(i915) >= 6)
+		gen6_check_faults(i915);
+	else
+		return;
+
+	clear_error_registers(i915, ALL_ENGINES);
+}
+
/**
 * i915_handle_error - handle a gpu error
 * @i915: i915 device private
@@ -1242,13 +1312,13 @@ void i915_handle_error(struct drm_i915_private *i915,
	 * isn't the case at least when we get here by doing a
	 * simulated reset via debugfs, so get an RPM reference.
	 */
-	wakeref = intel_runtime_pm_get(i915);
+	wakeref = intel_runtime_pm_get(&i915->runtime_pm);

	engine_mask &= INTEL_INFO(i915)->engine_mask;

	if (flags & I915_ERROR_CAPTURE) {
		i915_capture_error_state(i915, engine_mask, msg);
-		i915_clear_error_registers(i915);
+		clear_error_registers(i915, engine_mask);
	}

	/*
@@ -1305,7 +1375,7 @@ void i915_handle_error(struct drm_i915_private *i915,
	wake_up_all(&error->reset_queue);

out:
-	intel_runtime_pm_put(i915, wakeref);
+	intel_runtime_pm_put(&i915->runtime_pm, wakeref);
}

int i915_reset_trylock(struct drm_i915_private *i915)
diff --git a/drivers/gpu/drm/i915/gt/intel_reset.h b/drivers/gpu/drm/i915/gt/intel_reset.h
index b52efaab4941..580ebdb59eca 100644
--- a/drivers/gpu/drm/i915/gt/intel_reset.h
+++ b/drivers/gpu/drm/i915/gt/intel_reset.h
@@ -25,7 +25,7 @@ void i915_handle_error(struct drm_i915_private *i915,
		       const char *fmt, ...);
#define I915_ERROR_CAPTURE BIT(0)

-void i915_clear_error_registers(struct drm_i915_private *i915);
+void i915_check_and_clear_faults(struct drm_i915_private *i915);

void i915_reset(struct drm_i915_private *i915,
		intel_engine_mask_t stalled_mask,
(!i915_request_completed(pos)) { rq = pos; break; @@ -788,7 +790,7 @@ static void reset_ring(struct intel_engine_cs *engine, bool stalled) } engine->buffer->head = intel_ring_wrap(engine->buffer, head); - spin_unlock_irqrestore(&tl->lock, flags); + spin_unlock_irqrestore(&engine->active.lock, flags); } static void reset_finish(struct intel_engine_cs *engine) @@ -874,10 +876,10 @@ static void cancel_requests(struct intel_engine_cs *engine) struct i915_request *request; unsigned long flags; - spin_lock_irqsave(&engine->timeline.lock, flags); + spin_lock_irqsave(&engine->active.lock, flags); /* Mark all submitted requests as skipped. */ - list_for_each_entry(request, &engine->timeline.requests, link) { + list_for_each_entry(request, &engine->active.requests, sched.link) { if (!i915_request_signaled(request)) dma_fence_set_error(&request->fence, -EIO); @@ -886,7 +888,7 @@ static void cancel_requests(struct intel_engine_cs *engine) /* Remaining _unready_ requests will be nop'ed when submitted */ - spin_unlock_irqrestore(&engine->timeline.lock, flags); + spin_unlock_irqrestore(&engine->active.lock, flags); } static void i9xx_submit_request(struct i915_request *request) @@ -973,20 +975,20 @@ i9xx_irq_disable(struct intel_engine_cs *engine) static void i8xx_irq_enable(struct intel_engine_cs *engine) { - struct drm_i915_private *dev_priv = engine->i915; + struct drm_i915_private *i915 = engine->i915; - dev_priv->irq_mask &= ~engine->irq_enable_mask; - I915_WRITE16(GEN2_IMR, dev_priv->irq_mask); - POSTING_READ16(RING_IMR(engine->mmio_base)); + i915->irq_mask &= ~engine->irq_enable_mask; + intel_uncore_write16(&i915->uncore, GEN2_IMR, i915->irq_mask); + ENGINE_POSTING_READ16(engine, RING_IMR); } static void i8xx_irq_disable(struct intel_engine_cs *engine) { - struct drm_i915_private *dev_priv = engine->i915; + struct drm_i915_private *i915 = engine->i915; - dev_priv->irq_mask |= engine->irq_enable_mask; - I915_WRITE16(GEN2_IMR, dev_priv->irq_mask); + i915->irq_mask |= engine->irq_enable_mask; + intel_uncore_write16(&i915->uncore, GEN2_IMR, i915->irq_mask); } static int @@ -1264,8 +1266,6 @@ intel_engine_create_ring(struct intel_engine_cs *engine, GEM_BUG_ON(!is_power_of_2(size)); GEM_BUG_ON(RING_CTL_SIZE(size) & ~RING_NR_PAGES); - GEM_BUG_ON(timeline == &engine->timeline); - lockdep_assert_held(&engine->i915->drm.struct_mutex); ring = kzalloc(sizeof(*ring), GFP_KERNEL); if (!ring) @@ -1299,10 +1299,9 @@ intel_engine_create_ring(struct intel_engine_cs *engine, void intel_ring_free(struct kref *ref) { struct intel_ring *ring = container_of(ref, typeof(*ring), ref); - struct drm_i915_gem_object *obj = ring->vma->obj; i915_vma_close(ring->vma); - __i915_gem_object_release_unless_active(obj); + i915_vma_put(ring->vma); i915_timeline_put(ring->timeline); kfree(ring); @@ -1328,64 +1327,28 @@ static void ring_context_destroy(struct kref *ref) static int __context_pin_ppgtt(struct i915_gem_context *ctx) { - struct i915_hw_ppgtt *ppgtt; + struct i915_address_space *vm; int err = 0; - ppgtt = ctx->ppgtt ?: ctx->i915->mm.aliasing_ppgtt; - if (ppgtt) - err = gen6_ppgtt_pin(ppgtt); + vm = ctx->vm ?: &ctx->i915->mm.aliasing_ppgtt->vm; + if (vm) + err = gen6_ppgtt_pin(i915_vm_to_ppgtt((vm))); return err; } static void __context_unpin_ppgtt(struct i915_gem_context *ctx) { - struct i915_hw_ppgtt *ppgtt; + struct i915_address_space *vm; - ppgtt = ctx->ppgtt ?: ctx->i915->mm.aliasing_ppgtt; - if (ppgtt) - gen6_ppgtt_unpin(ppgtt); -} - -static int __context_pin(struct intel_context *ce) -{ - struct i915_vma *vma; - int 
err; - - vma = ce->state; - if (!vma) - return 0; - - err = i915_vma_pin(vma, 0, 0, PIN_GLOBAL | PIN_HIGH); - if (err) - return err; - - /* - * And mark is as a globally pinned object to let the shrinker know - * it cannot reclaim the object until we release it. - */ - vma->obj->pin_global++; - vma->obj->mm.dirty = true; - - return 0; -} - -static void __context_unpin(struct intel_context *ce) -{ - struct i915_vma *vma; - - vma = ce->state; - if (!vma) - return; - - vma->obj->pin_global--; - i915_vma_unpin(vma); + vm = ctx->vm ?: &ctx->i915->mm.aliasing_ppgtt->vm; + if (vm) + gen6_ppgtt_unpin(i915_vm_to_ppgtt(vm)); } static void ring_context_unpin(struct intel_context *ce) { __context_unpin_ppgtt(ce->gem_context); - __context_unpin(ce); } static struct i915_vma * @@ -1396,7 +1359,7 @@ alloc_context_vma(struct intel_engine_cs *engine) struct i915_vma *vma; int err; - obj = i915_gem_object_create(i915, engine->context_size); + obj = i915_gem_object_create_shmem(i915, engine->context_size); if (IS_ERR(obj)) return ERR_CAST(obj); @@ -1475,18 +1438,18 @@ static int ring_context_pin(struct intel_context *ce) ce->state = vma; } - err = __context_pin(ce); + err = intel_context_active_acquire(ce, PIN_HIGH); if (err) return err; err = __context_pin_ppgtt(ce->gem_context); if (err) - goto err_unpin; + goto err_active; return 0; -err_unpin: - __context_unpin(ce); +err_active: + intel_context_active_release(ce); return err; } @@ -1506,8 +1469,7 @@ static const struct intel_context_ops ring_context_ops = { .destroy = ring_context_destroy, }; -static int load_pd_dir(struct i915_request *rq, - const struct i915_hw_ppgtt *ppgtt) +static int load_pd_dir(struct i915_request *rq, const struct i915_ppgtt *ppgtt) { const struct intel_engine_cs * const engine = rq->engine; u32 *cs; @@ -1522,7 +1484,7 @@ static int load_pd_dir(struct i915_request *rq, *cs++ = MI_LOAD_REGISTER_IMM(1); *cs++ = i915_mmio_reg_offset(RING_PP_DIR_BASE(engine->mmio_base)); - *cs++ = ppgtt->pd.base.ggtt_offset << 10; + *cs++ = ppgtt->pd->base.ggtt_offset << 10; intel_ring_advance(rq, cs); @@ -1702,14 +1664,16 @@ static int switch_context(struct i915_request *rq) { struct intel_engine_cs *engine = rq->engine; struct i915_gem_context *ctx = rq->gem_context; - struct i915_hw_ppgtt *ppgtt = ctx->ppgtt ?: rq->i915->mm.aliasing_ppgtt; + struct i915_address_space *vm = + ctx->vm ?: &rq->i915->mm.aliasing_ppgtt->vm; unsigned int unwind_mm = 0; u32 hw_flags = 0; int ret, i; GEM_BUG_ON(HAS_EXECLISTS(rq->i915)); - if (ppgtt) { + if (vm) { + struct i915_ppgtt *ppgtt = i915_vm_to_ppgtt(vm); int loops; /* @@ -1756,7 +1720,7 @@ static int switch_context(struct i915_request *rq) goto err_mm; } - if (ppgtt) { + if (vm) { ret = engine->emit_flush(rq, EMIT_INVALIDATE); if (ret) goto err_mm; @@ -1799,7 +1763,7 @@ static int switch_context(struct i915_request *rq) err_mm: if (unwind_mm) - ppgtt->pd_dirty_engines |= unwind_mm; + i915_vm_to_ppgtt(vm)->pd_dirty_engines |= unwind_mm; err: return ret; } @@ -1851,7 +1815,7 @@ static noinline int wait_for_space(struct intel_ring *ring, unsigned int bytes) return -ENOSPC; timeout = i915_request_wait(target, - I915_WAIT_INTERRUPTIBLE | I915_WAIT_LOCKED, + I915_WAIT_INTERRUPTIBLE, MAX_SCHEDULE_TIMEOUT); if (timeout < 0) return timeout; diff --git a/drivers/gpu/drm/i915/gt/intel_sseu.c b/drivers/gpu/drm/i915/gt/intel_sseu.c index 7f448f3bea0b..a0756f006f5f 100644 --- a/drivers/gpu/drm/i915/gt/intel_sseu.c +++ b/drivers/gpu/drm/i915/gt/intel_sseu.c @@ -8,6 +8,23 @@ #include "intel_lrc_reg.h" #include "intel_sseu.h" 
+unsigned int
+intel_sseu_subslice_total(const struct sseu_dev_info *sseu)
+{
+	unsigned int i, total = 0;
+
+	for (i = 0; i < ARRAY_SIZE(sseu->subslice_mask); i++)
+		total += hweight8(sseu->subslice_mask[i]);
+
+	return total;
+}
+
+unsigned int
+intel_sseu_subslices_per_slice(const struct sseu_dev_info *sseu, u8 slice)
+{
+	return hweight8(sseu->subslice_mask[slice]);
+}
+
 u32 intel_sseu_make_rpcs(struct drm_i915_private *i915,
 			 const struct intel_sseu *req_sseu)
 {
diff --git a/drivers/gpu/drm/i915/gt/intel_sseu.h b/drivers/gpu/drm/i915/gt/intel_sseu.h
index 73bc824094e8..b50d0401a4e2 100644
--- a/drivers/gpu/drm/i915/gt/intel_sseu.h
+++ b/drivers/gpu/drm/i915/gt/intel_sseu.h
@@ -8,11 +8,13 @@
 #define __INTEL_SSEU_H__

 #include <linux/types.h>
+#include <linux/kernel.h>

 struct drm_i915_private;

 #define GEN_MAX_SLICES		(6) /* CNL upper bound */
 #define GEN_MAX_SUBSLICES	(8) /* ICL upper bound */
+#define GEN_SSEU_STRIDE(max_entries) DIV_ROUND_UP(max_entries, BITS_PER_BYTE)

 struct sseu_dev_info {
 	u8 slice_mask;
@@ -61,6 +63,12 @@ intel_sseu_from_device_info(const struct sseu_dev_info *sseu)
 	return value;
 }

+unsigned int
+intel_sseu_subslice_total(const struct sseu_dev_info *sseu);
+
+unsigned int
+intel_sseu_subslices_per_slice(const struct sseu_dev_info *sseu, u8 slice);
+
 u32 intel_sseu_make_rpcs(struct drm_i915_private *i915,
 			 const struct intel_sseu *req_sseu);
diff --git a/drivers/gpu/drm/i915/gt/intel_workarounds.c b/drivers/gpu/drm/i915/gt/intel_workarounds.c
index ce4bcca3f83c..15e90fd2cfdc 100644
--- a/drivers/gpu/drm/i915/gt/intel_workarounds.c
+++ b/drivers/gpu/drm/i915/gt/intel_workarounds.c
@@ -5,6 +5,7 @@
  */

 #include "i915_drv.h"
+#include "intel_context.h"
 #include "intel_workarounds.h"

 /**
@@ -37,7 +38,7 @@
 * costly and simplifies things. We can revisit this in the future.
 *
 * Layout
- * ''''''
+ * ~~~~~~
 *
 * Keep things in this file ordered by WA type, as per the above (context, GT,
 * display, register whitelist, batchbuffer). Then, inside each type, keep the
@@ -535,6 +536,12 @@ static void icl_ctx_workarounds_init(struct intel_engine_cs *engine,
 		 intel_uncore_read(engine->uncore, GEN8_L3CNTLREG) |
 		 GEN8_ERRDETBCTRL);

+	/* WaDisableBankHangMode:icl */
+	wa_write(wal,
+		 GEN8_L3CNTLREG,
+		 intel_uncore_read(engine->uncore, GEN8_L3CNTLREG) |
+		 GEN8_ERRDETBCTRL);
+
 	/* Wa_1604370585:icl (pre-prod)
 	 * Formerly known as WaPushConstantDereferenceHoldDisable
 	 */
@@ -1011,7 +1018,7 @@ bool intel_gt_verify_workarounds(struct drm_i915_private *i915,
 }

 static void
-whitelist_reg(struct i915_wa_list *wal, i915_reg_t reg)
+whitelist_reg_ext(struct i915_wa_list *wal, i915_reg_t reg, u32 flags)
 {
 	struct i915_wa wa = {
 		.reg = reg
@@ -1020,9 +1027,16 @@ whitelist_reg(struct i915_wa_list *wal, i915_reg_t reg)
 	if (GEM_DEBUG_WARN_ON(wal->count >= RING_MAX_NONPRIV_SLOTS))
 		return;

+	wa.reg.reg |= flags;
 	_wa_add(wal, &wa);
 }

+static void
+whitelist_reg(struct i915_wa_list *wal, i915_reg_t reg)
+{
+	whitelist_reg_ext(wal, reg, RING_FORCE_TO_NONPRIV_RW);
+}
+
 static void gen9_whitelist_build(struct i915_wa_list *w)
 {
 	/* WaVFEStateAfterPipeControlwithMediaStateClear:skl,bxt,glk,cfl */
@@ -1035,56 +1049,103 @@ static void gen9_whitelist_build(struct i915_wa_list *w)
 	whitelist_reg(w, GEN8_HDC_CHICKEN1);
 }

-static void skl_whitelist_build(struct i915_wa_list *w)
+static void skl_whitelist_build(struct intel_engine_cs *engine)
 {
+	struct i915_wa_list *w = &engine->whitelist;
+
+	if (engine->class != RENDER_CLASS)
+		return;
+
 	gen9_whitelist_build(w);

 	/* WaDisableLSQCROPERFforOCL:skl */
 	whitelist_reg(w, GEN8_L3SQCREG4);
 }

-static void bxt_whitelist_build(struct i915_wa_list *w)
+static void bxt_whitelist_build(struct intel_engine_cs *engine)
 {
-	gen9_whitelist_build(w);
+	if (engine->class != RENDER_CLASS)
+		return;
+
+	gen9_whitelist_build(&engine->whitelist);
 }

-static void kbl_whitelist_build(struct i915_wa_list *w)
+static void kbl_whitelist_build(struct intel_engine_cs *engine)
 {
+	struct i915_wa_list *w = &engine->whitelist;
+
+	if (engine->class != RENDER_CLASS)
+		return;
+
 	gen9_whitelist_build(w);

 	/* WaDisableLSQCROPERFforOCL:kbl */
 	whitelist_reg(w, GEN8_L3SQCREG4);
 }

-static void glk_whitelist_build(struct i915_wa_list *w)
+static void glk_whitelist_build(struct intel_engine_cs *engine)
 {
+	struct i915_wa_list *w = &engine->whitelist;
+
+	if (engine->class != RENDER_CLASS)
+		return;
+
 	gen9_whitelist_build(w);

 	/* WA #0862: Userspace has to set "Barrier Mode" to avoid hangs. */
 	whitelist_reg(w, GEN9_SLICE_COMMON_ECO_CHICKEN1);
 }

-static void cfl_whitelist_build(struct i915_wa_list *w)
+static void cfl_whitelist_build(struct intel_engine_cs *engine)
 {
-	gen9_whitelist_build(w);
+	if (engine->class != RENDER_CLASS)
+		return;
+
+	gen9_whitelist_build(&engine->whitelist);
 }

-static void cnl_whitelist_build(struct i915_wa_list *w)
+static void cnl_whitelist_build(struct intel_engine_cs *engine)
 {
+	struct i915_wa_list *w = &engine->whitelist;
+
+	if (engine->class != RENDER_CLASS)
+		return;
+
 	/* WaEnablePreemptionGranularityControlByUMD:cnl */
 	whitelist_reg(w, GEN8_CS_CHICKEN1);
 }

-static void icl_whitelist_build(struct i915_wa_list *w)
+static void icl_whitelist_build(struct intel_engine_cs *engine)
 {
-	/* WaAllowUMDToModifyHalfSliceChicken7:icl */
-	whitelist_reg(w, GEN9_HALF_SLICE_CHICKEN7);
-
-	/* WaAllowUMDToModifySamplerMode:icl */
-	whitelist_reg(w, GEN10_SAMPLER_MODE);
+	struct i915_wa_list *w = &engine->whitelist;

-	/* WaEnableStateCacheRedirectToCS:icl */
-	whitelist_reg(w, GEN9_SLICE_COMMON_ECO_CHICKEN1);
+	switch (engine->class) {
+	case RENDER_CLASS:
+		/* WaAllowUMDToModifyHalfSliceChicken7:icl */
+		whitelist_reg(w, GEN9_HALF_SLICE_CHICKEN7);
+
+		/* WaAllowUMDToModifySamplerMode:icl */
+		whitelist_reg(w, GEN10_SAMPLER_MODE);
+
+		/* WaEnableStateCacheRedirectToCS:icl */
+		whitelist_reg(w, GEN9_SLICE_COMMON_ECO_CHICKEN1);
+		break;
+
+	case VIDEO_DECODE_CLASS:
+		/* hucStatusRegOffset */
+		whitelist_reg_ext(w, _MMIO(0x2000 + engine->mmio_base),
+				  RING_FORCE_TO_NONPRIV_RD);
+		/* hucUKernelHdrInfoRegOffset */
+		whitelist_reg_ext(w, _MMIO(0x2014 + engine->mmio_base),
+				  RING_FORCE_TO_NONPRIV_RD);
+		/* hucStatus2RegOffset */
+		whitelist_reg_ext(w, _MMIO(0x23B0 + engine->mmio_base),
+				  RING_FORCE_TO_NONPRIV_RD);
+		break;
+
+	default:
+		break;
+	}
 }

 void intel_engine_init_whitelist(struct intel_engine_cs *engine)
@@ -1092,25 +1153,22 @@ void intel_engine_init_whitelist(struct intel_engine_cs *engine)
 	struct drm_i915_private *i915 = engine->i915;
 	struct i915_wa_list *w = &engine->whitelist;

-	if (engine->class != RENDER_CLASS)
-		return;
-
 	wa_init_start(w, "whitelist");

 	if (IS_GEN(i915, 11))
-		icl_whitelist_build(w);
+		icl_whitelist_build(engine);
 	else if (IS_CANNONLAKE(i915))
-		cnl_whitelist_build(w);
+		cnl_whitelist_build(engine);
 	else if (IS_COFFEELAKE(i915))
-		cfl_whitelist_build(w);
+		cfl_whitelist_build(engine);
 	else if (IS_GEMINILAKE(i915))
-		glk_whitelist_build(w);
+		glk_whitelist_build(engine);
 	else if (IS_KABYLAKE(i915))
-		kbl_whitelist_build(w);
+		kbl_whitelist_build(engine);
 	else if (IS_BROXTON(i915))
-		bxt_whitelist_build(w);
+		bxt_whitelist_build(engine);
 	else if (IS_SKYLAKE(i915))
-		skl_whitelist_build(w);
+		skl_whitelist_build(engine);
 	else if (INTEL_GEN(i915) <= 8)
 		return;
 	else
@@ -1383,7 +1441,7 @@ static int engine_wa_list_verify(struct intel_context *ce,
 		goto err_vma;

 	i915_request_add(rq);
-	if (i915_request_wait(rq, I915_WAIT_LOCKED, HZ / 5) < 0) {
+	if (i915_request_wait(rq, 0, HZ / 5) < 0) {
 		err = -ETIME;
 		goto err_vma;
 	}
diff --git a/drivers/gpu/drm/i915/gt/mock_engine.c b/drivers/gpu/drm/i915/gt/mock_engine.c
index 2941916b37bf..086801b51441 100644
--- a/drivers/gpu/drm/i915/gt/mock_engine.c
+++ b/drivers/gpu/drm/i915/gt/mock_engine.c
@@ -22,8 +22,9 @@
 *
 */

+#include "gem/i915_gem_context.h"
+
 #include "i915_drv.h"
-#include "i915_gem_context.h"
 #include "intel_context.h"
 #include "intel_engine_pm.h"
@@ -145,12 +146,18 @@ static void mock_context_destroy(struct kref *ref)

 static int mock_context_pin(struct intel_context *ce)
 {
+	int ret;
+
 	if (!ce->ring) {
 		ce->ring = mock_ring(ce->engine);
 		if (!ce->ring)
 			return -ENOMEM;
 	}

+	ret = intel_context_active_acquire(ce, PIN_HIGH);
+	if (ret)
+		return ret;
+
 	mock_timeline_pin(ce->ring->timeline);
 	return 0;
 }
@@ -222,17 +229,17 @@ static void mock_cancel_requests(struct intel_engine_cs *engine)
 	struct i915_request *request;
 	unsigned long flags;

-	spin_lock_irqsave(&engine->timeline.lock, flags);
+	spin_lock_irqsave(&engine->active.lock, flags);

 	/* Mark all submitted requests as skipped. */
-	list_for_each_entry(request, &engine->timeline.requests, sched.link) {
+	list_for_each_entry(request, &engine->active.requests, sched.link) {
 		if (!i915_request_signaled(request))
 			dma_fence_set_error(&request->fence, -EIO);

 		i915_request_mark_complete(request);
 	}

-	spin_unlock_irqrestore(&engine->timeline.lock, flags);
+	spin_unlock_irqrestore(&engine->active.lock, flags);
 }

 struct intel_engine_cs *mock_engine(struct drm_i915_private *i915,
@@ -278,28 +285,23 @@ int mock_engine_init(struct intel_engine_cs *engine)
 	struct drm_i915_private *i915 = engine->i915;
 	int err;

+	intel_engine_init_active(engine, ENGINE_MOCK);
 	intel_engine_init_breadcrumbs(engine);
 	intel_engine_init_execlists(engine);
 	intel_engine_init__pm(engine);

-	if (i915_timeline_init(i915, &engine->timeline, NULL))
-		goto err_breadcrumbs;
-	i915_timeline_set_subclass(&engine->timeline, TIMELINE_ENGINE);
-
 	engine->kernel_context =
 		i915_gem_context_get_engine(i915->kernel_context, engine->id);
 	if (IS_ERR(engine->kernel_context))
-		goto err_timeline;
+		goto err_breadcrumbs;

 	err = intel_context_pin(engine->kernel_context);
 	intel_context_put(engine->kernel_context);
 	if (err)
-		goto err_timeline;
+		goto err_breadcrumbs;

 	return 0;

-err_timeline:
-	i915_timeline_fini(&engine->timeline);
 err_breadcrumbs:
 	intel_engine_fini_breadcrumbs(engine);
 	return -ENOMEM;
@@ -327,18 +329,12 @@ void mock_engine_free(struct intel_engine_cs *engine)
 {
 	struct mock_engine *mock =
 		container_of(engine, typeof(*mock), base);
-	struct intel_context *ce;

 	GEM_BUG_ON(timer_pending(&mock->hw_delay));

-	ce = fetch_and_zero(&engine->last_retired_context);
-	if (ce)
-		intel_context_unpin(ce);
-
 	intel_context_unpin(engine->kernel_context);

 	intel_engine_fini_breadcrumbs(engine);
-	i915_timeline_fini(&engine->timeline);

 	kfree(engine);
 }
diff --git a/drivers/gpu/drm/i915/gt/selftest_hangcheck.c b/drivers/gpu/drm/i915/gt/selftest_hangcheck.c
index 48a51739b926..1ee4c923044f 100644
--- a/drivers/gpu/drm/i915/gt/selftest_hangcheck.c
+++ b/drivers/gpu/drm/i915/gt/selftest_hangcheck.c
@@ -24,19 +24,21 @@

 #include <linux/kthread.h>

+#include "gem/i915_gem_context.h"
 #include "intel_engine_pm.h"

 #include "i915_selftest.h"
 #include "selftests/i915_random.h"
 #include "selftests/igt_flush_test.h"
-#include "selftests/igt_gem_utils.h"
 #include "selftests/igt_reset.h"
 #include "selftests/igt_wedge_me.h"
 #include "selftests/igt_atomic.h"

-#include "selftests/mock_context.h"
 #include "selftests/mock_drm.h"

+#include "gem/selftests/mock_context.h"
+#include "gem/selftests/igt_gem_utils.h"
+
 #define IGT_IDLE_TIMEOUT 50 /* ms; time to wait after flushing between tests */

 struct hang {
@@ -115,24 +117,18 @@ static int move_to_active(struct i915_vma *vma,
 {
 	int err;

+	i915_vma_lock(vma);
 	err = i915_vma_move_to_active(vma, rq, flags);
-	if (err)
-		return err;
+	i915_vma_unlock(vma);

-	if (!i915_gem_object_has_active_reference(vma->obj)) {
-		i915_gem_object_get(vma->obj);
-		i915_gem_object_set_active_reference(vma->obj);
-	}
-
-	return 0;
+	return err;
 }

 static struct i915_request *
 hang_create_request(struct hang *h, struct intel_engine_cs *engine)
 {
 	struct drm_i915_private *i915 = h->i915;
-	struct i915_address_space *vm =
-		h->ctx->ppgtt ? &h->ctx->ppgtt->vm : &i915->ggtt.vm;
+	struct i915_address_space *vm = h->ctx->vm ?: &i915->ggtt.vm;
 	struct i915_request *rq = NULL;
 	struct i915_vma *hws, *vma;
 	unsigned int flags;
@@ -343,8 +339,7 @@ static int igt_hang_sanitycheck(void *arg)

 		timeout = 0;
 		igt_wedge_on_timeout(&w, i915, HZ / 10 /* 100ms timeout*/)
-			timeout = i915_request_wait(rq,
-						    I915_WAIT_LOCKED,
+			timeout = i915_request_wait(rq, 0,
 						    MAX_SCHEDULE_TIMEOUT);
 		if (i915_reset_failed(i915))
 			timeout = -EIO;
@@ -398,7 +393,7 @@ static int igt_reset_nop(void *arg)
 	}

 	i915_gem_context_clear_bannable(ctx);
-	wakeref = intel_runtime_pm_get(i915);
+	wakeref = intel_runtime_pm_get(&i915->runtime_pm);
 	reset_count = i915_reset_count(&i915->gpu_error);
 	count = 0;
 	do {
@@ -445,7 +440,7 @@ static int igt_reset_nop(void *arg)
 	err = igt_flush_test(i915, I915_WAIT_LOCKED);
 	mutex_unlock(&i915->drm.struct_mutex);
-	intel_runtime_pm_put(i915, wakeref);
+	intel_runtime_pm_put(&i915->runtime_pm, wakeref);

 out:
 	mock_file_free(i915, file);
@@ -482,7 +477,7 @@ static int igt_reset_nop_engine(void *arg)
 	}

 	i915_gem_context_clear_bannable(ctx);
-	wakeref = intel_runtime_pm_get(i915);
+	wakeref = intel_runtime_pm_get(&i915->runtime_pm);
 	for_each_engine(engine, i915, id) {
 		unsigned int reset_count, reset_engine_count;
 		unsigned int count;
@@ -553,7 +548,7 @@ static int igt_reset_nop_engine(void *arg)
 	err = igt_flush_test(i915, I915_WAIT_LOCKED);
 	mutex_unlock(&i915->drm.struct_mutex);
-	intel_runtime_pm_put(i915, wakeref);
+	intel_runtime_pm_put(&i915->runtime_pm, wakeref);

 out:
 	mock_file_free(i915, file);
 	if (i915_reset_failed(i915))
@@ -1102,7 +1097,7 @@ static int igt_reset_wait(void *arg)

 	reset_count = fake_hangcheck(i915, ALL_ENGINES);

-	timeout = i915_request_wait(rq, I915_WAIT_LOCKED, 10);
+	timeout = i915_request_wait(rq, 0, 10);
 	if (timeout < 0) {
 		pr_err("i915_request_wait failed on a stuck request: err=%ld\n",
 		       timeout);
@@ -1250,7 +1245,9 @@ static int __igt_reset_evict_vma(struct drm_i915_private *i915,
 		}
 	}

+	i915_vma_lock(arg.vma);
 	err = i915_vma_move_to_active(arg.vma, rq, flags);
+	i915_vma_unlock(arg.vma);

 	if (flags & EXEC_OBJECT_NEEDS_FENCE)
 		i915_vma_unpin_fence(arg.vma);
@@ -1355,8 +1352,8 @@ static int igt_reset_evict_ppgtt(void *arg)
 	}

 	err = 0;
-	if (ctx->ppgtt) /* aliasing == global gtt locking, covered above */
-		err = __igt_reset_evict_vma(i915, &ctx->ppgtt->vm,
+	if (ctx->vm) /* aliasing == global gtt locking, covered above */
+		err = __igt_reset_evict_vma(i915, ctx->vm,
 					    evict_vma, EXEC_OBJECT_WRITE);

 out:
@@ -1668,9 +1665,7 @@ static int igt_atomic_reset_engine(struct intel_engine_cs *engine,
 		struct igt_wedge_me w;

 		igt_wedge_on_timeout(&w, i915, HZ / 20 /* 50ms timeout*/)
-			i915_request_wait(rq,
-					  I915_WAIT_LOCKED,
-					  MAX_SCHEDULE_TIMEOUT);
+			i915_request_wait(rq, 0, MAX_SCHEDULE_TIMEOUT);
 		if (i915_reset_failed(i915))
 			err = -EIO;
 	}
@@ -1751,7 +1746,7 @@ int intel_hangcheck_live_selftests(struct drm_i915_private *i915)
 	if (i915_terminally_wedged(i915))
 		return -EIO; /* we're long past hope of a successful reset */

-	wakeref = intel_runtime_pm_get(i915);
+	wakeref = intel_runtime_pm_get(&i915->runtime_pm);
 	saved_hangcheck = fetch_and_zero(&i915_modparams.enable_hangcheck);
 	drain_delayed_work(&i915->gpu_error.hangcheck_work); /* flush param */
@@ -1762,7 +1757,7 @@ int intel_hangcheck_live_selftests(struct drm_i915_private *i915)
 	mutex_unlock(&i915->drm.struct_mutex);
 	i915_modparams.enable_hangcheck = saved_hangcheck;
-	intel_runtime_pm_put(i915, wakeref);
+	intel_runtime_pm_put(&i915->runtime_pm, wakeref);

 	return err;
 }
diff --git a/drivers/gpu/drm/i915/gt/selftest_lrc.c b/drivers/gpu/drm/i915/gt/selftest_lrc.c
index a8c50900e2d4..401e8b539297 100644
--- a/drivers/gpu/drm/i915/gt/selftest_lrc.c
+++ b/drivers/gpu/drm/i915/gt/selftest_lrc.c
@@ -6,15 +6,18 @@

 #include <linux/prime_numbers.h>

+#include "gem/i915_gem_pm.h"
 #include "gt/intel_reset.h"
+
 #include "i915_selftest.h"
 #include "selftests/i915_random.h"
 #include "selftests/igt_flush_test.h"
-#include "selftests/igt_gem_utils.h"
 #include "selftests/igt_live_test.h"
 #include "selftests/igt_spinner.h"
 #include "selftests/lib_sw_fence.h"
-#include "selftests/mock_context.h"
+
+#include "gem/selftests/igt_gem_utils.h"
+#include "gem/selftests/mock_context.h"

 static int live_sanitycheck(void *arg)
 {
@@ -30,7 +33,7 @@ static int live_sanitycheck(void *arg)
 		return 0;

 	mutex_lock(&i915->drm.struct_mutex);
-	wakeref = intel_runtime_pm_get(i915);
+	wakeref = intel_runtime_pm_get(&i915->runtime_pm);

 	if (igt_spinner_init(&spin, i915))
 		goto err_unlock;
@@ -71,7 +74,7 @@ err_spin:
 	igt_spinner_fini(&spin);
 err_unlock:
 	igt_flush_test(i915, I915_WAIT_LOCKED);
-	intel_runtime_pm_put(i915, wakeref);
+	intel_runtime_pm_put(&i915->runtime_pm, wakeref);
 	mutex_unlock(&i915->drm.struct_mutex);
 	return err;
 }
@@ -94,7 +97,7 @@ static int live_busywait_preempt(void *arg)
 	 */

 	mutex_lock(&i915->drm.struct_mutex);
-	wakeref = intel_runtime_pm_get(i915);
+	wakeref = intel_runtime_pm_get(&i915->runtime_pm);

 	ctx_hi = kernel_context(i915);
 	if (!ctx_hi)
@@ -189,7 +192,7 @@ static int live_busywait_preempt(void *arg)
 		}

 		/* Low priority request should be busywaiting now */
-		if (i915_request_wait(lo, I915_WAIT_LOCKED, 1) != -ETIME) {
+		if (i915_request_wait(lo, 0, 1) != -ETIME) {
 			pr_err("%s: Busywaiting request did not!\n",
 			       engine->name);
 			err = -EIO;
@@ -217,7 +220,7 @@ static int live_busywait_preempt(void *arg)
 		intel_ring_advance(hi, cs);
 		i915_request_add(hi);

-		if (i915_request_wait(lo, I915_WAIT_LOCKED, HZ / 5) < 0) {
+		if (i915_request_wait(lo, 0, HZ / 5) < 0) {
 			struct drm_printer p = drm_info_printer(i915->drm.dev);

 			pr_err("%s: Failed to preempt semaphore busywait!\n",
@@ -252,7 +255,7 @@ err_ctx_hi:
 err_unlock:
 	if (igt_flush_test(i915, I915_WAIT_LOCKED))
 		err = -EIO;
-	intel_runtime_pm_put(i915, wakeref);
+	intel_runtime_pm_put(&i915->runtime_pm, wakeref);
 	mutex_unlock(&i915->drm.struct_mutex);
 	return err;
 }
@@ -274,7 +277,7 @@ static int live_preempt(void *arg)
 		pr_err("Logical preemption supported, but not exposed\n");

 	mutex_lock(&i915->drm.struct_mutex);
-	wakeref = intel_runtime_pm_get(i915);
+	wakeref = intel_runtime_pm_get(&i915->runtime_pm);

 	if (igt_spinner_init(&spin_hi, i915))
 		goto err_unlock;
@@ -359,7 +362,7 @@ err_spin_hi:
 	igt_spinner_fini(&spin_hi);
 err_unlock:
 	igt_flush_test(i915, I915_WAIT_LOCKED);
-	intel_runtime_pm_put(i915, wakeref);
+	intel_runtime_pm_put(&i915->runtime_pm, wakeref);
 	mutex_unlock(&i915->drm.struct_mutex);
 	return err;
 }
@@ -379,7 +382,7 @@ static int live_late_preempt(void *arg)
 		return 0;

 	mutex_lock(&i915->drm.struct_mutex);
-	wakeref = intel_runtime_pm_get(i915);
+	wakeref = intel_runtime_pm_get(&i915->runtime_pm);

 	if (igt_spinner_init(&spin_hi, i915))
 		goto err_unlock;
@@ -463,7 +466,7 @@ err_spin_hi:
 	igt_spinner_fini(&spin_hi);
 err_unlock:
 	igt_flush_test(i915, I915_WAIT_LOCKED);
-	intel_runtime_pm_put(i915, wakeref);
+	intel_runtime_pm_put(&i915->runtime_pm, wakeref);
 	mutex_unlock(&i915->drm.struct_mutex);
 	return err;
@@ -529,7 +532,7 @@ static int live_suppress_self_preempt(void *arg)
 		return 0; /* presume black blox */

 	mutex_lock(&i915->drm.struct_mutex);
-	wakeref = intel_runtime_pm_get(i915);
+	wakeref = intel_runtime_pm_get(&i915->runtime_pm);

 	if (preempt_client_init(i915, &a))
 		goto err_unlock;
@@ -603,7 +606,7 @@ err_client_a:
 err_unlock:
 	if (igt_flush_test(i915, I915_WAIT_LOCKED))
 		err = -EIO;
-	intel_runtime_pm_put(i915, wakeref);
+	intel_runtime_pm_put(&i915->runtime_pm, wakeref);
 	mutex_unlock(&i915->drm.struct_mutex);
 	return err;
@@ -680,7 +683,7 @@ static int live_suppress_wait_preempt(void *arg)
 		return 0;

 	mutex_lock(&i915->drm.struct_mutex);
-	wakeref = intel_runtime_pm_get(i915);
+	wakeref = intel_runtime_pm_get(&i915->runtime_pm);

 	if (preempt_client_init(i915, &client[0])) /* ELSP[0] */
 		goto err_unlock;
@@ -736,7 +739,6 @@ static int live_suppress_wait_preempt(void *arg)
 			GEM_BUG_ON(!i915_request_started(rq[0]));

 			if (i915_request_wait(rq[depth],
-					      I915_WAIT_LOCKED |
 					      I915_WAIT_PRIORITY,
 					      1) != -ETIME) {
 				pr_err("%s: Waiter depth:%d completed!\n",
@@ -773,7 +775,7 @@ err_client_0:
 err_unlock:
 	if (igt_flush_test(i915, I915_WAIT_LOCKED))
 		err = -EIO;
-	intel_runtime_pm_put(i915, wakeref);
+	intel_runtime_pm_put(&i915->runtime_pm, wakeref);
 	mutex_unlock(&i915->drm.struct_mutex);
 	return err;
@@ -804,7 +806,7 @@ static int live_chain_preempt(void *arg)
 		return 0;

 	mutex_lock(&i915->drm.struct_mutex);
-	wakeref = intel_runtime_pm_get(i915);
+	wakeref = intel_runtime_pm_get(&i915->runtime_pm);

 	if (preempt_client_init(i915, &hi))
 		goto err_unlock;
@@ -838,7 +840,7 @@ static int live_chain_preempt(void *arg)
 			 __func__, engine->name, ring_size);

 		igt_spinner_end(&lo.spin);
-		if (i915_request_wait(rq, I915_WAIT_LOCKED, HZ / 2) < 0) {
+		if (i915_request_wait(rq, 0, HZ / 2) < 0) {
 			pr_err("Timed out waiting to flush %s\n", engine->name);
 			goto err_wedged;
 		}
@@ -879,7 +881,7 @@ static int live_chain_preempt(void *arg)
 			engine->schedule(rq, &attr);

 			igt_spinner_end(&hi.spin);
-			if (i915_request_wait(rq, I915_WAIT_LOCKED, HZ / 5) < 0) {
+			if (i915_request_wait(rq, 0, HZ / 5) < 0) {
 				struct drm_printer p =
 					drm_info_printer(i915->drm.dev);
@@ -895,7 +897,7 @@ static int live_chain_preempt(void *arg)
 			if (IS_ERR(rq))
 				goto err_wedged;
 			i915_request_add(rq);
-			if (i915_request_wait(rq, I915_WAIT_LOCKED, HZ / 5) < 0) {
+			if (i915_request_wait(rq, 0, HZ / 5) < 0) {
 				struct drm_printer p =
 					drm_info_printer(i915->drm.dev);
@@ -921,7 +923,7 @@ err_client_hi:
 err_unlock:
 	if (igt_flush_test(i915, I915_WAIT_LOCKED))
 		err = -EIO;
-	intel_runtime_pm_put(i915, wakeref);
+	intel_runtime_pm_put(&i915->runtime_pm, wakeref);
 	mutex_unlock(&i915->drm.struct_mutex);
 	return err;
@@ -950,7 +952,7 @@ static int live_preempt_hang(void *arg)
 		return 0;

 	mutex_lock(&i915->drm.struct_mutex);
-	wakeref = intel_runtime_pm_get(i915);
+	wakeref = intel_runtime_pm_get(&i915->runtime_pm);

 	if (igt_spinner_init(&spin_hi, i915))
 		goto err_unlock;
@@ -1047,7 +1049,7 @@ err_spin_hi:
 	igt_spinner_fini(&spin_hi);
 err_unlock:
 	igt_flush_test(i915, I915_WAIT_LOCKED);
-	intel_runtime_pm_put(i915, wakeref);
+	intel_runtime_pm_put(&i915->runtime_pm, wakeref);
 	mutex_unlock(&i915->drm.struct_mutex);
 	return err;
 }
@@ -1087,7 +1089,7 @@ static int smoke_submit(struct preempt_smoke *smoke,
 	int err = 0;

 	if (batch) {
-		vma = i915_vma_instance(batch, &ctx->ppgtt->vm, NULL);
+		vma = i915_vma_instance(batch, ctx->vm, NULL);
 		if (IS_ERR(vma))
 			return PTR_ERR(vma);
@@ -1105,11 +1107,13 @@ static int smoke_submit(struct preempt_smoke *smoke,
 	}
 	if (vma) {
+		i915_vma_lock(vma);
 		err = rq->engine->emit_bb_start(rq,
 						vma->node.start,
 						PAGE_SIZE, 0);
 		if (!err)
 			err = i915_vma_move_to_active(vma, rq, 0);
+		i915_vma_unlock(vma);
 	}

 	i915_request_add(rq);
@@ -1251,7 +1255,7 @@ static int live_preempt_smoke(void *arg)
 		return -ENOMEM;

 	mutex_lock(&smoke.i915->drm.struct_mutex);
-	wakeref = intel_runtime_pm_get(smoke.i915);
+	wakeref = intel_runtime_pm_get(&smoke.i915->runtime_pm);

 	smoke.batch = i915_gem_object_create_internal(smoke.i915, PAGE_SIZE);
 	if (IS_ERR(smoke.batch)) {
@@ -1304,7 +1308,7 @@ err_ctx:
 err_batch:
 	i915_gem_object_put(smoke.batch);
 err_unlock:
-	intel_runtime_pm_put(smoke.i915, wakeref);
+	intel_runtime_pm_put(&smoke.i915->runtime_pm, wakeref);
 	mutex_unlock(&smoke.i915->drm.struct_mutex);
 	kfree(smoke.contexts);
@@ -1391,9 +1395,7 @@ static int nop_virtual_engine(struct drm_i915_private *i915,
 		}

 		for (nc = 0; nc < nctx; nc++) {
-			if (i915_request_wait(request[nc],
-					      I915_WAIT_LOCKED,
-					      HZ / 10) < 0) {
+			if (i915_request_wait(request[nc], 0, HZ / 10) < 0) {
 				pr_err("%s(%s): wait for %llx:%lld timed out\n",
 				       __func__, ve[0]->engine->name,
 				       request[nc]->fence.context,
@@ -1526,8 +1528,8 @@ static int mask_virtual_engine(struct drm_i915_private *i915,
 	for (n = 0; n < nsibling; n++) {
 		request[n] = i915_request_create(ve);
-		if (IS_ERR(request)) {
-			err = PTR_ERR(request);
+		if (IS_ERR(request[n])) {
+			err = PTR_ERR(request[n]);
 			nsibling = n;
 			goto out;
 		}
@@ -1540,7 +1542,7 @@ static int mask_virtual_engine(struct drm_i915_private *i915,
 	}

 	for (n = 0; n < nsibling; n++) {
-		if (i915_request_wait(request[n], I915_WAIT_LOCKED, HZ / 10) < 0) {
+		if (i915_request_wait(request[n], 0, HZ / 10) < 0) {
 			pr_err("%s(%s): wait for %llx:%lld timed out\n",
 			       __func__, ve->engine->name,
 			       request[n]->fence.context,
@@ -1715,9 +1717,7 @@ static int bond_virtual_engine(struct drm_i915_private *i915,
 		}
 		onstack_fence_fini(&fence);

-		if (i915_request_wait(rq[0],
-				      I915_WAIT_LOCKED,
-				      HZ / 10) < 0) {
+		if (i915_request_wait(rq[0], 0, HZ / 10) < 0) {
 			pr_err("Master request did not execute (on %s)!\n",
 			       rq[0]->engine->name);
 			err = -EIO;
@@ -1725,8 +1725,7 @@ static int bond_virtual_engine(struct drm_i915_private *i915,
 		}

 		for (n = 0; n < nsibling; n++) {
-			if (i915_request_wait(rq[n + 1],
-					      I915_WAIT_LOCKED,
+			if (i915_request_wait(rq[n + 1], 0,
 					      MAX_SCHEDULE_TIMEOUT) < 0) {
 				err = -EIO;
 				goto out;
diff --git a/drivers/gpu/drm/i915/gt/selftest_reset.c b/drivers/gpu/drm/i915/gt/selftest_reset.c
index 607473439eb0..89da9e7cc1ba 100644
--- a/drivers/gpu/drm/i915/gt/selftest_reset.c
+++ b/drivers/gpu/drm/i915/gt/selftest_reset.c
@@ -42,14 +42,14 @@ static int igt_wedged_reset(void *arg)
 	/* Check that we can recover a wedged device with a GPU reset */

 	igt_global_reset_lock(i915);
-	wakeref = intel_runtime_pm_get(i915);
+	wakeref = intel_runtime_pm_get(&i915->runtime_pm);

 	i915_gem_set_wedged(i915);

 	GEM_BUG_ON(!i915_reset_failed(i915));
 	i915_reset(i915, ALL_ENGINES, NULL);

-	intel_runtime_pm_put(i915, wakeref);
+	intel_runtime_pm_put(&i915->runtime_pm, wakeref);
 	igt_global_reset_unlock(i915);

 	return i915_reset_failed(i915) ? -EIO : 0;
@@ -111,7 +111,7 @@ int intel_reset_live_selftests(struct drm_i915_private *i915)
 	if (i915_terminally_wedged(i915))
 		return -EIO; /* we're long past hope of a successful reset */

-	with_intel_runtime_pm(i915, wakeref)
+	with_intel_runtime_pm(&i915->runtime_pm, wakeref)
 		err = i915_subtests(tests, i915);

 	return err;
diff --git a/drivers/gpu/drm/i915/gt/selftest_workarounds.c b/drivers/gpu/drm/i915/gt/selftest_workarounds.c
index f9c9e7291187..9eaf030affd0 100644
--- a/drivers/gpu/drm/i915/gt/selftest_workarounds.c
+++ b/drivers/gpu/drm/i915/gt/selftest_workarounds.c
@@ -4,17 +4,19 @@
 * Copyright © 2018 Intel Corporation
 */

+#include "gem/i915_gem_pm.h"
 #include "i915_selftest.h"
 #include "intel_reset.h"

 #include "selftests/igt_flush_test.h"
-#include "selftests/igt_gem_utils.h"
 #include "selftests/igt_reset.h"
 #include "selftests/igt_spinner.h"
 #include "selftests/igt_wedge_me.h"
-#include "selftests/mock_context.h"
 #include "selftests/mock_drm.h"

+#include "gem/selftests/igt_gem_utils.h"
+#include "gem/selftests/mock_context.h"
+
 static const struct wo_register {
 	enum intel_platform platform;
 	u32 reg;
@@ -116,7 +118,9 @@ read_nonprivs(struct i915_gem_context *ctx, struct intel_engine_cs *engine)
 		goto err_pin;
 	}

+	i915_vma_lock(vma);
 	err = i915_vma_move_to_active(vma, rq, EXEC_OBJECT_WRITE);
+	i915_vma_unlock(vma);
 	if (err)
 		goto err_req;
@@ -138,9 +142,6 @@ read_nonprivs(struct i915_gem_context *ctx, struct intel_engine_cs *engine)
 	}
 	intel_ring_advance(rq, cs);

-	i915_gem_object_get(result);
-	i915_gem_object_set_active_reference(result);
-
 	i915_request_add(rq);
 	i915_vma_unpin(vma);
@@ -193,8 +194,10 @@ static int check_whitelist(struct i915_gem_context *ctx,
 		return PTR_ERR(results);

 	err = 0;
+	i915_gem_object_lock(results);
 	igt_wedge_on_timeout(&wedge, ctx->i915, HZ / 5) /* a safety net! */
 		err = i915_gem_object_set_to_cpu_domain(results, false);
+	i915_gem_object_unlock(results);
 	if (i915_terminally_wedged(ctx->i915))
 		err = -EIO;
 	if (err)
@@ -253,7 +256,7 @@ switch_to_scratch_context(struct intel_engine_cs *engine,
 	GEM_BUG_ON(i915_gem_context_is_bannable(ctx));

 	rq = ERR_PTR(-ENODEV);
-	with_intel_runtime_pm(engine->i915, wakeref)
+	with_intel_runtime_pm(&engine->i915->runtime_pm, wakeref)
 		rq = igt_spinner_create_request(spin, ctx, engine, MI_NOOP);

 	kernel_context_close(ctx);
@@ -309,7 +312,7 @@ static int check_whitelist_across_reset(struct intel_engine_cs *engine,
 	if (err)
 		goto out;

-	with_intel_runtime_pm(i915, wakeref)
+	with_intel_runtime_pm(&i915->runtime_pm, wakeref)
 		err = reset(engine);

 	igt_spinner_end(&spin);
@@ -355,7 +358,7 @@ static struct i915_vma *create_batch(struct i915_gem_context *ctx)
 	if (IS_ERR(obj))
 		return ERR_CAST(obj);

-	vma = i915_vma_instance(obj, &ctx->ppgtt->vm, NULL);
+	vma = i915_vma_instance(obj, ctx->vm, NULL);
 	if (IS_ERR(vma)) {
 		err = PTR_ERR(vma);
 		goto err_obj;
 	}
@@ -365,10 +368,6 @@ static struct i915_vma *create_batch(struct i915_gem_context *ctx)
 	if (err)
 		goto err_obj;

-	err = i915_gem_object_set_to_wc_domain(obj, true);
-	if (err)
-		goto err_obj;
-
 	return vma;

 err_obj:
@@ -403,6 +402,29 @@ static bool wo_register(struct intel_engine_cs *engine, u32 reg)
 	return false;
 }

+static bool ro_register(u32 reg)
+{
+	if (reg & RING_FORCE_TO_NONPRIV_RD)
+		return true;
+
+	return false;
+}
+
+static int whitelist_writable_count(struct intel_engine_cs *engine)
+{
+	int count = engine->whitelist.count;
+	int i;
+
+	for (i = 0; i < engine->whitelist.count; i++) {
+		u32 reg = i915_mmio_reg_offset(engine->whitelist.list[i].reg);
+
+		if (ro_register(reg))
+			count--;
+	}
+
+	return count;
+}
+
 static int check_dirty_whitelist(struct i915_gem_context *ctx,
 				 struct intel_engine_cs *engine)
 {
@@ -437,7 +459,7 @@ static int check_dirty_whitelist(struct i915_gem_context *ctx,
 	int err = 0, i, v;
 	u32 *cs, *results;

-	scratch = create_scratch(&ctx->ppgtt->vm, 2 * ARRAY_SIZE(values) + 1);
+	scratch = create_scratch(ctx->vm, 2 * ARRAY_SIZE(values) + 1);
 	if (IS_ERR(scratch))
 		return PTR_ERR(scratch);
@@ -458,6 +480,9 @@ static int check_dirty_whitelist(struct i915_gem_context *ctx,
 		if (wo_register(engine, reg))
 			continue;

+		if (ro_register(reg))
+			continue;
+
 		srm = MI_STORE_REGISTER_MEM;
 		lrm = MI_LOAD_REGISTER_MEM;
 		if (INTEL_GEN(ctx->i915) >= 8)
@@ -542,7 +567,7 @@ err_request:
 		if (err)
 			goto out_batch;

-		if (i915_request_wait(rq, I915_WAIT_LOCKED, HZ / 5) < 0) {
+		if (i915_request_wait(rq, 0, HZ / 5) < 0) {
 			pr_err("%s: Futzing %x timedout; cancelling test\n",
 			       engine->name, reg);
 			i915_gem_set_wedged(ctx->i915);
@@ -637,7 +662,7 @@ static int live_dirty_whitelist(void *arg)
 	if (INTEL_GEN(i915) < 7) /* minimum requirement for LRI, SRM, LRM */
 		return 0;

-	wakeref = intel_runtime_pm_get(i915);
+	wakeref = intel_runtime_pm_get(&i915->runtime_pm);

 	mutex_unlock(&i915->drm.struct_mutex);
 	file = mock_file(i915);
@@ -667,7 +692,7 @@ out_file:
 	mock_file_free(i915, file);
 	mutex_lock(&i915->drm.struct_mutex);
 out_rpm:
-	intel_runtime_pm_put(i915, wakeref);
+	intel_runtime_pm_put(&i915->runtime_pm, wakeref);
 	return err;
 }
@@ -729,9 +754,13 @@ static int read_whitelisted_registers(struct i915_gem_context *ctx,

 	for (i = 0; i < engine->whitelist.count; i++) {
 		u64 offset = results->node.start + sizeof(u32) * i;
+		u32 reg = i915_mmio_reg_offset(engine->whitelist.list[i].reg);
+
+		/* Clear RD only and WR only flags */
+		reg &= ~(RING_FORCE_TO_NONPRIV_RD | RING_FORCE_TO_NONPRIV_WR);

 		*cs++ = srm;
-		*cs++ = i915_mmio_reg_offset(engine->whitelist.list[i].reg);
+		*cs++ = reg;
 		*cs++ = lower_32_bits(offset);
 		*cs++ = upper_32_bits(offset);
 	}
@@ -740,7 +769,7 @@ static int read_whitelisted_registers(struct i915_gem_context *ctx,

 err_req:
 	i915_request_add(rq);
-	if (i915_request_wait(rq, I915_WAIT_LOCKED, HZ / 5) < 0)
+	if (i915_request_wait(rq, 0, HZ / 5) < 0)
 		err = -EIO;

 	return err;
@@ -764,9 +793,14 @@ static int scrub_whitelisted_registers(struct i915_gem_context *ctx,
 		goto err_batch;
 	}

-	*cs++ = MI_LOAD_REGISTER_IMM(engine->whitelist.count);
+	*cs++ = MI_LOAD_REGISTER_IMM(whitelist_writable_count(engine));
 	for (i = 0; i < engine->whitelist.count; i++) {
-		*cs++ = i915_mmio_reg_offset(engine->whitelist.list[i].reg);
+		u32 reg = i915_mmio_reg_offset(engine->whitelist.list[i].reg);
+
+		if (ro_register(reg))
+			continue;
+
+		*cs++ = reg;
 		*cs++ = 0xffffffff;
 	}
 	*cs++ = MI_BATCH_BUFFER_END;
@@ -791,7 +825,7 @@ static int scrub_whitelisted_registers(struct i915_gem_context *ctx,

 err_request:
 	i915_request_add(rq);
-	if (i915_request_wait(rq, I915_WAIT_LOCKED, HZ / 5) < 0)
+	if (i915_request_wait(rq, 0, HZ / 5) < 0)
 		err = -EIO;

 err_unpin:
@@ -920,7 +954,7 @@ static int live_isolated_whitelist(void *arg)
 	if (!intel_engines_has_context_isolation(i915))
 		return 0;

-	if (!i915->kernel_context->ppgtt)
+	if (!i915->kernel_context->vm)
 		return 0;

 	for (i = 0; i < ARRAY_SIZE(client); i++) {
@@ -932,14 +966,14 @@ static int live_isolated_whitelist(void *arg)
 			goto err;
 		}

-		client[i].scratch[0] = create_scratch(&c->ppgtt->vm, 1024);
+		client[i].scratch[0] = create_scratch(c->vm, 1024);
 		if (IS_ERR(client[i].scratch[0])) {
 			err = PTR_ERR(client[i].scratch[0]);
 			kernel_context_close(c);
 			goto err;
 		}

-		client[i].scratch[1] = create_scratch(&c->ppgtt->vm, 1024);
+		client[i].scratch[1] = create_scratch(c->vm, 1024);
 		if (IS_ERR(client[i].scratch[1])) {
 			err = PTR_ERR(client[i].scratch[1]);
 			i915_vma_unpin_and_release(&client[i].scratch[0], 0);
@@ -951,7 +985,7 @@ static int live_isolated_whitelist(void *arg)
 	}

 	for_each_engine(engine, i915, id) {
-		if (!engine->whitelist.count)
+		if (!whitelist_writable_count(engine))
 			continue;

 		/* Read default values */
@@ -1056,7 +1090,7 @@ live_gpu_reset_workarounds(void *arg)
 	pr_info("Verifying after GPU reset...\n");

 	igt_global_reset_lock(i915);
-	wakeref = intel_runtime_pm_get(i915);
+	wakeref = intel_runtime_pm_get(&i915->runtime_pm);

 	reference_lists_init(i915, &lists);
@@ -1071,7 +1105,7 @@ live_gpu_reset_workarounds(void *arg)
 out:
 	kernel_context_close(ctx);
 	reference_lists_fini(i915, &lists);
-	intel_runtime_pm_put(i915, wakeref);
+	intel_runtime_pm_put(&i915->runtime_pm, wakeref);
 	igt_global_reset_unlock(i915);

 	return ok ? 0 : -ESRCH;
@@ -1098,7 +1132,7 @@ live_engine_reset_workarounds(void *arg)
 		return PTR_ERR(ctx);

 	igt_global_reset_lock(i915);
-	wakeref = intel_runtime_pm_get(i915);
+	wakeref = intel_runtime_pm_get(&i915->runtime_pm);

 	reference_lists_init(i915, &lists);
@@ -1155,7 +1189,7 @@ live_engine_reset_workarounds(void *arg)

 err:
 	reference_lists_fini(i915, &lists);
-	intel_runtime_pm_put(i915, wakeref);
+	intel_runtime_pm_put(&i915->runtime_pm, wakeref);
 	igt_global_reset_unlock(i915);
 	kernel_context_close(ctx);