diff options
Diffstat (limited to 'drivers/gpu/drm/i915/i915_gem.c')
-rw-r--r-- | drivers/gpu/drm/i915/i915_gem.c | 150 |
1 files changed, 121 insertions, 29 deletions
diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c index 8bc3283484be..1632f18e6a64 100644 --- a/drivers/gpu/drm/i915/i915_gem.c +++ b/drivers/gpu/drm/i915/i915_gem.c @@ -369,7 +369,8 @@ i915_gem_object_wait_fence(struct dma_fence *fence, if (i915_gem_request_completed(rq)) goto out; - /* This client is about to stall waiting for the GPU. In many cases + /* + * This client is about to stall waiting for the GPU. In many cases * this is undesirable and limits the throughput of the system, as * many clients cannot continue processing user input/output whilst * blocked. RPS autotuning may take tens of milliseconds to respond @@ -384,11 +385,9 @@ i915_gem_object_wait_fence(struct dma_fence *fence, * forcing the clocks too high for the whole system, we only allow * each client to waitboost once in a busy period. */ - if (rps_client) { + if (rps_client && !i915_gem_request_started(rq)) { if (INTEL_GEN(rq->i915) >= 6) gen6_rps_boost(rq, rps_client); - else - rps_client = NULL; } timeout = i915_wait_request(rq, flags, timeout); @@ -2824,24 +2823,23 @@ i915_gem_object_pwrite_gtt(struct drm_i915_gem_object *obj, return 0; } -static bool ban_context(const struct i915_gem_context *ctx, - unsigned int score) -{ - return (i915_gem_context_is_bannable(ctx) && - score >= CONTEXT_SCORE_BAN_THRESHOLD); -} - static void i915_gem_context_mark_guilty(struct i915_gem_context *ctx) { - unsigned int score; bool banned; atomic_inc(&ctx->guilty_count); - score = atomic_add_return(CONTEXT_SCORE_GUILTY, &ctx->ban_score); - banned = ban_context(ctx, score); - DRM_DEBUG_DRIVER("context %s marked guilty (score %d) banned? %s\n", - ctx->name, score, yesno(banned)); + banned = false; + if (i915_gem_context_is_bannable(ctx)) { + unsigned int score; + + score = atomic_add_return(CONTEXT_SCORE_GUILTY, + &ctx->ban_score); + banned = score >= CONTEXT_SCORE_BAN_THRESHOLD; + + DRM_DEBUG_DRIVER("context %s marked guilty (score %d) banned? %s\n", + ctx->name, score, yesno(banned)); + } if (!banned) return; @@ -3135,7 +3133,7 @@ void i915_gem_reset(struct drm_i915_private *dev_priv) * an incoherent read by the CS (presumably stale TLB). An * empty request appears sufficient to paper over the glitch. */ - if (list_empty(&engine->timeline->requests)) { + if (intel_engine_is_idle(engine)) { struct drm_i915_gem_request *rq; rq = i915_gem_request_alloc(engine, @@ -3200,6 +3198,13 @@ void i915_gem_set_wedged(struct drm_i915_private *i915) struct intel_engine_cs *engine; enum intel_engine_id id; + if (drm_debug & DRM_UT_DRIVER) { + struct drm_printer p = drm_debug_printer(__func__); + + for_each_engine(engine, i915, id) + intel_engine_dump(engine, &p, "%s\n", engine->name); + } + /* * First, stop submission to hw, but do not yet complete requests by * rolling the global seqno forward (since this would complete requests @@ -3323,15 +3328,73 @@ i915_gem_retire_work_handler(struct work_struct *work) mutex_unlock(&dev->struct_mutex); } - /* Keep the retire handler running until we are finally idle. + /* + * Keep the retire handler running until we are finally idle. * We do not need to do this test under locking as in the worst-case * we queue the retire worker once too often. */ - if (READ_ONCE(dev_priv->gt.awake)) { - i915_queue_hangcheck(dev_priv); + if (READ_ONCE(dev_priv->gt.awake)) queue_delayed_work(dev_priv->wq, &dev_priv->gt.retire_work, round_jiffies_up_relative(HZ)); +} + +static void shrink_caches(struct drm_i915_private *i915) +{ + /* + * kmem_cache_shrink() discards empty slabs and reorders partially + * filled slabs to prioritise allocating from the mostly full slabs, + * with the aim of reducing fragmentation. + */ + kmem_cache_shrink(i915->priorities); + kmem_cache_shrink(i915->dependencies); + kmem_cache_shrink(i915->requests); + kmem_cache_shrink(i915->luts); + kmem_cache_shrink(i915->vmas); + kmem_cache_shrink(i915->objects); +} + +struct sleep_rcu_work { + union { + struct rcu_head rcu; + struct work_struct work; + }; + struct drm_i915_private *i915; + unsigned int epoch; +}; + +static inline bool +same_epoch(struct drm_i915_private *i915, unsigned int epoch) +{ + /* + * There is a small chance that the epoch wrapped since we started + * sleeping. If we assume that epoch is at least a u32, then it will + * take at least 2^32 * 100ms for it to wrap, or about 326 years. + */ + return epoch == READ_ONCE(i915->gt.epoch); +} + +static void __sleep_work(struct work_struct *work) +{ + struct sleep_rcu_work *s = container_of(work, typeof(*s), work); + struct drm_i915_private *i915 = s->i915; + unsigned int epoch = s->epoch; + + kfree(s); + if (same_epoch(i915, epoch)) + shrink_caches(i915); +} + +static void __sleep_rcu(struct rcu_head *rcu) +{ + struct sleep_rcu_work *s = container_of(rcu, typeof(*s), rcu); + struct drm_i915_private *i915 = s->i915; + + if (same_epoch(i915, s->epoch)) { + INIT_WORK(&s->work, __sleep_work); + queue_work(i915->wq, &s->work); + } else { + kfree(s); } } @@ -3347,6 +3410,7 @@ i915_gem_idle_work_handler(struct work_struct *work) { struct drm_i915_private *dev_priv = container_of(work, typeof(*dev_priv), gt.idle_work.work); + unsigned int epoch = I915_EPOCH_INVALID; bool rearm_hangcheck; ktime_t end; @@ -3406,6 +3470,8 @@ i915_gem_idle_work_handler(struct work_struct *work) GEM_BUG_ON(!dev_priv->gt.awake); dev_priv->gt.awake = false; + epoch = dev_priv->gt.epoch; + GEM_BUG_ON(epoch == I915_EPOCH_INVALID); rearm_hangcheck = false; if (INTEL_GEN(dev_priv) >= 6) @@ -3422,6 +3488,23 @@ out_rearm: GEM_BUG_ON(!dev_priv->gt.awake); i915_queue_hangcheck(dev_priv); } + + /* + * When we are idle, it is an opportune time to reap our caches. + * However, we have many objects that utilise RCU and the ordered + * i915->wq that this work is executing on. To try and flush any + * pending frees now we are idle, we first wait for an RCU grace + * period, and then queue a task (that will run last on the wq) to + * shrink and re-optimize the caches. + */ + if (same_epoch(dev_priv, epoch)) { + struct sleep_rcu_work *s = kmalloc(sizeof(*s), GFP_KERNEL); + if (s) { + s->i915 = dev_priv; + s->epoch = epoch; + call_rcu(&s->rcu, __sleep_rcu); + } + } } void i915_gem_close_object(struct drm_gem_object *gem, struct drm_file *file) @@ -3567,7 +3650,7 @@ static int wait_for_engines(struct drm_i915_private *i915) for_each_engine(engine, i915, id) intel_engine_dump(engine, &p, - "%s", engine->name); + "%s\n", engine->name); } i915_gem_set_wedged(i915); @@ -4699,7 +4782,8 @@ static void __i915_gem_free_work(struct work_struct *work) container_of(work, struct drm_i915_private, mm.free_work); struct llist_node *freed; - /* All file-owned VMA should have been released by this point through + /* + * All file-owned VMA should have been released by this point through * i915_gem_close_object(), or earlier by i915_gem_context_close(). * However, the object may also be bound into the global GTT (e.g. * older GPUs without per-process support, or for direct access through @@ -4726,13 +4810,18 @@ static void __i915_gem_free_object_rcu(struct rcu_head *head) container_of(head, typeof(*obj), rcu); struct drm_i915_private *i915 = to_i915(obj->base.dev); - /* We can't simply use call_rcu() from i915_gem_free_object() - * as we need to block whilst unbinding, and the call_rcu - * task may be called from softirq context. So we take a - * detour through a worker. + /* + * Since we require blocking on struct_mutex to unbind the freed + * object from the GPU before releasing resources back to the + * system, we can not do that directly from the RCU callback (which may + * be a softirq context), but must instead then defer that work onto a + * kthread. We use the RCU callback rather than move the freed object + * directly onto the work queue so that we can mix between using the + * worker and performing frees directly from subsequent allocations for + * crude but effective memory throttling. */ if (llist_add(&obj->freed, &i915->mm.free_list)) - schedule_work(&i915->mm.free_work); + queue_work(i915->wq, &i915->mm.free_work); } void i915_gem_free_object(struct drm_gem_object *gem_obj) @@ -4745,7 +4834,8 @@ void i915_gem_free_object(struct drm_gem_object *gem_obj) if (discard_backing_storage(obj)) obj->mm.madv = I915_MADV_DONTNEED; - /* Before we free the object, make sure any pure RCU-only + /* + * Before we free the object, make sure any pure RCU-only * read-side critical sections are complete, e.g. * i915_gem_busy_ioctl(). For the corresponding synchronized * lookup see i915_gem_object_lookup_rcu(). @@ -5187,7 +5277,7 @@ int i915_gem_init(struct drm_i915_private *dev_priv) if (ret) return ret; - ret = intel_uc_init_wq(dev_priv); + ret = intel_uc_init_misc(dev_priv); if (ret) return ret; @@ -5283,6 +5373,8 @@ err_unlock: intel_uncore_forcewake_put(dev_priv, FORCEWAKE_ALL); mutex_unlock(&dev_priv->drm.struct_mutex); + intel_uc_fini_misc(dev_priv); + if (ret != -EIO) i915_gem_cleanup_userptr(dev_priv); |