summaryrefslogtreecommitdiffstats
path: root/drivers/gpu/drm/i915/i915_gem.c
diff options
context:
space:
mode:
Diffstat (limited to 'drivers/gpu/drm/i915/i915_gem.c')
-rw-r--r--drivers/gpu/drm/i915/i915_gem.c150
1 files changed, 121 insertions, 29 deletions
diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c
index 8bc3283484be..1632f18e6a64 100644
--- a/drivers/gpu/drm/i915/i915_gem.c
+++ b/drivers/gpu/drm/i915/i915_gem.c
@@ -369,7 +369,8 @@ i915_gem_object_wait_fence(struct dma_fence *fence,
if (i915_gem_request_completed(rq))
goto out;
- /* This client is about to stall waiting for the GPU. In many cases
+ /*
+ * This client is about to stall waiting for the GPU. In many cases
* this is undesirable and limits the throughput of the system, as
* many clients cannot continue processing user input/output whilst
* blocked. RPS autotuning may take tens of milliseconds to respond
@@ -384,11 +385,9 @@ i915_gem_object_wait_fence(struct dma_fence *fence,
* forcing the clocks too high for the whole system, we only allow
* each client to waitboost once in a busy period.
*/
- if (rps_client) {
+ if (rps_client && !i915_gem_request_started(rq)) {
if (INTEL_GEN(rq->i915) >= 6)
gen6_rps_boost(rq, rps_client);
- else
- rps_client = NULL;
}
timeout = i915_wait_request(rq, flags, timeout);
@@ -2824,24 +2823,23 @@ i915_gem_object_pwrite_gtt(struct drm_i915_gem_object *obj,
return 0;
}
-static bool ban_context(const struct i915_gem_context *ctx,
- unsigned int score)
-{
- return (i915_gem_context_is_bannable(ctx) &&
- score >= CONTEXT_SCORE_BAN_THRESHOLD);
-}
-
static void i915_gem_context_mark_guilty(struct i915_gem_context *ctx)
{
- unsigned int score;
bool banned;
atomic_inc(&ctx->guilty_count);
- score = atomic_add_return(CONTEXT_SCORE_GUILTY, &ctx->ban_score);
- banned = ban_context(ctx, score);
- DRM_DEBUG_DRIVER("context %s marked guilty (score %d) banned? %s\n",
- ctx->name, score, yesno(banned));
+ banned = false;
+ if (i915_gem_context_is_bannable(ctx)) {
+ unsigned int score;
+
+ score = atomic_add_return(CONTEXT_SCORE_GUILTY,
+ &ctx->ban_score);
+ banned = score >= CONTEXT_SCORE_BAN_THRESHOLD;
+
+ DRM_DEBUG_DRIVER("context %s marked guilty (score %d) banned? %s\n",
+ ctx->name, score, yesno(banned));
+ }
if (!banned)
return;
@@ -3135,7 +3133,7 @@ void i915_gem_reset(struct drm_i915_private *dev_priv)
* an incoherent read by the CS (presumably stale TLB). An
* empty request appears sufficient to paper over the glitch.
*/
- if (list_empty(&engine->timeline->requests)) {
+ if (intel_engine_is_idle(engine)) {
struct drm_i915_gem_request *rq;
rq = i915_gem_request_alloc(engine,
@@ -3200,6 +3198,13 @@ void i915_gem_set_wedged(struct drm_i915_private *i915)
struct intel_engine_cs *engine;
enum intel_engine_id id;
+ if (drm_debug & DRM_UT_DRIVER) {
+ struct drm_printer p = drm_debug_printer(__func__);
+
+ for_each_engine(engine, i915, id)
+ intel_engine_dump(engine, &p, "%s\n", engine->name);
+ }
+
/*
* First, stop submission to hw, but do not yet complete requests by
* rolling the global seqno forward (since this would complete requests
@@ -3323,15 +3328,73 @@ i915_gem_retire_work_handler(struct work_struct *work)
mutex_unlock(&dev->struct_mutex);
}
- /* Keep the retire handler running until we are finally idle.
+ /*
+ * Keep the retire handler running until we are finally idle.
* We do not need to do this test under locking as in the worst-case
* we queue the retire worker once too often.
*/
- if (READ_ONCE(dev_priv->gt.awake)) {
- i915_queue_hangcheck(dev_priv);
+ if (READ_ONCE(dev_priv->gt.awake))
queue_delayed_work(dev_priv->wq,
&dev_priv->gt.retire_work,
round_jiffies_up_relative(HZ));
+}
+
+static void shrink_caches(struct drm_i915_private *i915)
+{
+ /*
+ * kmem_cache_shrink() discards empty slabs and reorders partially
+ * filled slabs to prioritise allocating from the mostly full slabs,
+ * with the aim of reducing fragmentation.
+ */
+ kmem_cache_shrink(i915->priorities);
+ kmem_cache_shrink(i915->dependencies);
+ kmem_cache_shrink(i915->requests);
+ kmem_cache_shrink(i915->luts);
+ kmem_cache_shrink(i915->vmas);
+ kmem_cache_shrink(i915->objects);
+}
+
+struct sleep_rcu_work {
+ union {
+ struct rcu_head rcu;
+ struct work_struct work;
+ };
+ struct drm_i915_private *i915;
+ unsigned int epoch;
+};
+
+static inline bool
+same_epoch(struct drm_i915_private *i915, unsigned int epoch)
+{
+ /*
+ * There is a small chance that the epoch wrapped since we started
+ * sleeping. If we assume that epoch is at least a u32, then it will
+ * take at least 2^32 * 100ms for it to wrap, or about 326 years.
+ */
+ return epoch == READ_ONCE(i915->gt.epoch);
+}
+
+static void __sleep_work(struct work_struct *work)
+{
+ struct sleep_rcu_work *s = container_of(work, typeof(*s), work);
+ struct drm_i915_private *i915 = s->i915;
+ unsigned int epoch = s->epoch;
+
+ kfree(s);
+ if (same_epoch(i915, epoch))
+ shrink_caches(i915);
+}
+
+static void __sleep_rcu(struct rcu_head *rcu)
+{
+ struct sleep_rcu_work *s = container_of(rcu, typeof(*s), rcu);
+ struct drm_i915_private *i915 = s->i915;
+
+ if (same_epoch(i915, s->epoch)) {
+ INIT_WORK(&s->work, __sleep_work);
+ queue_work(i915->wq, &s->work);
+ } else {
+ kfree(s);
}
}
@@ -3347,6 +3410,7 @@ i915_gem_idle_work_handler(struct work_struct *work)
{
struct drm_i915_private *dev_priv =
container_of(work, typeof(*dev_priv), gt.idle_work.work);
+ unsigned int epoch = I915_EPOCH_INVALID;
bool rearm_hangcheck;
ktime_t end;
@@ -3406,6 +3470,8 @@ i915_gem_idle_work_handler(struct work_struct *work)
GEM_BUG_ON(!dev_priv->gt.awake);
dev_priv->gt.awake = false;
+ epoch = dev_priv->gt.epoch;
+ GEM_BUG_ON(epoch == I915_EPOCH_INVALID);
rearm_hangcheck = false;
if (INTEL_GEN(dev_priv) >= 6)
@@ -3422,6 +3488,23 @@ out_rearm:
GEM_BUG_ON(!dev_priv->gt.awake);
i915_queue_hangcheck(dev_priv);
}
+
+ /*
+ * When we are idle, it is an opportune time to reap our caches.
+ * However, we have many objects that utilise RCU and the ordered
+ * i915->wq that this work is executing on. To try and flush any
+ * pending frees now we are idle, we first wait for an RCU grace
+ * period, and then queue a task (that will run last on the wq) to
+ * shrink and re-optimize the caches.
+ */
+ if (same_epoch(dev_priv, epoch)) {
+ struct sleep_rcu_work *s = kmalloc(sizeof(*s), GFP_KERNEL);
+ if (s) {
+ s->i915 = dev_priv;
+ s->epoch = epoch;
+ call_rcu(&s->rcu, __sleep_rcu);
+ }
+ }
}
void i915_gem_close_object(struct drm_gem_object *gem, struct drm_file *file)
@@ -3567,7 +3650,7 @@ static int wait_for_engines(struct drm_i915_private *i915)
for_each_engine(engine, i915, id)
intel_engine_dump(engine, &p,
- "%s", engine->name);
+ "%s\n", engine->name);
}
i915_gem_set_wedged(i915);
@@ -4699,7 +4782,8 @@ static void __i915_gem_free_work(struct work_struct *work)
container_of(work, struct drm_i915_private, mm.free_work);
struct llist_node *freed;
- /* All file-owned VMA should have been released by this point through
+ /*
+ * All file-owned VMA should have been released by this point through
* i915_gem_close_object(), or earlier by i915_gem_context_close().
* However, the object may also be bound into the global GTT (e.g.
* older GPUs without per-process support, or for direct access through
@@ -4726,13 +4810,18 @@ static void __i915_gem_free_object_rcu(struct rcu_head *head)
container_of(head, typeof(*obj), rcu);
struct drm_i915_private *i915 = to_i915(obj->base.dev);
- /* We can't simply use call_rcu() from i915_gem_free_object()
- * as we need to block whilst unbinding, and the call_rcu
- * task may be called from softirq context. So we take a
- * detour through a worker.
+ /*
+ * Since we require blocking on struct_mutex to unbind the freed
+ * object from the GPU before releasing resources back to the
+ * system, we can not do that directly from the RCU callback (which may
+ * be a softirq context), but must instead then defer that work onto a
+ * kthread. We use the RCU callback rather than move the freed object
+ * directly onto the work queue so that we can mix between using the
+ * worker and performing frees directly from subsequent allocations for
+ * crude but effective memory throttling.
*/
if (llist_add(&obj->freed, &i915->mm.free_list))
- schedule_work(&i915->mm.free_work);
+ queue_work(i915->wq, &i915->mm.free_work);
}
void i915_gem_free_object(struct drm_gem_object *gem_obj)
@@ -4745,7 +4834,8 @@ void i915_gem_free_object(struct drm_gem_object *gem_obj)
if (discard_backing_storage(obj))
obj->mm.madv = I915_MADV_DONTNEED;
- /* Before we free the object, make sure any pure RCU-only
+ /*
+ * Before we free the object, make sure any pure RCU-only
* read-side critical sections are complete, e.g.
* i915_gem_busy_ioctl(). For the corresponding synchronized
* lookup see i915_gem_object_lookup_rcu().
@@ -5187,7 +5277,7 @@ int i915_gem_init(struct drm_i915_private *dev_priv)
if (ret)
return ret;
- ret = intel_uc_init_wq(dev_priv);
+ ret = intel_uc_init_misc(dev_priv);
if (ret)
return ret;
@@ -5283,6 +5373,8 @@ err_unlock:
intel_uncore_forcewake_put(dev_priv, FORCEWAKE_ALL);
mutex_unlock(&dev_priv->drm.struct_mutex);
+ intel_uc_fini_misc(dev_priv);
+
if (ret != -EIO)
i915_gem_cleanup_userptr(dev_priv);
OpenPOWER on IntegriCloud