From 03ac84f1830ec0b90f622500591eb3cc554ee479 Mon Sep 17 00:00:00 2001
From: Chris Wilson <chris@chris-wilson.co.uk>
Date: Fri, 28 Oct 2016 13:58:36 +0100
Subject: drm/i915: Pass around sg_table to get_pages/put_pages backend

The plan is to move obj->pages out from under the struct_mutex into its
own per-object lock. We need to prune any assumption of the struct_mutex
from the get_pages/put_pages backends, and to make it easier we pass
around the sg_table to operate on rather than indirectly via the obj.

Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
Reviewed-by: Joonas Lahtinen <joonas.lahtinen@linux.intel.com>
Link: http://patchwork.freedesktop.org/patch/msgid/20161028125858.23563-13-chris@chris-wilson.co.uk
---
 drivers/gpu/drm/i915/i915_gem_gtt.h | 6 ++++--
 1 file changed, 4 insertions(+), 2 deletions(-)

(limited to 'drivers/gpu/drm/i915/i915_gem_gtt.h')

diff --git a/drivers/gpu/drm/i915/i915_gem_gtt.h b/drivers/gpu/drm/i915/i915_gem_gtt.h
index c241d8143255..dbe6a6cec20d 100644
--- a/drivers/gpu/drm/i915/i915_gem_gtt.h
+++ b/drivers/gpu/drm/i915/i915_gem_gtt.h
@@ -628,8 +628,10 @@ void i915_check_and_clear_faults(struct drm_i915_private *dev_priv);
 void i915_gem_suspend_gtt_mappings(struct drm_device *dev);
 void i915_gem_restore_gtt_mappings(struct drm_device *dev);
 
-int __must_check i915_gem_gtt_prepare_object(struct drm_i915_gem_object *obj);
-void i915_gem_gtt_finish_object(struct drm_i915_gem_object *obj);
+int __must_check i915_gem_gtt_prepare_pages(struct drm_i915_gem_object *obj,
+					    struct sg_table *pages);
+void i915_gem_gtt_finish_pages(struct drm_i915_gem_object *obj,
+			       struct sg_table *pages);
 
 /* Flags used by pin/bind&friends. */
 #define PIN_NONBLOCK		BIT(0)
-- 
cgit v1.2.3


From d07f0e59b2c762584478920cd2d11fba2980a94a Mon Sep 17 00:00:00 2001
From: Chris Wilson <chris@chris-wilson.co.uk>
Date: Fri, 28 Oct 2016 13:58:44 +0100
Subject: drm/i915: Move GEM activity tracking into a common struct
 reservation_object

In preparation to support many distinct timelines, we need to expand the
activity tracking on the GEM object to handle more than just a request
per engine. We already use the struct reservation_object on the dma-buf
to handle many fence contexts, so integrating that into the GEM object
itself is the preferred solution. (For example, we can now share the same
reservation_object between every consumer/producer using this buffer and
skip the manual import/export via dma-buf.)

v2: Reimplement busy-ioctl (by walking the reservation object), postpone
the ABI change for another day. Similarly use the reservation object to
find the last_write request (if active and from i915) for choosing
display CS flips.

Caveats:

 * busy-ioctl: busy-ioctl only reports on the native fences, it will not
warn of stalls (in set-domain-ioctl, pread/pwrite etc) if the object is
being rendered to by external fences. It also will not report the same
busy state as wait-ioctl (or polling on the dma-buf) in the same
circumstances. On the plus side, it does retain reporting of which
*i915* engines are engaged with this object.

 * non-blocking atomic modesets take a step backwards as the wait for
render completion blocks the ioctl. This is fixed in a subsequent
patch to use a fence instead for awaiting on the rendering, see
"drm/i915: Restore nonblocking awaits for modesetting"

 * dynamic array manipulation for shared-fences in reservation is slower
than the previous lockless static assignment (e.g. gem_exec_lut_handle
runtime on ivb goes from 42s to 66s), mainly due to atomic operations
(maintaining the fence refcounts).

 * loss of object-level retirement callbacks, emulated by VMA retirement
tracking.

 * minor loss of object-level last activity information from debugfs,
could be replaced with per-vma information if desired

Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
Reviewed-by: Joonas Lahtinen <joonas.lahtinen@linux.intel.com>
Link: http://patchwork.freedesktop.org/patch/msgid/20161028125858.23563-21-chris@chris-wilson.co.uk
---
 drivers/gpu/drm/i915/i915_debugfs.c        |  15 +-
 drivers/gpu/drm/i915/i915_drv.h            |  62 +++----
 drivers/gpu/drm/i915/i915_gem.c            | 263 ++++++++---------------------
 drivers/gpu/drm/i915/i915_gem_batch_pool.c |  11 +-
 drivers/gpu/drm/i915/i915_gem_dmabuf.c     |  53 +-----
 drivers/gpu/drm/i915/i915_gem_dmabuf.h     |  45 -----
 drivers/gpu/drm/i915/i915_gem_execbuffer.c |  55 ++----
 drivers/gpu/drm/i915/i915_gem_gtt.c        |  32 ++++
 drivers/gpu/drm/i915/i915_gem_gtt.h        |   1 +
 drivers/gpu/drm/i915/i915_gem_request.c    |  48 +++---
 drivers/gpu/drm/i915/i915_gem_request.h    |  35 +---
 drivers/gpu/drm/i915/i915_gpu_error.c      |   6 +-
 drivers/gpu/drm/i915/intel_atomic_plane.c  |   2 -
 drivers/gpu/drm/i915/intel_display.c       | 114 +++----------
 drivers/gpu/drm/i915/intel_drv.h           |   3 -
 15 files changed, 212 insertions(+), 533 deletions(-)
 delete mode 100644 drivers/gpu/drm/i915/i915_gem_dmabuf.h

(limited to 'drivers/gpu/drm/i915/i915_gem_gtt.h')

diff --git a/drivers/gpu/drm/i915/i915_debugfs.c b/drivers/gpu/drm/i915/i915_debugfs.c
index b0b01002c0d1..b6325065c8e8 100644
--- a/drivers/gpu/drm/i915/i915_debugfs.c
+++ b/drivers/gpu/drm/i915/i915_debugfs.c
@@ -136,11 +136,10 @@ describe_obj(struct seq_file *m, struct drm_i915_gem_object *obj)
 	struct i915_vma *vma;
 	unsigned int frontbuffer_bits;
 	int pin_count = 0;
-	enum intel_engine_id id;
 
 	lockdep_assert_held(&obj->base.dev->struct_mutex);
 
-	seq_printf(m, "%pK: %c%c%c%c%c %8zdKiB %02x %02x [ ",
+	seq_printf(m, "%pK: %c%c%c%c%c %8zdKiB %02x %02x %s%s%s",
 		   &obj->base,
 		   get_active_flag(obj),
 		   get_pin_flag(obj),
@@ -149,14 +148,7 @@ describe_obj(struct seq_file *m, struct drm_i915_gem_object *obj)
 		   get_pin_mapped_flag(obj),
 		   obj->base.size / 1024,
 		   obj->base.read_domains,
-		   obj->base.write_domain);
-	for_each_engine(engine, dev_priv, id)
-		seq_printf(m, "%x ",
-			   i915_gem_active_get_seqno(&obj->last_read[id],
-						     &obj->base.dev->struct_mutex));
-	seq_printf(m, "] %x %s%s%s",
-		   i915_gem_active_get_seqno(&obj->last_write,
-					     &obj->base.dev->struct_mutex),
+		   obj->base.write_domain,
 		   i915_cache_level_str(dev_priv, obj->cache_level),
 		   obj->mm.dirty ? " dirty" : "",
 		   obj->mm.madv == I915_MADV_DONTNEED ? " purgeable" : "");
@@ -187,8 +179,7 @@ describe_obj(struct seq_file *m, struct drm_i915_gem_object *obj)
 	if (obj->stolen)
 		seq_printf(m, " (stolen: %08llx)", obj->stolen->start);
 
-	engine = i915_gem_active_get_engine(&obj->last_write,
-					    &dev_priv->drm.struct_mutex);
+	engine = i915_gem_object_last_write_engine(obj);
 	if (engine)
 		seq_printf(m, " (%s)", engine->name);
 
diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h
index c3c49bc4d2ac..693ee69a4715 100644
--- a/drivers/gpu/drm/i915/i915_drv.h
+++ b/drivers/gpu/drm/i915/i915_drv.h
@@ -41,6 +41,7 @@
 #include <linux/intel-iommu.h>
 #include <linux/kref.h>
 #include <linux/pm_qos.h>
+#include <linux/reservation.h>
 #include <linux/shmem_fs.h>
 
 #include <drm/drmP.h>
@@ -2246,21 +2247,12 @@ struct drm_i915_gem_object {
 	struct list_head batch_pool_link;
 
 	unsigned long flags;
-	/**
-	 * This is set if the object is on the active lists (has pending
-	 * rendering and so a non-zero seqno), and is not set if it i s on
-	 * inactive (ready to be unbound) list.
-	 */
-#define I915_BO_ACTIVE_SHIFT 0
-#define I915_BO_ACTIVE_MASK ((1 << I915_NUM_ENGINES) - 1)
-#define __I915_BO_ACTIVE(bo) \
-	((READ_ONCE((bo)->flags) >> I915_BO_ACTIVE_SHIFT) & I915_BO_ACTIVE_MASK)
 
 	/**
 	 * Have we taken a reference for the object for incomplete GPU
 	 * activity?
 	 */
-#define I915_BO_ACTIVE_REF (I915_BO_ACTIVE_SHIFT + I915_NUM_ENGINES)
+#define I915_BO_ACTIVE_REF 0
 
 	/*
 	 * Is the object to be mapped as read-only to the GPU
@@ -2281,6 +2273,7 @@ struct drm_i915_gem_object {
 
 	/** Count of VMA actually bound by this object */
 	unsigned int bind_count;
+	unsigned int active_count;
 	unsigned int pin_display;
 
 	struct {
@@ -2320,8 +2313,7 @@ struct drm_i915_gem_object {
 	 * read request. This allows for the CPU to read from an active
 	 * buffer by only waiting for the write to complete.
 	 */
-	struct i915_gem_active last_read[I915_NUM_ENGINES];
-	struct i915_gem_active last_write;
+	struct reservation_object *resv;
 
 	/** References from framebuffers, locks out tiling changes. */
 	unsigned long framebuffer_references;
@@ -2340,6 +2332,8 @@ struct drm_i915_gem_object {
 
 	/** for phys allocated objects */
 	struct drm_dma_handle *phys_handle;
+
+	struct reservation_object __builtin_resv;
 };
 
 static inline struct drm_i915_gem_object *
@@ -2425,35 +2419,10 @@ i915_gem_object_has_struct_page(const struct drm_i915_gem_object *obj)
 	return obj->ops->flags & I915_GEM_OBJECT_HAS_STRUCT_PAGE;
 }
 
-static inline unsigned long
-i915_gem_object_get_active(const struct drm_i915_gem_object *obj)
-{
-	return (obj->flags >> I915_BO_ACTIVE_SHIFT) & I915_BO_ACTIVE_MASK;
-}
-
 static inline bool
 i915_gem_object_is_active(const struct drm_i915_gem_object *obj)
 {
-	return i915_gem_object_get_active(obj);
-}
-
-static inline void
-i915_gem_object_set_active(struct drm_i915_gem_object *obj, int engine)
-{
-	obj->flags |= BIT(engine + I915_BO_ACTIVE_SHIFT);
-}
-
-static inline void
-i915_gem_object_clear_active(struct drm_i915_gem_object *obj, int engine)
-{
-	obj->flags &= ~BIT(engine + I915_BO_ACTIVE_SHIFT);
-}
-
-static inline bool
-i915_gem_object_has_active_engine(const struct drm_i915_gem_object *obj,
-				  int engine)
-{
-	return obj->flags & BIT(engine + I915_BO_ACTIVE_SHIFT);
+	return obj->active_count;
 }
 
 static inline bool
@@ -2496,6 +2465,23 @@ i915_gem_object_get_stride(struct drm_i915_gem_object *obj)
 	return obj->tiling_and_stride & STRIDE_MASK;
 }
 
+static inline struct intel_engine_cs *
+i915_gem_object_last_write_engine(struct drm_i915_gem_object *obj)
+{
+	struct intel_engine_cs *engine = NULL;
+	struct dma_fence *fence;
+
+	rcu_read_lock();
+	fence = reservation_object_get_excl_rcu(obj->resv);
+	rcu_read_unlock();
+
+	if (fence && dma_fence_is_i915(fence) && !dma_fence_is_signaled(fence))
+		engine = to_request(fence)->engine;
+	dma_fence_put(fence);
+
+	return engine;
+}
+
 static inline struct i915_vma *i915_vma_get(struct i915_vma *vma)
 {
 	i915_gem_object_get(vma->obj);
diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c
index fdcbaab21fdd..1161a21ec810 100644
--- a/drivers/gpu/drm/i915/i915_gem.c
+++ b/drivers/gpu/drm/i915/i915_gem.c
@@ -29,7 +29,6 @@
 #include <drm/drm_vma_manager.h>
 #include <drm/i915_drm.h>
 #include "i915_drv.h"
-#include "i915_gem_dmabuf.h"
 #include "i915_vgpu.h"
 #include "i915_trace.h"
 #include "intel_drv.h"
@@ -447,11 +446,6 @@ i915_gem_object_wait(struct drm_i915_gem_object *obj,
 		     long timeout,
 		     struct intel_rps_client *rps)
 {
-	struct reservation_object *resv;
-	struct i915_gem_active *active;
-	unsigned long active_mask;
-	int idx;
-
 	might_sleep();
 #if IS_ENABLED(CONFIG_LOCKDEP)
 	GEM_BUG_ON(debug_locks &&
@@ -460,33 +454,9 @@ i915_gem_object_wait(struct drm_i915_gem_object *obj,
 #endif
 	GEM_BUG_ON(timeout < 0);
 
-	if (flags & I915_WAIT_ALL) {
-		active = obj->last_read;
-		active_mask = i915_gem_object_get_active(obj);
-	} else {
-		active_mask = 1;
-		active = &obj->last_write;
-	}
-
-	for_each_active(active_mask, idx) {
-		struct drm_i915_gem_request *request;
-
-		request = i915_gem_active_get_unlocked(&active[idx]);
-		if (request) {
-			timeout = i915_gem_object_wait_fence(&request->fence,
-							     flags, timeout,
-							     rps);
-			i915_gem_request_put(request);
-		}
-		if (timeout < 0)
-			return timeout;
-	}
-
-	resv = i915_gem_object_get_dmabuf_resv(obj);
-	if (resv)
-		timeout = i915_gem_object_wait_reservation(resv,
-							   flags, timeout,
-							   rps);
+	timeout = i915_gem_object_wait_reservation(obj->resv,
+						   flags, timeout,
+						   rps);
 	return timeout < 0 ? timeout : 0;
 }
 
@@ -2549,44 +2519,6 @@ err_unlock:
 	goto out_unlock;
 }
 
-static void
-i915_gem_object_retire__write(struct i915_gem_active *active,
-			      struct drm_i915_gem_request *request)
-{
-	struct drm_i915_gem_object *obj =
-		container_of(active, struct drm_i915_gem_object, last_write);
-
-	intel_fb_obj_flush(obj, true, ORIGIN_CS);
-}
-
-static void
-i915_gem_object_retire__read(struct i915_gem_active *active,
-			     struct drm_i915_gem_request *request)
-{
-	int idx = request->engine->id;
-	struct drm_i915_gem_object *obj =
-		container_of(active, struct drm_i915_gem_object, last_read[idx]);
-
-	GEM_BUG_ON(!i915_gem_object_has_active_engine(obj, idx));
-
-	i915_gem_object_clear_active(obj, idx);
-	if (i915_gem_object_is_active(obj))
-		return;
-
-	/* Bump our place on the bound list to keep it roughly in LRU order
-	 * so that we don't steal from recently used but inactive objects
-	 * (unless we are forced to ofc!)
-	 */
-	if (obj->bind_count)
-		list_move_tail(&obj->global_list,
-			       &request->i915->mm.bound_list);
-
-	if (i915_gem_object_has_active_reference(obj)) {
-		i915_gem_object_clear_active_reference(obj);
-		i915_gem_object_put(obj);
-	}
-}
-
 static bool i915_context_is_banned(const struct i915_gem_context *ctx)
 {
 	unsigned long elapsed;
@@ -2966,6 +2898,13 @@ int i915_vma_unbind(struct i915_vma *vma)
 		 * In order to prevent it from being recursively closed,
 		 * take a pin on the vma so that the second unbind is
 		 * aborted.
+		 *
+		 * Even more scary is that the retire callback may free
+		 * the object (last active vma). To prevent the explosion
+		 * we defer the actual object free to a worker that can
+		 * only proceed once it acquires the struct_mutex (which
+		 * we currently hold, therefore it cannot free this object
+		 * before we are finished).
 		 */
 		__i915_vma_pin(vma);
 
@@ -4010,83 +3949,42 @@ static __always_inline unsigned int __busy_write_id(unsigned int id)
 }
 
 static __always_inline unsigned int
-__busy_set_if_active(const struct i915_gem_active *active,
+__busy_set_if_active(const struct dma_fence *fence,
 		     unsigned int (*flag)(unsigned int id))
 {
-	struct drm_i915_gem_request *request;
-
-	request = rcu_dereference(active->request);
-	if (!request || i915_gem_request_completed(request))
-		return 0;
+	struct drm_i915_gem_request *rq;
 
-	/* This is racy. See __i915_gem_active_get_rcu() for an in detail
-	 * discussion of how to handle the race correctly, but for reporting
-	 * the busy state we err on the side of potentially reporting the
-	 * wrong engine as being busy (but we guarantee that the result
-	 * is at least self-consistent).
-	 *
-	 * As we use SLAB_DESTROY_BY_RCU, the request may be reallocated
-	 * whilst we are inspecting it, even under the RCU read lock as we are.
-	 * This means that there is a small window for the engine and/or the
-	 * seqno to have been overwritten. The seqno will always be in the
-	 * future compared to the intended, and so we know that if that
-	 * seqno is idle (on whatever engine) our request is idle and the
-	 * return 0 above is correct.
+	/* We have to check the current hw status of the fence as the uABI
+	 * guarantees forward progress. We could rely on the idle worker
+	 * to eventually flush us, but to minimise latency just ask the
+	 * hardware.
 	 *
-	 * The issue is that if the engine is switched, it is just as likely
-	 * to report that it is busy (but since the switch happened, we know
-	 * the request should be idle). So there is a small chance that a busy
-	 * result is actually the wrong engine.
-	 *
-	 * So why don't we care?
-	 *
-	 * For starters, the busy ioctl is a heuristic that is by definition
-	 * racy. Even with perfect serialisation in the driver, the hardware
-	 * state is constantly advancing - the state we report to the user
-	 * is stale.
-	 *
-	 * The critical information for the busy-ioctl is whether the object
-	 * is idle as userspace relies on that to detect whether its next
-	 * access will stall, or if it has missed submitting commands to
-	 * the hardware allowing the GPU to stall. We never generate a
-	 * false-positive for idleness, thus busy-ioctl is reliable at the
-	 * most fundamental level, and we maintain the guarantee that a
-	 * busy object left to itself will eventually become idle (and stay
-	 * idle!).
-	 *
-	 * We allow ourselves the leeway of potentially misreporting the busy
-	 * state because that is an optimisation heuristic that is constantly
-	 * in flux. Being quickly able to detect the busy/idle state is much
-	 * more important than accurate logging of exactly which engines were
-	 * busy.
-	 *
-	 * For accuracy in reporting the engine, we could use
-	 *
-	 *	result = 0;
-	 *	request = __i915_gem_active_get_rcu(active);
-	 *	if (request) {
-	 *		if (!i915_gem_request_completed(request))
-	 *			result = flag(request->engine->exec_id);
-	 *		i915_gem_request_put(request);
-	 *	}
-	 *
-	 * but that still remains susceptible to both hardware and userspace
-	 * races. So we accept making the result of that race slightly worse,
-	 * given the rarity of the race and its low impact on the result.
+	 * Note we only report on the status of native fences.
 	 */
-	return flag(READ_ONCE(request->engine->exec_id));
+	if (!dma_fence_is_i915(fence))
+		return 0;
+
+	/* opencode to_request() in order to avoid const warnings */
+	rq = container_of(fence, struct drm_i915_gem_request, fence);
+	if (i915_gem_request_completed(rq))
+		return 0;
+
+	return flag(rq->engine->exec_id);
 }
 
 static __always_inline unsigned int
-busy_check_reader(const struct i915_gem_active *active)
+busy_check_reader(const struct dma_fence *fence)
 {
-	return __busy_set_if_active(active, __busy_read_flag);
+	return __busy_set_if_active(fence, __busy_read_flag);
 }
 
 static __always_inline unsigned int
-busy_check_writer(const struct i915_gem_active *active)
+busy_check_writer(const struct dma_fence *fence)
 {
-	return __busy_set_if_active(active, __busy_write_id);
+	if (!fence)
+		return 0;
+
+	return __busy_set_if_active(fence, __busy_write_id);
 }
 
 int
@@ -4095,63 +3993,55 @@ i915_gem_busy_ioctl(struct drm_device *dev, void *data,
 {
 	struct drm_i915_gem_busy *args = data;
 	struct drm_i915_gem_object *obj;
-	unsigned long active;
+	struct reservation_object_list *list;
+	unsigned int seq;
 	int err;
 
+	err = -ENOENT;
 	rcu_read_lock();
 	obj = i915_gem_object_lookup_rcu(file, args->handle);
-	if (!obj) {
-		err = -ENOENT;
+	if (!obj)
 		goto out;
-	}
 
-	args->busy = 0;
-	active = __I915_BO_ACTIVE(obj);
-	if (active) {
-		int idx;
+	/* A discrepancy here is that we do not report the status of
+	 * non-i915 fences, i.e. even though we may report the object as idle,
+	 * a call to set-domain may still stall waiting for foreign rendering.
+	 * This also means that wait-ioctl may report an object as busy,
+	 * where busy-ioctl considers it idle.
+	 *
+	 * We trade the ability to warn of foreign fences to report on which
+	 * i915 engines are active for the object.
+	 *
+	 * Alternatively, we can trade that extra information on read/write
+	 * activity with
+	 *	args->busy =
+	 *		!reservation_object_test_signaled_rcu(obj->resv, true);
+	 * to report the overall busyness. This is what the wait-ioctl does.
+	 *
+	 */
+retry:
+	seq = raw_read_seqcount(&obj->resv->seq);
 
-		/* Yes, the lookups are intentionally racy.
-		 *
-		 * First, we cannot simply rely on __I915_BO_ACTIVE. We have
-		 * to regard the value as stale and as our ABI guarantees
-		 * forward progress, we confirm the status of each active
-		 * request with the hardware.
-		 *
-		 * Even though we guard the pointer lookup by RCU, that only
-		 * guarantees that the pointer and its contents remain
-		 * dereferencable and does *not* mean that the request we
-		 * have is the same as the one being tracked by the object.
-		 *
-		 * Consider that we lookup the request just as it is being
-		 * retired and freed. We take a local copy of the pointer,
-		 * but before we add its engine into the busy set, the other
-		 * thread reallocates it and assigns it to a task on another
-		 * engine with a fresh and incomplete seqno. Guarding against
-		 * that requires careful serialisation and reference counting,
-		 * i.e. using __i915_gem_active_get_request_rcu(). We don't,
-		 * instead we expect that if the result is busy, which engines
-		 * are busy is not completely reliable - we only guarantee
-		 * that the object was busy.
-		 */
+	/* Translate the exclusive fence to the READ *and* WRITE engine */
+	args->busy = busy_check_writer(rcu_dereference(obj->resv->fence_excl));
 
-		for_each_active(active, idx)
-			args->busy |= busy_check_reader(&obj->last_read[idx]);
+	/* Translate shared fences to READ set of engines */
+	list = rcu_dereference(obj->resv->fence);
+	if (list) {
+		unsigned int shared_count = list->shared_count, i;
 
-		/* For ABI sanity, we only care that the write engine is in
-		 * the set of read engines. This should be ensured by the
-		 * ordering of setting last_read/last_write in
-		 * i915_vma_move_to_active(), and then in reverse in retire.
-		 * However, for good measure, we always report the last_write
-		 * request as a busy read as well as being a busy write.
-		 *
-		 * We don't care that the set of active read/write engines
-		 * may change during construction of the result, as it is
-		 * equally liable to change before userspace can inspect
-		 * the result.
-		 */
-		args->busy |= busy_check_writer(&obj->last_write);
+		for (i = 0; i < shared_count; ++i) {
+			struct dma_fence *fence =
+				rcu_dereference(list->shared[i]);
+
+			args->busy |= busy_check_reader(fence);
+		}
 	}
 
+	if (args->busy && read_seqcount_retry(&obj->resv->seq, seq))
+		goto retry;
+
+	err = 0;
 out:
 	rcu_read_unlock();
 	return err;
@@ -4216,23 +4106,19 @@ out:
 void i915_gem_object_init(struct drm_i915_gem_object *obj,
 			  const struct drm_i915_gem_object_ops *ops)
 {
-	int i;
-
 	mutex_init(&obj->mm.lock);
 
 	INIT_LIST_HEAD(&obj->global_list);
 	INIT_LIST_HEAD(&obj->userfault_link);
-	for (i = 0; i < I915_NUM_ENGINES; i++)
-		init_request_active(&obj->last_read[i],
-				    i915_gem_object_retire__read);
-	init_request_active(&obj->last_write,
-			    i915_gem_object_retire__write);
 	INIT_LIST_HEAD(&obj->obj_exec_link);
 	INIT_LIST_HEAD(&obj->vma_list);
 	INIT_LIST_HEAD(&obj->batch_pool_link);
 
 	obj->ops = ops;
 
+	reservation_object_init(&obj->__builtin_resv);
+	obj->resv = &obj->__builtin_resv;
+
 	obj->frontbuffer_ggtt_origin = ORIGIN_GTT;
 
 	obj->mm.madv = I915_MADV_WILLNEED;
@@ -4385,6 +4271,7 @@ static void __i915_gem_free_objects(struct drm_i915_private *i915,
 		if (obj->base.import_attach)
 			drm_prime_gem_destroy(&obj->base, NULL);
 
+		reservation_object_fini(&obj->__builtin_resv);
 		drm_gem_object_release(&obj->base);
 		i915_gem_info_remove_obj(i915, obj->base.size);
 
diff --git a/drivers/gpu/drm/i915/i915_gem_batch_pool.c b/drivers/gpu/drm/i915/i915_gem_batch_pool.c
index e0f38e5c0fbb..b3bc119ec1bb 100644
--- a/drivers/gpu/drm/i915/i915_gem_batch_pool.c
+++ b/drivers/gpu/drm/i915/i915_gem_batch_pool.c
@@ -114,11 +114,18 @@ i915_gem_batch_pool_get(struct i915_gem_batch_pool *pool,
 
 	list_for_each_entry(tmp, list, batch_pool_link) {
 		/* The batches are strictly LRU ordered */
-		if (!i915_gem_active_is_idle(&tmp->last_read[pool->engine->id],
-					     &tmp->base.dev->struct_mutex))
+		if (i915_gem_object_is_active(tmp))
 			break;
 
+		GEM_BUG_ON(!reservation_object_test_signaled_rcu(tmp->resv,
+								 true));
+
 		if (tmp->base.size >= size) {
+			/* Clear the set of shared fences early */
+			ww_mutex_lock(&tmp->resv->lock, NULL);
+			reservation_object_add_excl_fence(tmp->resv, NULL);
+			ww_mutex_unlock(&tmp->resv->lock);
+
 			obj = tmp;
 			break;
 		}
diff --git a/drivers/gpu/drm/i915/i915_gem_dmabuf.c b/drivers/gpu/drm/i915/i915_gem_dmabuf.c
index 4d45f20d11ed..5e38299b5df6 100644
--- a/drivers/gpu/drm/i915/i915_gem_dmabuf.c
+++ b/drivers/gpu/drm/i915/i915_gem_dmabuf.c
@@ -211,60 +211,17 @@ static const struct dma_buf_ops i915_dmabuf_ops =  {
 	.end_cpu_access = i915_gem_end_cpu_access,
 };
 
-static void export_fences(struct drm_i915_gem_object *obj,
-			  struct dma_buf *dma_buf)
-{
-	struct reservation_object *resv = dma_buf->resv;
-	struct drm_i915_gem_request *req;
-	unsigned long active;
-	int idx;
-
-	active = __I915_BO_ACTIVE(obj);
-	if (!active)
-		return;
-
-	/* Serialise with execbuf to prevent concurrent fence-loops */
-	mutex_lock(&obj->base.dev->struct_mutex);
-
-	/* Mark the object for future fences before racily adding old fences */
-	obj->base.dma_buf = dma_buf;
-
-	ww_mutex_lock(&resv->lock, NULL);
-
-	for_each_active(active, idx) {
-		req = i915_gem_active_get(&obj->last_read[idx],
-					  &obj->base.dev->struct_mutex);
-		if (!req)
-			continue;
-
-		if (reservation_object_reserve_shared(resv) == 0)
-			reservation_object_add_shared_fence(resv, &req->fence);
-
-		i915_gem_request_put(req);
-	}
-
-	req = i915_gem_active_get(&obj->last_write,
-				  &obj->base.dev->struct_mutex);
-	if (req) {
-		reservation_object_add_excl_fence(resv, &req->fence);
-		i915_gem_request_put(req);
-	}
-
-	ww_mutex_unlock(&resv->lock);
-	mutex_unlock(&obj->base.dev->struct_mutex);
-}
-
 struct dma_buf *i915_gem_prime_export(struct drm_device *dev,
 				      struct drm_gem_object *gem_obj, int flags)
 {
 	struct drm_i915_gem_object *obj = to_intel_bo(gem_obj);
 	DEFINE_DMA_BUF_EXPORT_INFO(exp_info);
-	struct dma_buf *dma_buf;
 
 	exp_info.ops = &i915_dmabuf_ops;
 	exp_info.size = gem_obj->size;
 	exp_info.flags = flags;
 	exp_info.priv = gem_obj;
+	exp_info.resv = obj->resv;
 
 	if (obj->ops->dmabuf_export) {
 		int ret = obj->ops->dmabuf_export(obj);
@@ -272,12 +229,7 @@ struct dma_buf *i915_gem_prime_export(struct drm_device *dev,
 			return ERR_PTR(ret);
 	}
 
-	dma_buf = drm_gem_dmabuf_export(dev, &exp_info);
-	if (IS_ERR(dma_buf))
-		return dma_buf;
-
-	export_fences(obj, dma_buf);
-	return dma_buf;
+	return drm_gem_dmabuf_export(dev, &exp_info);
 }
 
 static struct sg_table *
@@ -335,6 +287,7 @@ struct drm_gem_object *i915_gem_prime_import(struct drm_device *dev,
 	drm_gem_private_object_init(dev, &obj->base, dma_buf->size);
 	i915_gem_object_init(obj, &i915_gem_object_dmabuf_ops);
 	obj->base.import_attach = attach;
+	obj->resv = dma_buf->resv;
 
 	/* We use GTT as shorthand for a coherent domain, one that is
 	 * neither in the GPU cache nor in the CPU cache, where all
diff --git a/drivers/gpu/drm/i915/i915_gem_dmabuf.h b/drivers/gpu/drm/i915/i915_gem_dmabuf.h
deleted file mode 100644
index 91315557e421..000000000000
--- a/drivers/gpu/drm/i915/i915_gem_dmabuf.h
+++ /dev/null
@@ -1,45 +0,0 @@
-/*
- * Copyright 2016 Intel Corporation
- *
- * Permission is hereby granted, free of charge, to any person obtaining a
- * copy of this software and associated documentation files (the "Software"),
- * to deal in the Software without restriction, including without limitation
- * the rights to use, copy, modify, merge, publish, distribute, sublicense,
- * and/or sell copies of the Software, and to permit persons to whom the
- * Software is furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice (including the next
- * paragraph) shall be included in all copies or substantial portions of the
- * Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
- * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
- * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
- * DEALINGS IN THE SOFTWARE.
- *
- */
-
-#ifndef _I915_GEM_DMABUF_H_
-#define _I915_GEM_DMABUF_H_
-
-#include <linux/dma-buf.h>
-
-static inline struct reservation_object *
-i915_gem_object_get_dmabuf_resv(struct drm_i915_gem_object *obj)
-{
-	struct dma_buf *dma_buf;
-
-	if (obj->base.dma_buf)
-		dma_buf = obj->base.dma_buf;
-	else if (obj->base.import_attach)
-		dma_buf = obj->base.import_attach->dmabuf;
-	else
-		return NULL;
-
-	return dma_buf->resv;
-}
-
-#endif
diff --git a/drivers/gpu/drm/i915/i915_gem_execbuffer.c b/drivers/gpu/drm/i915/i915_gem_execbuffer.c
index d95c4e02eeb9..c35e847bb8bc 100644
--- a/drivers/gpu/drm/i915/i915_gem_execbuffer.c
+++ b/drivers/gpu/drm/i915/i915_gem_execbuffer.c
@@ -34,7 +34,6 @@
 #include <drm/i915_drm.h>
 
 #include "i915_drv.h"
-#include "i915_gem_dmabuf.h"
 #include "i915_trace.h"
 #include "intel_drv.h"
 #include "intel_frontbuffer.h"
@@ -1101,45 +1100,20 @@ err:
 	return ret;
 }
 
-static unsigned int eb_other_engines(struct drm_i915_gem_request *req)
-{
-	unsigned int mask;
-
-	mask = ~intel_engine_flag(req->engine) & I915_BO_ACTIVE_MASK;
-	mask <<= I915_BO_ACTIVE_SHIFT;
-
-	return mask;
-}
-
 static int
 i915_gem_execbuffer_move_to_gpu(struct drm_i915_gem_request *req,
 				struct list_head *vmas)
 {
-	const unsigned int other_rings = eb_other_engines(req);
 	struct i915_vma *vma;
 	int ret;
 
 	list_for_each_entry(vma, vmas, exec_list) {
 		struct drm_i915_gem_object *obj = vma->obj;
-		struct reservation_object *resv;
 
-		if (obj->flags & other_rings) {
-			ret = i915_gem_request_await_object
-				(req, obj, obj->base.pending_write_domain);
-			if (ret)
-				return ret;
-		}
-
-		resv = i915_gem_object_get_dmabuf_resv(obj);
-		if (resv) {
-			ret = i915_sw_fence_await_reservation
-				(&req->submit, resv, &i915_fence_ops,
-				 obj->base.pending_write_domain,
-				 I915_FENCE_TIMEOUT,
-				 GFP_KERNEL | __GFP_NOWARN);
-			if (ret < 0)
-				return ret;
-		}
+		ret = i915_gem_request_await_object
+			(req, obj, obj->base.pending_write_domain);
+		if (ret)
+			return ret;
 
 		if (obj->base.write_domain & I915_GEM_DOMAIN_CPU)
 			i915_gem_clflush_object(obj, false);
@@ -1281,8 +1255,6 @@ void i915_vma_move_to_active(struct i915_vma *vma,
 
 	GEM_BUG_ON(!drm_mm_node_allocated(&vma->node));
 
-	obj->mm.dirty = true; /* be paranoid  */
-
 	/* Add a reference if we're newly entering the active list.
 	 * The order in which we add operations to the retirement queue is
 	 * vital here: mark_active adds to the start of the callback list,
@@ -1290,11 +1262,14 @@ void i915_vma_move_to_active(struct i915_vma *vma,
 	 * add the active reference first and queue for it to be dropped
 	 * *last*.
 	 */
-	i915_gem_object_set_active(obj, idx);
-	i915_gem_active_set(&obj->last_read[idx], req);
+	if (!i915_vma_is_active(vma))
+		obj->active_count++;
+	i915_vma_set_active(vma, idx);
+	i915_gem_active_set(&vma->last_read[idx], req);
+	list_move_tail(&vma->vm_link, &vma->vm->active_list);
 
 	if (flags & EXEC_OBJECT_WRITE) {
-		i915_gem_active_set(&obj->last_write, req);
+		i915_gem_active_set(&vma->last_write, req);
 
 		intel_fb_obj_invalidate(obj, ORIGIN_CS);
 
@@ -1304,21 +1279,13 @@ void i915_vma_move_to_active(struct i915_vma *vma,
 
 	if (flags & EXEC_OBJECT_NEEDS_FENCE)
 		i915_gem_active_set(&vma->last_fence, req);
-
-	i915_vma_set_active(vma, idx);
-	i915_gem_active_set(&vma->last_read[idx], req);
-	list_move_tail(&vma->vm_link, &vma->vm->active_list);
 }
 
 static void eb_export_fence(struct drm_i915_gem_object *obj,
 			    struct drm_i915_gem_request *req,
 			    unsigned int flags)
 {
-	struct reservation_object *resv;
-
-	resv = i915_gem_object_get_dmabuf_resv(obj);
-	if (!resv)
-		return;
+	struct reservation_object *resv = obj->resv;
 
 	/* Ignore errors from failing to allocate the new fence, we can't
 	 * handle an error right now. Worst case should be missed
diff --git a/drivers/gpu/drm/i915/i915_gem_gtt.c b/drivers/gpu/drm/i915/i915_gem_gtt.c
index 1008209ca797..1b1a459e2b68 100644
--- a/drivers/gpu/drm/i915/i915_gem_gtt.c
+++ b/drivers/gpu/drm/i915/i915_gem_gtt.c
@@ -31,6 +31,7 @@
 #include "i915_vgpu.h"
 #include "i915_trace.h"
 #include "intel_drv.h"
+#include "intel_frontbuffer.h"
 
 #define I915_GFP_DMA (GFP_KERNEL | __GFP_HIGHMEM)
 
@@ -3343,6 +3344,7 @@ i915_vma_retire(struct i915_gem_active *active,
 	const unsigned int idx = rq->engine->id;
 	struct i915_vma *vma =
 		container_of(active, struct i915_vma, last_read[idx]);
+	struct drm_i915_gem_object *obj = vma->obj;
 
 	GEM_BUG_ON(!i915_vma_has_active_engine(vma, idx));
 
@@ -3353,6 +3355,34 @@ i915_vma_retire(struct i915_gem_active *active,
 	list_move_tail(&vma->vm_link, &vma->vm->inactive_list);
 	if (unlikely(i915_vma_is_closed(vma) && !i915_vma_is_pinned(vma)))
 		WARN_ON(i915_vma_unbind(vma));
+
+	GEM_BUG_ON(!i915_gem_object_is_active(obj));
+	if (--obj->active_count)
+		return;
+
+	/* Bump our place on the bound list to keep it roughly in LRU order
+	 * so that we don't steal from recently used but inactive objects
+	 * (unless we are forced to ofc!)
+	 */
+	if (obj->bind_count)
+		list_move_tail(&obj->global_list, &rq->i915->mm.bound_list);
+
+	obj->mm.dirty = true; /* be paranoid  */
+
+	if (i915_gem_object_has_active_reference(obj)) {
+		i915_gem_object_clear_active_reference(obj);
+		i915_gem_object_put(obj);
+	}
+}
+
+static void
+i915_ggtt_retire__write(struct i915_gem_active *active,
+			struct drm_i915_gem_request *request)
+{
+	struct i915_vma *vma =
+		container_of(active, struct i915_vma, last_write);
+
+	intel_fb_obj_flush(vma->obj, true, ORIGIN_CS);
 }
 
 void i915_vma_destroy(struct i915_vma *vma)
@@ -3396,6 +3426,8 @@ __i915_vma_create(struct drm_i915_gem_object *obj,
 	INIT_LIST_HEAD(&vma->exec_list);
 	for (i = 0; i < ARRAY_SIZE(vma->last_read); i++)
 		init_request_active(&vma->last_read[i], i915_vma_retire);
+	init_request_active(&vma->last_write,
+			    i915_is_ggtt(vm) ? i915_ggtt_retire__write : NULL);
 	init_request_active(&vma->last_fence, NULL);
 	list_add(&vma->vm_link, &vm->unbound_list);
 	vma->vm = vm;
diff --git a/drivers/gpu/drm/i915/i915_gem_gtt.h b/drivers/gpu/drm/i915/i915_gem_gtt.h
index dbe6a6cec20d..9f0327e5176a 100644
--- a/drivers/gpu/drm/i915/i915_gem_gtt.h
+++ b/drivers/gpu/drm/i915/i915_gem_gtt.h
@@ -211,6 +211,7 @@ struct i915_vma {
 
 	unsigned int active;
 	struct i915_gem_active last_read[I915_NUM_ENGINES];
+	struct i915_gem_active last_write;
 	struct i915_gem_active last_fence;
 
 	/**
diff --git a/drivers/gpu/drm/i915/i915_gem_request.c b/drivers/gpu/drm/i915/i915_gem_request.c
index d234c28cbb9f..01a7fa513b4a 100644
--- a/drivers/gpu/drm/i915/i915_gem_request.c
+++ b/drivers/gpu/drm/i915/i915_gem_request.c
@@ -196,6 +196,8 @@ static void i915_gem_request_retire(struct drm_i915_gem_request *request)
 	}
 
 	i915_gem_context_put(request->ctx);
+
+	dma_fence_signal(&request->fence);
 	i915_gem_request_put(request);
 }
 
@@ -553,33 +555,41 @@ i915_gem_request_await_object(struct drm_i915_gem_request *to,
 			      struct drm_i915_gem_object *obj,
 			      bool write)
 {
-	struct i915_gem_active *active;
-	unsigned long active_mask;
-	int idx;
+	struct dma_fence *excl;
+	int ret = 0;
 
 	if (write) {
-		active_mask = i915_gem_object_get_active(obj);
-		active = obj->last_read;
+		struct dma_fence **shared;
+		unsigned int count, i;
+
+		ret = reservation_object_get_fences_rcu(obj->resv,
+							&excl, &count, &shared);
+		if (ret)
+			return ret;
+
+		for (i = 0; i < count; i++) {
+			ret = i915_gem_request_await_dma_fence(to, shared[i]);
+			if (ret)
+				break;
+
+			dma_fence_put(shared[i]);
+		}
+
+		for (; i < count; i++)
+			dma_fence_put(shared[i]);
+		kfree(shared);
 	} else {
-		active_mask = 1;
-		active = &obj->last_write;
+		excl = reservation_object_get_excl_rcu(obj->resv);
 	}
 
-	for_each_active(active_mask, idx) {
-		struct drm_i915_gem_request *request;
-		int ret;
-
-		request = i915_gem_active_peek(&active[idx],
-					       &obj->base.dev->struct_mutex);
-		if (!request)
-			continue;
+	if (excl) {
+		if (ret == 0)
+			ret = i915_gem_request_await_dma_fence(to, excl);
 
-		ret = i915_gem_request_await_request(to, request);
-		if (ret)
-			return ret;
+		dma_fence_put(excl);
 	}
 
-	return 0;
+	return ret;
 }
 
 static void i915_gem_mark_busy(const struct intel_engine_cs *engine)
diff --git a/drivers/gpu/drm/i915/i915_gem_request.h b/drivers/gpu/drm/i915/i915_gem_request.h
index 602234f91583..a51d596a60ac 100644
--- a/drivers/gpu/drm/i915/i915_gem_request.h
+++ b/drivers/gpu/drm/i915/i915_gem_request.h
@@ -554,22 +554,7 @@ i915_gem_active_isset(const struct i915_gem_active *active)
 }
 
 /**
- * i915_gem_active_is_idle - report whether the active tracker is idle
- * @active - the active tracker
- *
- * i915_gem_active_is_idle() returns true if the active tracker is currently
- * unassigned or if the request is complete (but not yet retired). Requires
- * the caller to hold struct_mutex (but that can be relaxed if desired).
- */
-static inline bool
-i915_gem_active_is_idle(const struct i915_gem_active *active,
-			struct mutex *mutex)
-{
-	return !i915_gem_active_peek(active, mutex);
-}
-
-/**
- * i915_gem_active_wait- waits until the request is completed
+ * i915_gem_active_wait - waits until the request is completed
  * @active - the active request on which to wait
  * @flags - how to wait
  * @timeout - how long to wait at most
@@ -639,24 +624,6 @@ i915_gem_active_retire(struct i915_gem_active *active,
 	return 0;
 }
 
-/* Convenience functions for peeking at state inside active's request whilst
- * guarded by the struct_mutex.
- */
-
-static inline uint32_t
-i915_gem_active_get_seqno(const struct i915_gem_active *active,
-			  struct mutex *mutex)
-{
-	return i915_gem_request_get_seqno(i915_gem_active_peek(active, mutex));
-}
-
-static inline struct intel_engine_cs *
-i915_gem_active_get_engine(const struct i915_gem_active *active,
-			   struct mutex *mutex)
-{
-	return i915_gem_request_get_engine(i915_gem_active_peek(active, mutex));
-}
-
 #define for_each_active(mask, idx) \
 	for (; mask ? idx = ffs(mask) - 1, 1 : 0; mask &= ~BIT(idx))
 
diff --git a/drivers/gpu/drm/i915/i915_gpu_error.c b/drivers/gpu/drm/i915/i915_gpu_error.c
index 5bbb37209aa5..aa8dadcc669f 100644
--- a/drivers/gpu/drm/i915/i915_gpu_error.c
+++ b/drivers/gpu/drm/i915/i915_gpu_error.c
@@ -887,9 +887,9 @@ static void capture_bo(struct drm_i915_error_buffer *err,
 	err->name = obj->base.name;
 
 	for (i = 0; i < I915_NUM_ENGINES; i++)
-		err->rseqno[i] = __active_get_seqno(&obj->last_read[i]);
-	err->wseqno = __active_get_seqno(&obj->last_write);
-	err->engine = __active_get_engine_id(&obj->last_write);
+		err->rseqno[i] = __active_get_seqno(&vma->last_read[i]);
+	err->wseqno = __active_get_seqno(&vma->last_write);
+	err->engine = __active_get_engine_id(&vma->last_write);
 
 	err->gtt_offset = vma->node.start;
 	err->read_domains = obj->base.read_domains;
diff --git a/drivers/gpu/drm/i915/intel_atomic_plane.c b/drivers/gpu/drm/i915/intel_atomic_plane.c
index c762ae549a1c..cb5594411bb6 100644
--- a/drivers/gpu/drm/i915/intel_atomic_plane.c
+++ b/drivers/gpu/drm/i915/intel_atomic_plane.c
@@ -84,7 +84,6 @@ intel_plane_duplicate_state(struct drm_plane *plane)
 	state = &intel_state->base;
 
 	__drm_atomic_helper_plane_duplicate_state(plane, state);
-	intel_state->wait_req = NULL;
 
 	return state;
 }
@@ -101,7 +100,6 @@ void
 intel_plane_destroy_state(struct drm_plane *plane,
 			  struct drm_plane_state *state)
 {
-	WARN_ON(state && to_intel_plane_state(state)->wait_req);
 	drm_atomic_helper_plane_destroy_state(plane, state);
 }
 
diff --git a/drivers/gpu/drm/i915/intel_display.c b/drivers/gpu/drm/i915/intel_display.c
index 87135d93e828..622f733b9347 100644
--- a/drivers/gpu/drm/i915/intel_display.c
+++ b/drivers/gpu/drm/i915/intel_display.c
@@ -37,7 +37,6 @@
 #include "intel_frontbuffer.h"
 #include <drm/i915_drm.h>
 #include "i915_drv.h"
-#include "i915_gem_dmabuf.h"
 #include "intel_dsi.h"
 #include "i915_trace.h"
 #include <drm/drm_atomic.h>
@@ -11967,8 +11966,6 @@ static int intel_gen7_queue_flip(struct drm_device *dev,
 static bool use_mmio_flip(struct intel_engine_cs *engine,
 			  struct drm_i915_gem_object *obj)
 {
-	struct reservation_object *resv;
-
 	/*
 	 * This is not being used for older platforms, because
 	 * non-availability of flip done interrupt forces us to use
@@ -11990,12 +11987,7 @@ static bool use_mmio_flip(struct intel_engine_cs *engine,
 	else if (i915.enable_execlists)
 		return true;
 
-	resv = i915_gem_object_get_dmabuf_resv(obj);
-	if (resv && !reservation_object_test_signaled_rcu(resv, false))
-		return true;
-
-	return engine != i915_gem_active_get_engine(&obj->last_write,
-						    &obj->base.dev->struct_mutex);
+	return engine != i915_gem_object_last_write_engine(obj);
 }
 
 static void skl_do_mmio_flip(struct intel_crtc *intel_crtc,
@@ -12068,17 +12060,8 @@ static void intel_mmio_flip_work_func(struct work_struct *w)
 	struct intel_framebuffer *intel_fb =
 		to_intel_framebuffer(crtc->base.primary->fb);
 	struct drm_i915_gem_object *obj = intel_fb->obj;
-	struct reservation_object *resv;
 
-	if (work->flip_queued_req)
-		WARN_ON(i915_wait_request(work->flip_queued_req,
-					  0, MAX_SCHEDULE_TIMEOUT) < 0);
-
-	/* For framebuffer backed by dmabuf, wait for fence */
-	resv = i915_gem_object_get_dmabuf_resv(obj);
-	if (resv)
-		WARN_ON(reservation_object_wait_timeout_rcu(resv, false, false,
-							    MAX_SCHEDULE_TIMEOUT) < 0);
+	WARN_ON(i915_gem_object_wait(obj, 0, MAX_SCHEDULE_TIMEOUT, NULL) < 0);
 
 	intel_pipe_update_start(crtc);
 
@@ -12279,8 +12262,7 @@ static int intel_crtc_page_flip(struct drm_crtc *crtc,
 	} else if (IS_IVYBRIDGE(dev_priv) || IS_HASWELL(dev_priv)) {
 		engine = dev_priv->engine[BCS];
 	} else if (INTEL_INFO(dev)->gen >= 7) {
-		engine = i915_gem_active_get_engine(&obj->last_write,
-						    &obj->base.dev->struct_mutex);
+		engine = i915_gem_object_last_write_engine(obj);
 		if (engine == NULL || engine->id != RCS)
 			engine = dev_priv->engine[BCS];
 	} else {
@@ -12312,9 +12294,6 @@ static int intel_crtc_page_flip(struct drm_crtc *crtc,
 
 	if (mmio_flip) {
 		INIT_WORK(&work->mmio_work, intel_mmio_flip_work_func);
-
-		work->flip_queued_req = i915_gem_active_get(&obj->last_write,
-							    &obj->base.dev->struct_mutex);
 		queue_work(system_unbound_wq, &work->mmio_work);
 	} else {
 		request = i915_gem_request_alloc(engine, engine->last_context);
@@ -14154,13 +14133,10 @@ static int intel_atomic_check(struct drm_device *dev,
 }
 
 static int intel_atomic_prepare_commit(struct drm_device *dev,
-				       struct drm_atomic_state *state,
-				       bool nonblock)
+				       struct drm_atomic_state *state)
 {
 	struct drm_i915_private *dev_priv = to_i915(dev);
-	struct drm_plane_state *plane_state;
 	struct drm_crtc_state *crtc_state;
-	struct drm_plane *plane;
 	struct drm_crtc *crtc;
 	int i, ret;
 
@@ -14183,30 +14159,6 @@ static int intel_atomic_prepare_commit(struct drm_device *dev,
 	ret = drm_atomic_helper_prepare_planes(dev, state);
 	mutex_unlock(&dev->struct_mutex);
 
-	if (!ret && !nonblock) {
-		for_each_plane_in_state(state, plane, plane_state, i) {
-			struct intel_plane_state *intel_plane_state =
-				to_intel_plane_state(plane_state);
-			long timeout;
-
-			if (!intel_plane_state->wait_req)
-				continue;
-
-			timeout = i915_wait_request(intel_plane_state->wait_req,
-						    I915_WAIT_INTERRUPTIBLE,
-						    MAX_SCHEDULE_TIMEOUT);
-			if (timeout < 0) {
-				/* Any hang should be swallowed by the wait */
-				WARN_ON(timeout == -EIO);
-				mutex_lock(&dev->struct_mutex);
-				drm_atomic_helper_cleanup_planes(dev, state);
-				mutex_unlock(&dev->struct_mutex);
-				ret = timeout;
-				break;
-			}
-		}
-	}
-
 	return ret;
 }
 
@@ -14400,26 +14352,11 @@ static void intel_atomic_commit_tail(struct drm_atomic_state *state)
 	struct drm_crtc_state *old_crtc_state;
 	struct drm_crtc *crtc;
 	struct intel_crtc_state *intel_cstate;
-	struct drm_plane *plane;
-	struct drm_plane_state *plane_state;
 	bool hw_check = intel_state->modeset;
 	unsigned long put_domains[I915_MAX_PIPES] = {};
 	unsigned crtc_vblank_mask = 0;
 	int i;
 
-	for_each_plane_in_state(state, plane, plane_state, i) {
-		struct intel_plane_state *intel_plane_state =
-			to_intel_plane_state(plane->state);
-
-		if (!intel_plane_state->wait_req)
-			continue;
-
-		/* EIO should be eaten, and we can't get interrupted in the
-		 * worker, and blocking commits have waited already. */
-		WARN_ON(i915_wait_request(intel_plane_state->wait_req,
-					  0, MAX_SCHEDULE_TIMEOUT) < 0);
-	}
-
 	drm_atomic_helper_wait_for_dependencies(state);
 
 	if (intel_state->modeset) {
@@ -14626,7 +14563,7 @@ static int intel_atomic_commit(struct drm_device *dev,
 
 	INIT_WORK(&state->commit_work, intel_atomic_commit_work);
 
-	ret = intel_atomic_prepare_commit(dev, state, nonblock);
+	ret = intel_atomic_prepare_commit(dev, state);
 	if (ret) {
 		DRM_DEBUG_ATOMIC("Preparing state failed with %i\n", ret);
 		return ret;
@@ -14759,7 +14696,7 @@ intel_prepare_plane_fb(struct drm_plane *plane,
 	struct drm_framebuffer *fb = new_state->fb;
 	struct drm_i915_gem_object *obj = intel_fb_obj(fb);
 	struct drm_i915_gem_object *old_obj = intel_fb_obj(plane->state->fb);
-	struct reservation_object *resv;
+	long lret;
 	int ret = 0;
 
 	if (!obj && !old_obj)
@@ -14797,39 +14734,34 @@ intel_prepare_plane_fb(struct drm_plane *plane,
 		return 0;
 
 	/* For framebuffer backed by dmabuf, wait for fence */
-	resv = i915_gem_object_get_dmabuf_resv(obj);
-	if (resv) {
-		long lret;
+	lret = i915_gem_object_wait(obj,
+				    I915_WAIT_INTERRUPTIBLE | I915_WAIT_LOCKED,
+				    MAX_SCHEDULE_TIMEOUT,
+				    NULL);
+	if (lret == -ERESTARTSYS)
+		return lret;
 
-		lret = reservation_object_wait_timeout_rcu(resv, false, true,
-							   MAX_SCHEDULE_TIMEOUT);
-		if (lret == -ERESTARTSYS)
-			return lret;
-
-		WARN(lret < 0, "waiting returns %li\n", lret);
-	}
+	WARN(lret < 0, "waiting returns %li\n", lret);
 
 	if (plane->type == DRM_PLANE_TYPE_CURSOR &&
 	    INTEL_INFO(dev)->cursor_needs_physical) {
 		int align = IS_I830(dev_priv) ? 16 * 1024 : 256;
 		ret = i915_gem_object_attach_phys(obj, align);
-		if (ret)
+		if (ret) {
 			DRM_DEBUG_KMS("failed to attach phys object\n");
+			return ret;
+		}
 	} else {
 		struct i915_vma *vma;
 
 		vma = intel_pin_and_fence_fb_obj(fb, new_state->rotation);
-		if (IS_ERR(vma))
-			ret = PTR_ERR(vma);
-	}
-
-	if (ret == 0) {
-		to_intel_plane_state(new_state)->wait_req =
-			i915_gem_active_get(&obj->last_write,
-					    &obj->base.dev->struct_mutex);
+		if (IS_ERR(vma)) {
+			DRM_DEBUG_KMS("failed to pin object\n");
+			return PTR_ERR(vma);
+		}
 	}
 
-	return ret;
+	return 0;
 }
 
 /**
@@ -14847,7 +14779,6 @@ intel_cleanup_plane_fb(struct drm_plane *plane,
 {
 	struct drm_device *dev = plane->dev;
 	struct intel_plane_state *old_intel_state;
-	struct intel_plane_state *intel_state = to_intel_plane_state(plane->state);
 	struct drm_i915_gem_object *old_obj = intel_fb_obj(old_state->fb);
 	struct drm_i915_gem_object *obj = intel_fb_obj(plane->state->fb);
 
@@ -14859,9 +14790,6 @@ intel_cleanup_plane_fb(struct drm_plane *plane,
 	if (old_obj && (plane->type != DRM_PLANE_TYPE_CURSOR ||
 	    !INTEL_INFO(dev)->cursor_needs_physical))
 		intel_unpin_fb_obj(old_state->fb, old_state->rotation);
-
-	i915_gem_request_assign(&intel_state->wait_req, NULL);
-	i915_gem_request_assign(&old_intel_state->wait_req, NULL);
 }
 
 int
diff --git a/drivers/gpu/drm/i915/intel_drv.h b/drivers/gpu/drm/i915/intel_drv.h
index ace222c74f71..ec7fa592b20a 100644
--- a/drivers/gpu/drm/i915/intel_drv.h
+++ b/drivers/gpu/drm/i915/intel_drv.h
@@ -401,9 +401,6 @@ struct intel_plane_state {
 	int scaler_id;
 
 	struct drm_intel_sprite_colorkey ckey;
-
-	/* async flip related structures */
-	struct drm_i915_gem_request *wait_req;
 };
 
 struct intel_initial_plane_config {
-- 
cgit v1.2.3


From 80b204bce8f27b52cd65839e0e6144b4452ae3de Mon Sep 17 00:00:00 2001
From: Chris Wilson <chris@chris-wilson.co.uk>
Date: Fri, 28 Oct 2016 13:58:58 +0100
Subject: drm/i915: Enable multiple timelines

With the infrastructure converted over to tracking multiple timelines in
the GEM API whilst preserving the efficiency of using a single execution
timeline internally, we can now assign a separate timeline to every
context with full-ppgtt.

v2: Add a comment to indicate the xfer between timelines upon submission.

Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
Reviewed-by: Joonas Lahtinen <joonas.lahtinen@linux.intel.com>
Link: http://patchwork.freedesktop.org/patch/msgid/20161028125858.23563-35-chris@chris-wilson.co.uk
---
 drivers/gpu/drm/i915/i915_drv.h          | 10 ++++++
 drivers/gpu/drm/i915/i915_gem.c          | 10 +++---
 drivers/gpu/drm/i915/i915_gem_context.c  |  4 +--
 drivers/gpu/drm/i915/i915_gem_evict.c    | 11 +++---
 drivers/gpu/drm/i915/i915_gem_gtt.c      | 19 ++++++----
 drivers/gpu/drm/i915/i915_gem_gtt.h      |  4 ++-
 drivers/gpu/drm/i915/i915_gem_request.c  | 61 +++++++++++++++++---------------
 drivers/gpu/drm/i915/i915_gem_timeline.c |  1 +
 drivers/gpu/drm/i915/i915_gem_timeline.h |  3 +-
 drivers/gpu/drm/i915/intel_ringbuffer.h  |  5 ---
 10 files changed, 77 insertions(+), 51 deletions(-)

(limited to 'drivers/gpu/drm/i915/i915_gem_gtt.h')

diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h
index eacb144af29e..42a499681966 100644
--- a/drivers/gpu/drm/i915/i915_drv.h
+++ b/drivers/gpu/drm/i915/i915_drv.h
@@ -3549,6 +3549,16 @@ static inline void i915_gem_context_put(struct i915_gem_context *ctx)
 	kref_put(&ctx->ref, i915_gem_context_free);
 }
 
+static inline struct intel_timeline *
+i915_gem_context_lookup_timeline(struct i915_gem_context *ctx,
+				 struct intel_engine_cs *engine)
+{
+	struct i915_address_space *vm;
+
+	vm = ctx->ppgtt ? &ctx->ppgtt->base : &ctx->i915->ggtt.base;
+	return &vm->timeline.engine[engine->id];
+}
+
 static inline bool i915_gem_context_is_default(const struct i915_gem_context *c)
 {
 	return c->user_handle == DEFAULT_CONTEXT_HANDLE;
diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c
index 8a5d20715e5f..1e5d2bf777e4 100644
--- a/drivers/gpu/drm/i915/i915_gem.c
+++ b/drivers/gpu/drm/i915/i915_gem.c
@@ -2564,12 +2564,9 @@ i915_gem_find_active_request(struct intel_engine_cs *engine)
 	 * not need an engine->irq_seqno_barrier() before the seqno reads.
 	 */
 	list_for_each_entry(request, &engine->timeline->requests, link) {
-		if (i915_gem_request_completed(request))
+		if (__i915_gem_request_completed(request))
 			continue;
 
-		if (!i915_sw_fence_done(&request->submit))
-			break;
-
 		return request;
 	}
 
@@ -2597,6 +2594,7 @@ static void i915_gem_reset_engine(struct intel_engine_cs *engine)
 {
 	struct drm_i915_gem_request *request;
 	struct i915_gem_context *incomplete_ctx;
+	struct intel_timeline *timeline;
 	bool ring_hung;
 
 	if (engine->irq_seqno_barrier)
@@ -2635,6 +2633,10 @@ static void i915_gem_reset_engine(struct intel_engine_cs *engine)
 	list_for_each_entry_continue(request, &engine->timeline->requests, link)
 		if (request->ctx == incomplete_ctx)
 			reset_request(request);
+
+	timeline = i915_gem_context_lookup_timeline(incomplete_ctx, engine);
+	list_for_each_entry(request, &timeline->requests, link)
+		reset_request(request);
 }
 
 void i915_gem_reset(struct drm_i915_private *dev_priv)
diff --git a/drivers/gpu/drm/i915/i915_gem_context.c b/drivers/gpu/drm/i915/i915_gem_context.c
index d3118db244c4..461aece6c5bd 100644
--- a/drivers/gpu/drm/i915/i915_gem_context.c
+++ b/drivers/gpu/drm/i915/i915_gem_context.c
@@ -365,9 +365,9 @@ i915_gem_create_context(struct drm_device *dev,
 		return ctx;
 
 	if (USES_FULL_PPGTT(dev)) {
-		struct i915_hw_ppgtt *ppgtt =
-			i915_ppgtt_create(to_i915(dev), file_priv);
+		struct i915_hw_ppgtt *ppgtt;
 
+		ppgtt = i915_ppgtt_create(to_i915(dev), file_priv, ctx->name);
 		if (IS_ERR(ppgtt)) {
 			DRM_DEBUG_DRIVER("PPGTT setup failed (%ld)\n",
 					 PTR_ERR(ppgtt));
diff --git a/drivers/gpu/drm/i915/i915_gem_evict.c b/drivers/gpu/drm/i915/i915_gem_evict.c
index 79b964152cd9..bd08814b015c 100644
--- a/drivers/gpu/drm/i915/i915_gem_evict.c
+++ b/drivers/gpu/drm/i915/i915_gem_evict.c
@@ -33,14 +33,17 @@
 #include "intel_drv.h"
 #include "i915_trace.h"
 
-static bool
-gpu_is_idle(struct drm_i915_private *dev_priv)
+static bool ggtt_is_idle(struct drm_i915_private *dev_priv)
 {
+	struct i915_ggtt *ggtt = &dev_priv->ggtt;
 	struct intel_engine_cs *engine;
 	enum intel_engine_id id;
 
 	for_each_engine(engine, dev_priv, id) {
-		if (intel_engine_is_active(engine))
+		struct intel_timeline *tl;
+
+		tl = &ggtt->base.timeline.engine[engine->id];
+		if (i915_gem_active_isset(&tl->last_request))
 			return false;
 	}
 
@@ -154,7 +157,7 @@ search_again:
 	if (!i915_is_ggtt(vm) || flags & PIN_NONBLOCK)
 		return -ENOSPC;
 
-	if (gpu_is_idle(dev_priv)) {
+	if (ggtt_is_idle(dev_priv)) {
 		/* If we still have pending pageflip completions, drop
 		 * back to userspace to give our workqueues time to
 		 * acquire our locks and unpin the old scanouts.
diff --git a/drivers/gpu/drm/i915/i915_gem_gtt.c b/drivers/gpu/drm/i915/i915_gem_gtt.c
index 1b1a459e2b68..e7afad585929 100644
--- a/drivers/gpu/drm/i915/i915_gem_gtt.c
+++ b/drivers/gpu/drm/i915/i915_gem_gtt.c
@@ -2185,8 +2185,10 @@ static int __hw_ppgtt_init(struct i915_hw_ppgtt *ppgtt,
 }
 
 static void i915_address_space_init(struct i915_address_space *vm,
-				    struct drm_i915_private *dev_priv)
+				    struct drm_i915_private *dev_priv,
+				    const char *name)
 {
+	i915_gem_timeline_init(dev_priv, &vm->timeline, name);
 	drm_mm_init(&vm->mm, vm->start, vm->total);
 	INIT_LIST_HEAD(&vm->active_list);
 	INIT_LIST_HEAD(&vm->inactive_list);
@@ -2215,14 +2217,15 @@ static void gtt_write_workarounds(struct drm_device *dev)
 
 static int i915_ppgtt_init(struct i915_hw_ppgtt *ppgtt,
 			   struct drm_i915_private *dev_priv,
-			   struct drm_i915_file_private *file_priv)
+			   struct drm_i915_file_private *file_priv,
+			   const char *name)
 {
 	int ret;
 
 	ret = __hw_ppgtt_init(ppgtt, dev_priv);
 	if (ret == 0) {
 		kref_init(&ppgtt->ref);
-		i915_address_space_init(&ppgtt->base, dev_priv);
+		i915_address_space_init(&ppgtt->base, dev_priv, name);
 		ppgtt->base.file = file_priv;
 	}
 
@@ -2258,7 +2261,8 @@ int i915_ppgtt_init_hw(struct drm_device *dev)
 
 struct i915_hw_ppgtt *
 i915_ppgtt_create(struct drm_i915_private *dev_priv,
-		  struct drm_i915_file_private *fpriv)
+		  struct drm_i915_file_private *fpriv,
+		  const char *name)
 {
 	struct i915_hw_ppgtt *ppgtt;
 	int ret;
@@ -2267,7 +2271,7 @@ i915_ppgtt_create(struct drm_i915_private *dev_priv,
 	if (!ppgtt)
 		return ERR_PTR(-ENOMEM);
 
-	ret = i915_ppgtt_init(ppgtt, dev_priv, fpriv);
+	ret = i915_ppgtt_init(ppgtt, dev_priv, fpriv, name);
 	if (ret) {
 		kfree(ppgtt);
 		return ERR_PTR(ret);
@@ -2290,6 +2294,7 @@ void  i915_ppgtt_release(struct kref *kref)
 	WARN_ON(!list_empty(&ppgtt->base.inactive_list));
 	WARN_ON(!list_empty(&ppgtt->base.unbound_list));
 
+	i915_gem_timeline_fini(&ppgtt->base.timeline);
 	list_del(&ppgtt->base.global_link);
 	drm_mm_takedown(&ppgtt->base.mm);
 
@@ -3232,11 +3237,13 @@ int i915_ggtt_init_hw(struct drm_i915_private *dev_priv)
 	/* Subtract the guard page before address space initialization to
 	 * shrink the range used by drm_mm.
 	 */
+	mutex_lock(&dev_priv->drm.struct_mutex);
 	ggtt->base.total -= PAGE_SIZE;
-	i915_address_space_init(&ggtt->base, dev_priv);
+	i915_address_space_init(&ggtt->base, dev_priv, "[global]");
 	ggtt->base.total += PAGE_SIZE;
 	if (!HAS_LLC(dev_priv))
 		ggtt->base.mm.color_adjust = i915_gtt_color_adjust;
+	mutex_unlock(&dev_priv->drm.struct_mutex);
 
 	if (!io_mapping_init_wc(&dev_priv->ggtt.mappable,
 				dev_priv->ggtt.mappable_base,
diff --git a/drivers/gpu/drm/i915/i915_gem_gtt.h b/drivers/gpu/drm/i915/i915_gem_gtt.h
index 9f0327e5176a..518e75b64290 100644
--- a/drivers/gpu/drm/i915/i915_gem_gtt.h
+++ b/drivers/gpu/drm/i915/i915_gem_gtt.h
@@ -342,6 +342,7 @@ struct i915_pml4 {
 
 struct i915_address_space {
 	struct drm_mm mm;
+	struct i915_gem_timeline timeline;
 	struct drm_device *dev;
 	/* Every address space belongs to a struct file - except for the global
 	 * GTT that is owned by the driver (and so @file is set to NULL). In
@@ -613,7 +614,8 @@ void i915_ggtt_cleanup_hw(struct drm_i915_private *dev_priv);
 int i915_ppgtt_init_hw(struct drm_device *dev);
 void i915_ppgtt_release(struct kref *kref);
 struct i915_hw_ppgtt *i915_ppgtt_create(struct drm_i915_private *dev_priv,
-					struct drm_i915_file_private *fpriv);
+					struct drm_i915_file_private *fpriv,
+					const char *name);
 static inline void i915_ppgtt_get(struct i915_hw_ppgtt *ppgtt)
 {
 	if (ppgtt)
diff --git a/drivers/gpu/drm/i915/i915_gem_request.c b/drivers/gpu/drm/i915/i915_gem_request.c
index 7499e3b205c6..79b0046d9a57 100644
--- a/drivers/gpu/drm/i915/i915_gem_request.c
+++ b/drivers/gpu/drm/i915/i915_gem_request.c
@@ -34,12 +34,6 @@ static const char *i915_fence_get_driver_name(struct dma_fence *fence)
 
 static const char *i915_fence_get_timeline_name(struct dma_fence *fence)
 {
-	/* Timelines are bound by eviction to a VM. However, since
-	 * we only have a global seqno at the moment, we only have
-	 * a single timeline. Note that each timeline will have
-	 * multiple execution contexts (fence contexts) as we allow
-	 * engines within a single timeline to execute in parallel.
-	 */
 	return to_request(fence)->timeline->common->name;
 }
 
@@ -64,18 +58,6 @@ static signed long i915_fence_wait(struct dma_fence *fence,
 	return i915_wait_request(to_request(fence), interruptible, timeout);
 }
 
-static void i915_fence_value_str(struct dma_fence *fence, char *str, int size)
-{
-	snprintf(str, size, "%u", fence->seqno);
-}
-
-static void i915_fence_timeline_value_str(struct dma_fence *fence, char *str,
-					  int size)
-{
-	snprintf(str, size, "%u",
-		 intel_engine_get_seqno(to_request(fence)->engine));
-}
-
 static void i915_fence_release(struct dma_fence *fence)
 {
 	struct drm_i915_gem_request *req = to_request(fence);
@@ -90,8 +72,6 @@ const struct dma_fence_ops i915_fence_ops = {
 	.signaled = i915_fence_signaled,
 	.wait = i915_fence_wait,
 	.release = i915_fence_release,
-	.fence_value_str = i915_fence_value_str,
-	.timeline_value_str = i915_fence_timeline_value_str,
 };
 
 int i915_gem_request_add_to_client(struct drm_i915_gem_request *req,
@@ -147,7 +127,10 @@ static void i915_gem_request_retire(struct drm_i915_gem_request *request)
 	GEM_BUG_ON(!i915_gem_request_completed(request));
 
 	trace_i915_gem_request_retire(request);
+
+	spin_lock_irq(&request->engine->timeline->lock);
 	list_del_init(&request->link);
+	spin_unlock_irq(&request->engine->timeline->lock);
 
 	/* We know the GPU must have read the request to have
 	 * sent us the seqno + interrupt, so use the position
@@ -313,6 +296,12 @@ static int reserve_global_seqno(struct drm_i915_private *i915)
 	return 0;
 }
 
+static u32 __timeline_get_seqno(struct i915_gem_timeline *tl)
+{
+	/* next_seqno only incremented under a mutex */
+	return ++tl->next_seqno.counter;
+}
+
 static u32 timeline_get_seqno(struct i915_gem_timeline *tl)
 {
 	return atomic_inc_return(&tl->next_seqno);
@@ -325,16 +314,20 @@ submit_notify(struct i915_sw_fence *fence, enum i915_sw_fence_notify state)
 		container_of(fence, typeof(*request), submit);
 	struct intel_engine_cs *engine = request->engine;
 	struct intel_timeline *timeline;
+	unsigned long flags;
 	u32 seqno;
 
 	if (state != FENCE_COMPLETE)
 		return NOTIFY_DONE;
 
-	/* Will be called from irq-context when using foreign DMA fences */
+	/* Transfer from per-context onto the global per-engine timeline */
+	timeline = engine->timeline;
+	GEM_BUG_ON(timeline == request->timeline);
 
-	timeline = request->timeline;
+	/* Will be called from irq-context when using foreign DMA fences */
+	spin_lock_irqsave(&timeline->lock, flags);
 
-	seqno = request->fence.seqno;
+	seqno = timeline_get_seqno(timeline->common);
 	GEM_BUG_ON(!seqno);
 	GEM_BUG_ON(i915_seqno_passed(intel_engine_get_seqno(engine), seqno));
 
@@ -354,6 +347,12 @@ submit_notify(struct i915_sw_fence *fence, enum i915_sw_fence_notify state)
 				request->ring->vaddr + request->postfix);
 	engine->submit_request(request);
 
+	spin_lock_nested(&request->timeline->lock, SINGLE_DEPTH_NESTING);
+	list_move_tail(&request->link, &timeline->requests);
+	spin_unlock(&request->timeline->lock);
+
+	spin_unlock_irqrestore(&timeline->lock, flags);
+
 	return NOTIFY_DONE;
 }
 
@@ -394,7 +393,7 @@ i915_gem_request_alloc(struct intel_engine_cs *engine,
 	/* Move the oldest request to the slab-cache (if not in use!) */
 	req = list_first_entry_or_null(&engine->timeline->requests,
 				       typeof(*req), link);
-	if (req && i915_gem_request_completed(req))
+	if (req && __i915_gem_request_completed(req))
 		i915_gem_request_retire(req);
 
 	/* Beware: Dragons be flying overhead.
@@ -431,14 +430,15 @@ i915_gem_request_alloc(struct intel_engine_cs *engine,
 		goto err_unreserve;
 	}
 
-	req->timeline = engine->timeline;
+	req->timeline = i915_gem_context_lookup_timeline(ctx, engine);
+	GEM_BUG_ON(req->timeline == engine->timeline);
 
 	spin_lock_init(&req->lock);
 	dma_fence_init(&req->fence,
 		       &i915_fence_ops,
 		       &req->lock,
 		       req->timeline->fence_context,
-		       timeline_get_seqno(req->timeline->common));
+		       __timeline_get_seqno(req->timeline->common));
 
 	i915_sw_fence_init(&req->submit, submit_notify);
 
@@ -722,9 +722,14 @@ void __i915_add_request(struct drm_i915_gem_request *request, bool flush_caches)
 		i915_sw_fence_await_sw_fence(&request->submit, &prev->submit,
 					     &request->submitq);
 
+	spin_lock_irq(&timeline->lock);
 	list_add_tail(&request->link, &timeline->requests);
+	spin_unlock_irq(&timeline->lock);
+
+	GEM_BUG_ON(i915_seqno_passed(timeline->last_submitted_seqno,
+				     request->fence.seqno));
 
-	timeline->last_pending_seqno = request->fence.seqno;
+	timeline->last_submitted_seqno = request->fence.seqno;
 	i915_gem_active_set(&timeline->last_request, request);
 
 	list_add_tail(&request->ring_link, &ring->request_list);
@@ -991,7 +996,7 @@ static void engine_retire_requests(struct intel_engine_cs *engine)
 
 	list_for_each_entry_safe(request, next,
 				 &engine->timeline->requests, link) {
-		if (!i915_gem_request_completed(request))
+		if (!__i915_gem_request_completed(request))
 			return;
 
 		i915_gem_request_retire(request);
diff --git a/drivers/gpu/drm/i915/i915_gem_timeline.c b/drivers/gpu/drm/i915/i915_gem_timeline.c
index a1bd03d10852..fc8f13a79f8f 100644
--- a/drivers/gpu/drm/i915/i915_gem_timeline.c
+++ b/drivers/gpu/drm/i915/i915_gem_timeline.c
@@ -48,6 +48,7 @@ int i915_gem_timeline_init(struct drm_i915_private *i915,
 		tl->fence_context = fences++;
 		tl->common = timeline;
 
+		spin_lock_init(&tl->lock);
 		init_request_active(&tl->last_request, NULL);
 		INIT_LIST_HEAD(&tl->requests);
 	}
diff --git a/drivers/gpu/drm/i915/i915_gem_timeline.h b/drivers/gpu/drm/i915/i915_gem_timeline.h
index 18e603980dd9..f2bf7b1d49a1 100644
--- a/drivers/gpu/drm/i915/i915_gem_timeline.h
+++ b/drivers/gpu/drm/i915/i915_gem_timeline.h
@@ -34,7 +34,8 @@ struct i915_gem_timeline;
 struct intel_timeline {
 	u64 fence_context;
 	u32 last_submitted_seqno;
-	u32 last_pending_seqno;
+
+	spinlock_t lock;
 
 	/**
 	 * List of breadcrumbs associated with GPU requests currently
diff --git a/drivers/gpu/drm/i915/intel_ringbuffer.h b/drivers/gpu/drm/i915/intel_ringbuffer.h
index d16c74ae8f54..43f61aa24ed7 100644
--- a/drivers/gpu/drm/i915/intel_ringbuffer.h
+++ b/drivers/gpu/drm/i915/intel_ringbuffer.h
@@ -569,9 +569,4 @@ void intel_engine_fini_breadcrumbs(struct intel_engine_cs *engine);
 unsigned int intel_kick_waiters(struct drm_i915_private *i915);
 unsigned int intel_kick_signalers(struct drm_i915_private *i915);
 
-static inline bool intel_engine_is_active(struct intel_engine_cs *engine)
-{
-	return i915_gem_active_isset(&engine->timeline->last_request);
-}
-
 #endif /* _INTEL_RINGBUFFER_H_ */
-- 
cgit v1.2.3


From db6c2b4151f2915fe1695cdcac43b32e73d1ad32 Mon Sep 17 00:00:00 2001
From: Chris Wilson <chris@chris-wilson.co.uk>
Date: Tue, 1 Nov 2016 11:54:00 +0000
Subject: drm/i915: Store the vma in an rbtree under the object

With full-ppgtt one of the main bottlenecks is the lookup of the VMA
underneath the object. For execbuf there is merit in having a very fast
direct lookup of ctx:handle to the vma using a hashtree, but that still
leaves a large number of other lookups. One way to speed up the lookup
would be to use a rhashtable, but that requires extra allocations and
may exhibit poor worse case behaviour. An alternative is to use an
embedded rbtree, i.e. no extra allocations and deterministic behaviour,
but at the slight cost of O(lgN) lookups (instead of O(1) for
rhashtable). The major of such tree will be very shallow and so not much
slower, and still scales much, much better than the current unsorted
list.

v2: Bump vma_compare() to return a long, as we return the result of
comparing two pointers.

References: https://bugs.freedesktop.org/show_bug.cgi?id=87726
Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
Reviewed-by: Tvrtko Ursulin <tvrtko.ursulin@intel.com>
Link: http://patchwork.freedesktop.org/patch/msgid/20161101115400.15647-1-chris@chris-wilson.co.uk
---
 drivers/gpu/drm/i915/i915_drv.h     |  1 +
 drivers/gpu/drm/i915/i915_gem.c     |  2 +
 drivers/gpu/drm/i915/i915_gem_gtt.c | 80 +++++++++++++++++++++++++------------
 drivers/gpu/drm/i915/i915_gem_gtt.h |  1 +
 4 files changed, 59 insertions(+), 25 deletions(-)

(limited to 'drivers/gpu/drm/i915/i915_gem_gtt.h')

diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h
index 539106a9c1af..9143129cd851 100644
--- a/drivers/gpu/drm/i915/i915_drv.h
+++ b/drivers/gpu/drm/i915/i915_drv.h
@@ -2230,6 +2230,7 @@ struct drm_i915_gem_object {
 
 	/** List of VMAs backed by this object */
 	struct list_head vma_list;
+	struct rb_root vma_tree;
 
 	/** Stolen memory for this object, instead of being backed by shmem. */
 	struct drm_mm_node *stolen;
diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c
index c9e52f75e1cb..1568f6756430 100644
--- a/drivers/gpu/drm/i915/i915_gem.c
+++ b/drivers/gpu/drm/i915/i915_gem.c
@@ -4266,6 +4266,8 @@ static void __i915_gem_free_objects(struct drm_i915_private *i915,
 			vma->flags &= ~I915_VMA_PIN_MASK;
 			i915_vma_close(vma);
 		}
+		GEM_BUG_ON(!list_empty(&obj->vma_list));
+		GEM_BUG_ON(!RB_EMPTY_ROOT(&obj->vma_tree));
 
 		list_del(&obj->global_list);
 	}
diff --git a/drivers/gpu/drm/i915/i915_gem_gtt.c b/drivers/gpu/drm/i915/i915_gem_gtt.c
index e7afad585929..00606a27e9aa 100644
--- a/drivers/gpu/drm/i915/i915_gem_gtt.c
+++ b/drivers/gpu/drm/i915/i915_gem_gtt.c
@@ -3399,6 +3399,7 @@ void i915_vma_destroy(struct i915_vma *vma)
 	GEM_BUG_ON(!i915_vma_is_closed(vma));
 	GEM_BUG_ON(vma->fence);
 
+	rb_erase(&vma->obj_node, &vma->obj->vma_tree);
 	list_del(&vma->vm_link);
 	if (!i915_vma_is_ggtt(vma))
 		i915_ppgtt_put(i915_vm_to_ppgtt(vma->vm));
@@ -3416,12 +3417,33 @@ void i915_vma_close(struct i915_vma *vma)
 		WARN_ON(i915_vma_unbind(vma));
 }
 
+static inline long vma_compare(struct i915_vma *vma,
+			       struct i915_address_space *vm,
+			       const struct i915_ggtt_view *view)
+{
+	GEM_BUG_ON(view && !i915_vma_is_ggtt(vma));
+
+	if (vma->vm != vm)
+		return vma->vm - vm;
+
+	if (!view)
+		return vma->ggtt_view.type;
+
+	if (vma->ggtt_view.type != view->type)
+		return vma->ggtt_view.type - view->type;
+
+	return memcmp(&vma->ggtt_view.params,
+		      &view->params,
+		      sizeof(view->params));
+}
+
 static struct i915_vma *
 __i915_vma_create(struct drm_i915_gem_object *obj,
 		  struct i915_address_space *vm,
 		  const struct i915_ggtt_view *view)
 {
 	struct i915_vma *vma;
+	struct rb_node *rb, **p;
 	int i;
 
 	GEM_BUG_ON(vm->closed);
@@ -3455,33 +3477,28 @@ __i915_vma_create(struct drm_i915_gem_object *obj,
 
 	if (i915_is_ggtt(vm)) {
 		vma->flags |= I915_VMA_GGTT;
+		list_add(&vma->obj_link, &obj->vma_list);
 	} else {
 		i915_ppgtt_get(i915_vm_to_ppgtt(vm));
+		list_add_tail(&vma->obj_link, &obj->vma_list);
 	}
 
-	list_add_tail(&vma->obj_link, &obj->vma_list);
-	return vma;
-}
+	rb = NULL;
+	p = &obj->vma_tree.rb_node;
+	while (*p) {
+		struct i915_vma *pos;
 
-static inline bool vma_matches(struct i915_vma *vma,
-			       struct i915_address_space *vm,
-			       const struct i915_ggtt_view *view)
-{
-	if (vma->vm != vm)
-		return false;
-
-	if (!i915_vma_is_ggtt(vma))
-		return true;
-
-	if (!view)
-		return vma->ggtt_view.type == 0;
-
-	if (vma->ggtt_view.type != view->type)
-		return false;
+		rb = *p;
+		pos = rb_entry(rb, struct i915_vma, obj_node);
+		if (vma_compare(pos, vm, view) < 0)
+			p = &rb->rb_right;
+		else
+			p = &rb->rb_left;
+	}
+	rb_link_node(&vma->obj_node, rb, p);
+	rb_insert_color(&vma->obj_node, &obj->vma_tree);
 
-	return memcmp(&vma->ggtt_view.params,
-		      &view->params,
-		      sizeof(view->params)) == 0;
+	return vma;
 }
 
 struct i915_vma *
@@ -3501,12 +3518,23 @@ i915_gem_obj_to_vma(struct drm_i915_gem_object *obj,
 		    struct i915_address_space *vm,
 		    const struct i915_ggtt_view *view)
 {
-	struct i915_vma *vma;
+	struct rb_node *rb;
+
+	rb = obj->vma_tree.rb_node;
+	while (rb) {
+		struct i915_vma *vma = rb_entry(rb, struct i915_vma, obj_node);
+		long cmp;
 
-	list_for_each_entry_reverse(vma, &obj->vma_list, obj_link)
-		if (vma_matches(vma, vm, view))
+		cmp = vma_compare(vma, vm, view);
+		if (cmp == 0)
 			return vma;
 
+		if (cmp < 0)
+			rb = rb->rb_right;
+		else
+			rb = rb->rb_left;
+	}
+
 	return NULL;
 }
 
@@ -3521,8 +3549,10 @@ i915_gem_obj_lookup_or_create_vma(struct drm_i915_gem_object *obj,
 	GEM_BUG_ON(view && !i915_is_ggtt(vm));
 
 	vma = i915_gem_obj_to_vma(obj, vm, view);
-	if (!vma)
+	if (!vma) {
 		vma = __i915_vma_create(obj, vm, view);
+		GEM_BUG_ON(vma != i915_gem_obj_to_vma(obj, vm, view));
+	}
 
 	GEM_BUG_ON(i915_vma_is_closed(vma));
 	return vma;
diff --git a/drivers/gpu/drm/i915/i915_gem_gtt.h b/drivers/gpu/drm/i915/i915_gem_gtt.h
index 518e75b64290..c23ef9db1f53 100644
--- a/drivers/gpu/drm/i915/i915_gem_gtt.h
+++ b/drivers/gpu/drm/i915/i915_gem_gtt.h
@@ -227,6 +227,7 @@ struct i915_vma {
 	struct list_head vm_link;
 
 	struct list_head obj_link; /* Link in the object's VMA list */
+	struct rb_node obj_node;
 
 	/** This vma's place in the batchbuffer or on the eviction list */
 	struct list_head exec_list;
-- 
cgit v1.2.3