diff options
author | Tvrtko Ursulin <tvrtko.ursulin@intel.com> | 2017-02-14 11:32:42 +0000 |
---|---|---|
committer | Tvrtko Ursulin <tvrtko.ursulin@intel.com> | 2017-02-14 14:30:46 +0000 |
commit | 73dec95e6ba37d8138bb111be5c9b8a1f3a622ae (patch) | |
tree | b1051f0903c87d4a1ca9c5100af380c0683a4a70 /drivers/gpu/drm/i915/intel_ringbuffer.h | |
parent | d2d1501625e96170958f38646a9fcc9b69bbc2df (diff) | |
download | talos-obmc-linux-73dec95e6ba37d8138bb111be5c9b8a1f3a622ae.tar.gz talos-obmc-linux-73dec95e6ba37d8138bb111be5c9b8a1f3a622ae.zip |
drm/i915: Emit to ringbuffer directly
This removes the usage of intel_ring_emit in favour of
directly writing to the ring buffer.
intel_ring_emit was preventing the compiler from optimising
fetch and increment of the current ring buffer pointer and
therefore generating very verbose code for every write.
It had no useful purpose since all ringbuffer operations
are started and ended with intel_ring_begin and
intel_ring_advance respectively, with no bail out in the
middle possible, so it is fine to increment the tail in
intel_ring_begin and let the code manage the pointer
itself.
Useless instruction removal amounts to approximately
two and a half kilobytes of saved text on my build.
Not sure if this has any measurable performance
implications but executing a ton of useless instructions
on fast paths cannot be good.
v2:
* Change return from intel_ring_begin to error pointer by
popular demand.
* Move tail increment to intel_ring_advance to enable some
error checking.
v3:
* Move tail advance back into intel_ring_begin.
* Rebase and tidy.
v4:
* Complete rebase after a few months since v3.
v5:
* Remove unnecessary cast and fix !debug compile. (Chris Wilson)
v6:
* Make intel_ring_offset take request as well.
* Fix recording of request postfix plus a sprinkle of asserts.
(Chris Wilson)
v7:
* Use intel_ring_offset to get the postfix. (Chris Wilson)
* Convert GVT code as well.
v8:
* Rename *out++ to *cs++.
v9:
* Fix out to cs conversion in GVT.
v10:
* Rebase for new intel_ring_begin in selftests.
Signed-off-by: Tvrtko Ursulin <tvrtko.ursulin@intel.com>
Cc: Chris Wilson <chris@chris-wilson.co.uk>
Cc: Joonas Lahtinen <joonas.lahtinen@linux.intel.com>
Cc: Zhi Wang <zhi.a.wang@intel.com>
Reviewed-by: Chris Wilson <chris@chris-wilson.co.uk>
Acked-by: Joonas Lahtinen <joonas.lahtinen@linux.intel.com>
Link: http://patchwork.freedesktop.org/patch/msgid/20170214113242.29241-1-tvrtko.ursulin@linux.intel.com
Diffstat (limited to 'drivers/gpu/drm/i915/intel_ringbuffer.h')
-rw-r--r-- | drivers/gpu/drm/i915/intel_ringbuffer.h | 30 |
1 files changed, 11 insertions, 19 deletions
diff --git a/drivers/gpu/drm/i915/intel_ringbuffer.h b/drivers/gpu/drm/i915/intel_ringbuffer.h index cc62e89010d3..4350713dbc58 100644 --- a/drivers/gpu/drm/i915/intel_ringbuffer.h +++ b/drivers/gpu/drm/i915/intel_ringbuffer.h @@ -145,7 +145,6 @@ struct intel_ring { u32 head; u32 tail; - GEM_DEBUG_DECL(u32 advance); int space; int size; @@ -292,7 +291,7 @@ struct intel_engine_cs { #define I915_DISPATCH_PINNED BIT(1) #define I915_DISPATCH_RS BIT(2) void (*emit_breadcrumb)(struct drm_i915_gem_request *req, - u32 *out); + u32 *cs); int emit_breadcrumb_sz; /* Pass the request to the hardware queue (e.g. directly into @@ -375,7 +374,7 @@ struct intel_engine_cs { /* AKA wait() */ int (*sync_to)(struct drm_i915_gem_request *req, struct drm_i915_gem_request *signal); - u32 *(*signal)(struct drm_i915_gem_request *req, u32 *out); + u32 *(*signal)(struct drm_i915_gem_request *req, u32 *cs); } semaphore; /* Execlists */ @@ -497,21 +496,12 @@ void intel_engine_cleanup(struct intel_engine_cs *engine); void intel_legacy_submission_resume(struct drm_i915_private *dev_priv); -int __must_check intel_ring_begin(struct drm_i915_gem_request *req, int n); int __must_check intel_ring_cacheline_align(struct drm_i915_gem_request *req); -static inline void intel_ring_emit(struct intel_ring *ring, u32 data) -{ - *(uint32_t *)(ring->vaddr + ring->tail) = data; - ring->tail += 4; -} +u32 __must_check *intel_ring_begin(struct drm_i915_gem_request *req, int n); -static inline void intel_ring_emit_reg(struct intel_ring *ring, i915_reg_t reg) -{ - intel_ring_emit(ring, i915_mmio_reg_offset(reg)); -} - -static inline void intel_ring_advance(struct intel_ring *ring) +static inline void +intel_ring_advance(struct drm_i915_gem_request *req, u32 *cs) { /* Dummy function. * @@ -521,14 +511,16 @@ static inline void intel_ring_advance(struct intel_ring *ring) * reserved for the command packet (i.e. the value passed to * intel_ring_begin()). 
*/ - GEM_DEBUG_BUG_ON(ring->tail != ring->advance); + GEM_BUG_ON((req->ring->vaddr + req->ring->tail) != cs); } -static inline u32 intel_ring_offset(struct intel_ring *ring, void *addr) +static inline u32 +intel_ring_offset(struct drm_i915_gem_request *req, void *addr) { /* Don't write ring->size (equivalent to 0) as that hangs some GPUs. */ - u32 offset = addr - ring->vaddr; - return offset & (ring->size - 1); + u32 offset = addr - req->ring->vaddr; + GEM_BUG_ON(offset > req->ring->size); + return offset & (req->ring->size - 1); } int __intel_ring_space(int head, int tail, int size); |