From 73dec95e6ba37d8138bb111be5c9b8a1f3a622ae Mon Sep 17 00:00:00 2001 From: Tvrtko Ursulin Date: Tue, 14 Feb 2017 11:32:42 +0000 Subject: drm/i915: Emit to ringbuffer directly This removes the usage of intel_ring_emit in favour of directly writing to the ring buffer. intel_ring_emit was preventing the compiler for optimising fetch and increment of the current ring buffer pointer and therefore generating very verbose code for every write. It had no useful purpose since all ringbuffer operations are started and ended with intel_ring_begin and intel_ring_advance respectively, with no bail out in the middle possible, so it is fine to increment the tail in intel_ring_begin and let the code manage the pointer itself. Useless instruction removal amounts to approximately two and half kilobytes of saved text on my build. Not sure if this has any measurable performance implications but executing a ton of useless instructions on fast paths cannot be good. v2: * Change return from intel_ring_begin to error pointer by popular demand. * Move tail increment to intel_ring_advance to enable some error checking. v3: * Move tail advance back into intel_ring_begin. * Rebase and tidy. v4: * Complete rebase after a few months since v3. v5: * Remove unecessary cast and fix !debug compile. (Chris Wilson) v6: * Make intel_ring_offset take request as well. * Fix recording of request postfix plus a sprinkle of asserts. (Chris Wilson) v7: * Use intel_ring_offset to get the postfix. (Chris Wilson) * Convert GVT code as well. v8: * Rename *out++ to *cs++. v9: * Fix GVT out to cs conversion in GVT. v10: * Rebase for new intel_ring_begin in selftests. Signed-off-by: Tvrtko Ursulin Cc: Chris Wilson Cc: Joonas Lahtinen Cc: Zhi Wang Reviewed-by: Chris Wilson Acked-by: Joonas Lahtinen Link: http://patchwork.freedesktop.org/patch/msgid/20170214113242.29241-1-tvrtko.ursulin@linux.intel.com --- drivers/gpu/drm/i915/intel_overlay.c | 79 ++++++++++++++++-------------------- 1 file changed, 34 insertions(+), 45 deletions(-) (limited to 'drivers/gpu/drm/i915/intel_overlay.c') diff --git a/drivers/gpu/drm/i915/intel_overlay.c b/drivers/gpu/drm/i915/intel_overlay.c index 0608fad7f593..5ef9f5bfb92c 100644 --- a/drivers/gpu/drm/i915/intel_overlay.c +++ b/drivers/gpu/drm/i915/intel_overlay.c @@ -267,8 +267,7 @@ static int intel_overlay_on(struct intel_overlay *overlay) { struct drm_i915_private *dev_priv = overlay->i915; struct drm_i915_gem_request *req; - struct intel_ring *ring; - int ret; + u32 *cs; WARN_ON(overlay->active); WARN_ON(IS_I830(dev_priv) && !(dev_priv->quirks & QUIRK_PIPEA_FORCE)); @@ -277,10 +276,10 @@ static int intel_overlay_on(struct intel_overlay *overlay) if (IS_ERR(req)) return PTR_ERR(req); - ret = intel_ring_begin(req, 4); - if (ret) { + cs = intel_ring_begin(req, 4); + if (IS_ERR(cs)) { i915_add_request_no_flush(req); - return ret; + return PTR_ERR(cs); } overlay->active = true; @@ -288,12 +287,11 @@ static int intel_overlay_on(struct intel_overlay *overlay) if (IS_I830(dev_priv)) i830_overlay_clock_gating(dev_priv, false); - ring = req->ring; - intel_ring_emit(ring, MI_OVERLAY_FLIP | MI_OVERLAY_ON); - intel_ring_emit(ring, overlay->flip_addr | OFC_UPDATE); - intel_ring_emit(ring, MI_WAIT_FOR_EVENT | MI_WAIT_FOR_OVERLAY_FLIP); - intel_ring_emit(ring, MI_NOOP); - intel_ring_advance(ring); + *cs++ = MI_OVERLAY_FLIP | MI_OVERLAY_ON; + *cs++ = overlay->flip_addr | OFC_UPDATE; + *cs++ = MI_WAIT_FOR_EVENT | MI_WAIT_FOR_OVERLAY_FLIP; + *cs++ = MI_NOOP; + intel_ring_advance(req, cs); return intel_overlay_do_wait_request(overlay, req, NULL); } @@ -326,10 +324,8 @@ static int intel_overlay_continue(struct intel_overlay *overlay, { struct drm_i915_private *dev_priv = overlay->i915; struct drm_i915_gem_request *req; - struct intel_ring *ring; u32 flip_addr = overlay->flip_addr; - u32 tmp; - int ret; + u32 tmp, *cs; WARN_ON(!overlay->active); @@ -345,16 +341,15 @@ static int intel_overlay_continue(struct intel_overlay *overlay, if (IS_ERR(req)) return PTR_ERR(req); - ret = intel_ring_begin(req, 2); - if (ret) { + cs = intel_ring_begin(req, 2); + if (IS_ERR(cs)) { i915_add_request_no_flush(req); - return ret; + return PTR_ERR(cs); } - ring = req->ring; - intel_ring_emit(ring, MI_OVERLAY_FLIP | MI_OVERLAY_CONTINUE); - intel_ring_emit(ring, flip_addr); - intel_ring_advance(ring); + *cs++ = MI_OVERLAY_FLIP | MI_OVERLAY_CONTINUE; + *cs++ = flip_addr; + intel_ring_advance(req, cs); intel_overlay_flip_prepare(overlay, vma); @@ -408,9 +403,7 @@ static void intel_overlay_off_tail(struct i915_gem_active *active, static int intel_overlay_off(struct intel_overlay *overlay) { struct drm_i915_gem_request *req; - struct intel_ring *ring; - u32 flip_addr = overlay->flip_addr; - int ret; + u32 *cs, flip_addr = overlay->flip_addr; WARN_ON(!overlay->active); @@ -424,25 +417,23 @@ static int intel_overlay_off(struct intel_overlay *overlay) if (IS_ERR(req)) return PTR_ERR(req); - ret = intel_ring_begin(req, 6); - if (ret) { + cs = intel_ring_begin(req, 6); + if (IS_ERR(cs)) { i915_add_request_no_flush(req); - return ret; + return PTR_ERR(cs); } - ring = req->ring; - /* wait for overlay to go idle */ - intel_ring_emit(ring, MI_OVERLAY_FLIP | MI_OVERLAY_CONTINUE); - intel_ring_emit(ring, flip_addr); - intel_ring_emit(ring, MI_WAIT_FOR_EVENT | MI_WAIT_FOR_OVERLAY_FLIP); + *cs++ = MI_OVERLAY_FLIP | MI_OVERLAY_CONTINUE; + *cs++ = flip_addr; + *cs++ = MI_WAIT_FOR_EVENT | MI_WAIT_FOR_OVERLAY_FLIP; /* turn overlay off */ - intel_ring_emit(ring, MI_OVERLAY_FLIP | MI_OVERLAY_OFF); - intel_ring_emit(ring, flip_addr); - intel_ring_emit(ring, MI_WAIT_FOR_EVENT | MI_WAIT_FOR_OVERLAY_FLIP); + *cs++ = MI_OVERLAY_FLIP | MI_OVERLAY_OFF; + *cs++ = flip_addr; + *cs++ = MI_WAIT_FOR_EVENT | MI_WAIT_FOR_OVERLAY_FLIP; - intel_ring_advance(ring); + intel_ring_advance(req, cs); intel_overlay_flip_prepare(overlay, NULL); @@ -465,6 +456,7 @@ static int intel_overlay_recover_from_interrupt(struct intel_overlay *overlay) static int intel_overlay_release_old_vid(struct intel_overlay *overlay) { struct drm_i915_private *dev_priv = overlay->i915; + u32 *cs; int ret; lockdep_assert_held(&dev_priv->drm.struct_mutex); @@ -478,23 +470,20 @@ static int intel_overlay_release_old_vid(struct intel_overlay *overlay) if (I915_READ(ISR) & I915_OVERLAY_PLANE_FLIP_PENDING_INTERRUPT) { /* synchronous slowpath */ struct drm_i915_gem_request *req; - struct intel_ring *ring; req = alloc_request(overlay); if (IS_ERR(req)) return PTR_ERR(req); - ret = intel_ring_begin(req, 2); - if (ret) { + cs = intel_ring_begin(req, 2); + if (IS_ERR(cs)) { i915_add_request_no_flush(req); - return ret; + return PTR_ERR(cs); } - ring = req->ring; - intel_ring_emit(ring, - MI_WAIT_FOR_EVENT | MI_WAIT_FOR_OVERLAY_FLIP); - intel_ring_emit(ring, MI_NOOP); - intel_ring_advance(ring); + *cs++ = MI_WAIT_FOR_EVENT | MI_WAIT_FOR_OVERLAY_FLIP; + *cs++ = MI_NOOP; + intel_ring_advance(req, cs); ret = intel_overlay_do_wait_request(overlay, req, intel_overlay_release_old_vid_tail); -- cgit v1.2.3 From e642c85b03de10eb1b411884ba27550651d0100b Mon Sep 17 00:00:00 2001 From: Chris Wilson Date: Fri, 17 Mar 2017 11:47:09 +0000 Subject: drm/i915: Remove superfluous i915_add_request_no_flush() helper The only time we need to emit a flush inside request emission is after an execbuffer, for which we can use the full __i915_add_request(). All other instances want the simpler i915_add_request() without flushing, so remove the useless helper. Signed-off-by: Chris Wilson Reviewed-by: Joonas Lahtinen Link: http://patchwork.freedesktop.org/patch/msgid/20170317114709.8388-1-chris@chris-wilson.co.uk --- drivers/gpu/drm/i915/gvt/scheduler.c | 2 +- drivers/gpu/drm/i915/i915_gem_context.c | 2 +- drivers/gpu/drm/i915/i915_gem_request.h | 2 -- drivers/gpu/drm/i915/intel_display.c | 4 ++-- drivers/gpu/drm/i915/intel_overlay.c | 8 ++++---- drivers/gpu/drm/i915/intel_pm.c | 2 +- 6 files changed, 9 insertions(+), 11 deletions(-) (limited to 'drivers/gpu/drm/i915/intel_overlay.c') diff --git a/drivers/gpu/drm/i915/gvt/scheduler.c b/drivers/gpu/drm/i915/gvt/scheduler.c index b14c5c2109d4..31d2240fdb1f 100644 --- a/drivers/gpu/drm/i915/gvt/scheduler.c +++ b/drivers/gpu/drm/i915/gvt/scheduler.c @@ -212,7 +212,7 @@ out: workload->status = ret; if (!IS_ERR_OR_NULL(rq)) - i915_add_request_no_flush(rq); + i915_add_request(rq); mutex_unlock(&dev_priv->drm.struct_mutex); return ret; } diff --git a/drivers/gpu/drm/i915/i915_gem_context.c b/drivers/gpu/drm/i915/i915_gem_context.c index 000508502cf9..486051ed681d 100644 --- a/drivers/gpu/drm/i915/i915_gem_context.c +++ b/drivers/gpu/drm/i915/i915_gem_context.c @@ -933,7 +933,7 @@ int i915_gem_switch_to_kernel_context(struct drm_i915_private *dev_priv) } ret = i915_switch_context(req); - i915_add_request_no_flush(req); + i915_add_request(req); if (ret) return ret; } diff --git a/drivers/gpu/drm/i915/i915_gem_request.h b/drivers/gpu/drm/i915/i915_gem_request.h index 0cef887b0de4..a211c53c813f 100644 --- a/drivers/gpu/drm/i915/i915_gem_request.h +++ b/drivers/gpu/drm/i915/i915_gem_request.h @@ -267,8 +267,6 @@ int i915_gem_request_await_dma_fence(struct drm_i915_gem_request *req, void __i915_add_request(struct drm_i915_gem_request *req, bool flush_caches); #define i915_add_request(req) \ - __i915_add_request(req, true) -#define i915_add_request_no_flush(req) \ __i915_add_request(req, false) void __i915_gem_request_submit(struct drm_i915_gem_request *request); diff --git a/drivers/gpu/drm/i915/intel_display.c b/drivers/gpu/drm/i915/intel_display.c index 5959c9b6dc97..010e5ddb198a 100644 --- a/drivers/gpu/drm/i915/intel_display.c +++ b/drivers/gpu/drm/i915/intel_display.c @@ -10668,7 +10668,7 @@ static int intel_crtc_page_flip(struct drm_crtc *crtc, intel_mark_page_flip_active(intel_crtc, work); work->flip_queued_req = i915_gem_request_get(request); - i915_add_request_no_flush(request); + i915_add_request(request); } i915_gem_object_wait_priority(obj, 0, I915_PRIORITY_DISPLAY); @@ -10684,7 +10684,7 @@ static int intel_crtc_page_flip(struct drm_crtc *crtc, return 0; cleanup_request: - i915_add_request_no_flush(request); + i915_add_request(request); cleanup_unpin: to_intel_plane_state(primary->state)->vma = work->old_vma; intel_unpin_fb_vma(vma); diff --git a/drivers/gpu/drm/i915/intel_overlay.c b/drivers/gpu/drm/i915/intel_overlay.c index 5ef9f5bfb92c..2e0c56ed22bb 100644 --- a/drivers/gpu/drm/i915/intel_overlay.c +++ b/drivers/gpu/drm/i915/intel_overlay.c @@ -278,7 +278,7 @@ static int intel_overlay_on(struct intel_overlay *overlay) cs = intel_ring_begin(req, 4); if (IS_ERR(cs)) { - i915_add_request_no_flush(req); + i915_add_request(req); return PTR_ERR(cs); } @@ -343,7 +343,7 @@ static int intel_overlay_continue(struct intel_overlay *overlay, cs = intel_ring_begin(req, 2); if (IS_ERR(cs)) { - i915_add_request_no_flush(req); + i915_add_request(req); return PTR_ERR(cs); } @@ -419,7 +419,7 @@ static int intel_overlay_off(struct intel_overlay *overlay) cs = intel_ring_begin(req, 6); if (IS_ERR(cs)) { - i915_add_request_no_flush(req); + i915_add_request(req); return PTR_ERR(cs); } @@ -477,7 +477,7 @@ static int intel_overlay_release_old_vid(struct intel_overlay *overlay) cs = intel_ring_begin(req, 2); if (IS_ERR(cs)) { - i915_add_request_no_flush(req); + i915_add_request(req); return PTR_ERR(cs); } diff --git a/drivers/gpu/drm/i915/intel_pm.c b/drivers/gpu/drm/i915/intel_pm.c index 934a8c00073a..d55bf882d1aa 100644 --- a/drivers/gpu/drm/i915/intel_pm.c +++ b/drivers/gpu/drm/i915/intel_pm.c @@ -7086,7 +7086,7 @@ static void __intel_autoenable_gt_powersave(struct work_struct *work) rcs->init_context(req); /* Mark the device busy, calling intel_enable_gt_powersave() */ - i915_add_request_no_flush(req); + i915_add_request(req); unlock: mutex_unlock(&dev_priv->drm.struct_mutex); -- cgit v1.2.3