From a09ba7faf75fa4b21980d81de8e5f3d5c0785ccf Mon Sep 17 00:00:00 2001 From: Eric Anholt Date: Sat, 29 Aug 2009 12:49:51 -0700 Subject: drm/i915: Fix CPU-spinning hangs related to fence usage by using an LRU. The lack of a proper LRU was partially worked around by taking the fence from the object containing the oldest seqno. But if there are multiple objects inactive, then they don't have seqnos and the first fence reg among them would be chosen. If you were trying to copy data between two mappings, this could result in each page fault stealing the fence from the other argument, and your application hanging. https://bugs.freedesktop.org/show_bug.cgi?id=23566 https://bugs.freedesktop.org/show_bug.cgi?id=23220 https://bugs.freedesktop.org/show_bug.cgi?id=23253 https://bugs.freedesktop.org/show_bug.cgi?id=23366 Cc: Stable Team Signed-off-by: Eric Anholt Reviewed-by: Jesse Barnes Reviewed-by: Chris Wilson --- drivers/gpu/drm/i915/i915_gem.c | 86 +++++++++++++++++++++++------------------ 1 file changed, 48 insertions(+), 38 deletions(-) (limited to 'drivers/gpu/drm/i915/i915_gem.c') diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c index 140bee142fc2..0c07a755b3a3 100644 --- a/drivers/gpu/drm/i915/i915_gem.c +++ b/drivers/gpu/drm/i915/i915_gem.c @@ -978,6 +978,7 @@ int i915_gem_set_domain_ioctl(struct drm_device *dev, void *data, struct drm_file *file_priv) { + struct drm_i915_private *dev_priv = dev->dev_private; struct drm_i915_gem_set_domain *args = data; struct drm_gem_object *obj; uint32_t read_domains = args->read_domains; @@ -1010,8 +1011,18 @@ i915_gem_set_domain_ioctl(struct drm_device *dev, void *data, obj, obj->size, read_domains, write_domain); #endif if (read_domains & I915_GEM_DOMAIN_GTT) { + struct drm_i915_gem_object *obj_priv = obj->driver_private; + ret = i915_gem_object_set_to_gtt_domain(obj, write_domain != 0); + /* Update the LRU on the fence for the CPU access that's + * about to occur. + */ + if (obj_priv->fence_reg != I915_FENCE_REG_NONE) { + list_move_tail(&obj_priv->fence_list, + &dev_priv->mm.fence_list); + } + /* Silently promote "you're not bound, there was nothing to do" * to success, since the client was just asking us to * make sure everything was done. @@ -1155,8 +1166,7 @@ int i915_gem_fault(struct vm_area_struct *vma, struct vm_fault *vmf) } /* Need a new fence register? */ - if (obj_priv->fence_reg == I915_FENCE_REG_NONE && - obj_priv->tiling_mode != I915_TILING_NONE) { + if (obj_priv->tiling_mode != I915_TILING_NONE) { ret = i915_gem_object_get_fence_reg(obj); if (ret) { mutex_unlock(&dev->struct_mutex); @@ -2208,6 +2218,12 @@ i915_gem_object_get_fence_reg(struct drm_gem_object *obj) struct drm_i915_gem_object *old_obj_priv = NULL; int i, ret, avail; + /* Just update our place in the LRU if our fence is getting used. */ + if (obj_priv->fence_reg != I915_FENCE_REG_NONE) { + list_move_tail(&obj_priv->fence_list, &dev_priv->mm.fence_list); + return 0; + } + switch (obj_priv->tiling_mode) { case I915_TILING_NONE: WARN(1, "allocating a fence for non-tiled object?\n"); @@ -2229,7 +2245,6 @@ i915_gem_object_get_fence_reg(struct drm_gem_object *obj) } /* First try to find a free reg */ -try_again: avail = 0; for (i = dev_priv->fence_reg_start; i < dev_priv->num_fence_regs; i++) { reg = &dev_priv->fence_regs[i]; @@ -2243,52 +2258,41 @@ try_again: /* None available, try to steal one or wait for a user to finish */ if (i == dev_priv->num_fence_regs) { - uint32_t seqno = dev_priv->mm.next_gem_seqno; + struct drm_gem_object *old_obj = NULL; if (avail == 0) return -ENOSPC; - for (i = dev_priv->fence_reg_start; - i < dev_priv->num_fence_regs; i++) { - uint32_t this_seqno; + list_for_each_entry(old_obj_priv, &dev_priv->mm.fence_list, + fence_list) { + old_obj = old_obj_priv->obj; - reg = &dev_priv->fence_regs[i]; - old_obj_priv = reg->obj->driver_private; + reg = &dev_priv->fence_regs[old_obj_priv->fence_reg]; if (old_obj_priv->pin_count) continue; + /* Take a reference, as otherwise the wait_rendering + * below may cause the object to get freed out from + * under us. + */ + drm_gem_object_reference(old_obj); + /* i915 uses fences for GPU access to tiled buffers */ if (IS_I965G(dev) || !old_obj_priv->active) break; - /* find the seqno of the first available fence */ - this_seqno = old_obj_priv->last_rendering_seqno; - if (this_seqno != 0 && - reg->obj->write_domain == 0 && - i915_seqno_passed(seqno, this_seqno)) - seqno = this_seqno; - } - - /* - * Now things get ugly... we have to wait for one of the - * objects to finish before trying again. - */ - if (i == dev_priv->num_fence_regs) { - if (seqno == dev_priv->mm.next_gem_seqno) { - i915_gem_flush(dev, - I915_GEM_GPU_DOMAINS, - I915_GEM_GPU_DOMAINS); - seqno = i915_add_request(dev, NULL, - I915_GEM_GPU_DOMAINS); - if (seqno == 0) - return -ENOMEM; - } - - ret = i915_wait_request(dev, seqno); - if (ret) + /* This brings the object to the head of the LRU if it + * had been written to. The only way this should + * result in us waiting longer than the expected + * optimal amount of time is if there was a + * fence-using buffer later that was read-only. + */ + i915_gem_object_flush_gpu_write_domain(old_obj); + ret = i915_gem_object_wait_rendering(old_obj); + if (ret != 0) return ret; - goto try_again; + break; } /* @@ -2296,10 +2300,15 @@ try_again: * for this object next time we need it. */ i915_gem_release_mmap(reg->obj); + i = old_obj_priv->fence_reg; old_obj_priv->fence_reg = I915_FENCE_REG_NONE; + list_del_init(&old_obj_priv->fence_list); + drm_gem_object_unreference(old_obj); } obj_priv->fence_reg = i; + list_add_tail(&obj_priv->fence_list, &dev_priv->mm.fence_list); + reg->obj = obj; if (IS_I965G(dev)) @@ -2342,6 +2351,7 @@ i915_gem_clear_fence_reg(struct drm_gem_object *obj) dev_priv->fence_regs[obj_priv->fence_reg].obj = NULL; obj_priv->fence_reg = I915_FENCE_REG_NONE; + list_del_init(&obj_priv->fence_list); } /** @@ -3595,9 +3605,7 @@ i915_gem_object_pin(struct drm_gem_object *obj, uint32_t alignment) * Pre-965 chips need a fence register set up in order to * properly handle tiled surfaces. */ - if (!IS_I965G(dev) && - obj_priv->fence_reg == I915_FENCE_REG_NONE && - obj_priv->tiling_mode != I915_TILING_NONE) { + if (!IS_I965G(dev) && obj_priv->tiling_mode != I915_TILING_NONE) { ret = i915_gem_object_get_fence_reg(obj); if (ret != 0) { if (ret != -EBUSY && ret != -ERESTARTSYS) @@ -3806,6 +3814,7 @@ int i915_gem_init_object(struct drm_gem_object *obj) obj_priv->obj = obj; obj_priv->fence_reg = I915_FENCE_REG_NONE; INIT_LIST_HEAD(&obj_priv->list); + INIT_LIST_HEAD(&obj_priv->fence_list); return 0; } @@ -4253,6 +4262,7 @@ i915_gem_load(struct drm_device *dev) INIT_LIST_HEAD(&dev_priv->mm.flushing_list); INIT_LIST_HEAD(&dev_priv->mm.inactive_list); INIT_LIST_HEAD(&dev_priv->mm.request_list); + INIT_LIST_HEAD(&dev_priv->mm.fence_list); INIT_DELAYED_WORK(&dev_priv->mm.retire_work, i915_gem_retire_work_handler); dev_priv->mm.next_gem_seqno = 1; -- cgit v1.2.1 From 58c2fb647a3cf70a90d7b33a3f2a8396a20a0bc4 Mon Sep 17 00:00:00 2001 From: Chris Wilson Date: Tue, 1 Sep 2009 12:02:39 +0100 Subject: drm/i915: Unref old_obj on get_fence_reg() error path Remember to release the local reference if we fail to wait on the rendering. (Also whilst in the vicinity add some whitespace so that the phasing of the operations is clearer.) Signed-off-by: Chris Wilson Signed-off-by: Eric Anholt --- drivers/gpu/drm/i915/i915_gem.c | 13 +++++++++---- 1 file changed, 9 insertions(+), 4 deletions(-) (limited to 'drivers/gpu/drm/i915/i915_gem.c') diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c index 0c07a755b3a3..7edb5b9d5792 100644 --- a/drivers/gpu/drm/i915/i915_gem.c +++ b/drivers/gpu/drm/i915/i915_gem.c @@ -2267,8 +2267,6 @@ i915_gem_object_get_fence_reg(struct drm_gem_object *obj) fence_list) { old_obj = old_obj_priv->obj; - reg = &dev_priv->fence_regs[old_obj_priv->fence_reg]; - if (old_obj_priv->pin_count) continue; @@ -2290,8 +2288,11 @@ i915_gem_object_get_fence_reg(struct drm_gem_object *obj) */ i915_gem_object_flush_gpu_write_domain(old_obj); ret = i915_gem_object_wait_rendering(old_obj); - if (ret != 0) + if (ret != 0) { + drm_gem_object_unreference(old_obj); return ret; + } + break; } @@ -2299,10 +2300,14 @@ i915_gem_object_get_fence_reg(struct drm_gem_object *obj) * Zap this virtual mapping so we can set up a fence again * for this object next time we need it. */ - i915_gem_release_mmap(reg->obj); + i915_gem_release_mmap(old_obj); + i = old_obj_priv->fence_reg; + reg = &dev_priv->fence_regs[i]; + old_obj_priv->fence_reg = I915_FENCE_REG_NONE; list_del_init(&old_obj_priv->fence_list); + drm_gem_object_unreference(old_obj); } -- cgit v1.2.1 From 652c393a3368af84359da37c45afc35a91144960 Mon Sep 17 00:00:00 2001 From: Jesse Barnes Date: Mon, 17 Aug 2009 13:31:43 -0700 Subject: drm/i915: add dynamic clock frequency control There are several sources of unnecessary power consumption on Intel graphics systems. The first is the LVDS clock. TFTs don't suffer from persistence issues like CRTs, and so we can reduce the LVDS refresh rate when the screen is idle. It will be automatically upclocked when userspace triggers graphical activity. Beyond that, we can enable memory self refresh. This allows the memory to go into a lower power state when the graphics are idle. Finally, we can drop some clocks on the gpu itself. All of these things can be reenabled between frames when GPU activity is triggered, and so there should be no user visible graphical changes. Signed-off-by: Jesse Barnes Signed-off-by: Matthew Garrett Signed-off-by: Eric Anholt --- drivers/gpu/drm/i915/i915_gem.c | 10 ++++++++-- 1 file changed, 8 insertions(+), 2 deletions(-) (limited to 'drivers/gpu/drm/i915/i915_gem.c') diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c index 7edb5b9d5792..73b58193afed 100644 --- a/drivers/gpu/drm/i915/i915_gem.c +++ b/drivers/gpu/drm/i915/i915_gem.c @@ -29,6 +29,7 @@ #include "drm.h" #include "i915_drm.h" #include "i915_drv.h" +#include "intel_drv.h" #include #include @@ -981,6 +982,7 @@ i915_gem_set_domain_ioctl(struct drm_device *dev, void *data, struct drm_i915_private *dev_priv = dev->dev_private; struct drm_i915_gem_set_domain *args = data; struct drm_gem_object *obj; + struct drm_i915_gem_object *obj_priv; uint32_t read_domains = args->read_domains; uint32_t write_domain = args->write_domain; int ret; @@ -1004,15 +1006,17 @@ i915_gem_set_domain_ioctl(struct drm_device *dev, void *data, obj = drm_gem_object_lookup(dev, file_priv, args->handle); if (obj == NULL) return -EBADF; + obj_priv = obj->driver_private; mutex_lock(&dev->struct_mutex); + + intel_mark_busy(dev, obj); + #if WATCH_BUF DRM_INFO("set_domain_ioctl %p(%zd), %08x %08x\n", obj, obj->size, read_domains, write_domain); #endif if (read_domains & I915_GEM_DOMAIN_GTT) { - struct drm_i915_gem_object *obj_priv = obj->driver_private; - ret = i915_gem_object_set_to_gtt_domain(obj, write_domain != 0); /* Update the LRU on the fence for the CPU access that's @@ -2776,6 +2780,8 @@ i915_gem_object_set_to_gpu_domain(struct drm_gem_object *obj) BUG_ON(obj->pending_read_domains & I915_GEM_DOMAIN_CPU); BUG_ON(obj->pending_write_domain == I915_GEM_DOMAIN_CPU); + intel_mark_busy(dev, obj); + #if WATCH_BUF DRM_INFO("%s: object %p read %08x -> %08x write %08x -> %08x\n", __func__, obj, -- cgit v1.2.1 From 0ef82af7253c1929a3995f271b8b0db462d1a0c3 Mon Sep 17 00:00:00 2001 From: Chris Wilson Date: Sat, 5 Sep 2009 18:07:06 +0100 Subject: drm/i915: Pad ringbuffer with NOOPs before wrapping MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit According to the docs, the ringbuffer is not allowed to wrap in the middle of an instruction. G45 PRM, Vol 1b, p101: While the “free space” wrap may allow commands to be wrapped around the end of the Ring Buffer, the wrap should only occur between commands. Padding (with NOP) may be required to follow this restriction. Do as commanded. [Having seen bug reports where there is evidence of split commands, but apparently the GPU has continued on merrily before a bizarre and untimely death, this may or may not fix a few random hangs.] Signed-off-by: Chris Wilson CC: Eric Anholt Signed-off-by: Eric Anholt --- drivers/gpu/drm/i915/i915_gem.c | 1 - 1 file changed, 1 deletion(-) (limited to 'drivers/gpu/drm/i915/i915_gem.c') diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c index 73b58193afed..076752112a39 100644 --- a/drivers/gpu/drm/i915/i915_gem.c +++ b/drivers/gpu/drm/i915/i915_gem.c @@ -4099,7 +4099,6 @@ i915_gem_init_ringbuffer(struct drm_device *dev) /* Set up the kernel mapping for the ring. */ ring->Size = obj->size; - ring->tail_mask = obj->size - 1; ring->map.offset = dev->agp->base + obj_priv->gtt_offset; ring->map.size = obj->size; -- cgit v1.2.1