From 5323fd042f89164927ee8c311f0a975e8c846412 Mon Sep 17 00:00:00 2001 From: Eric Anholt Date: Wed, 9 Sep 2009 11:50:45 -0700 Subject: drm/i915: Zap mmaps of objects before unbinding them from the GTT. Otherwise, some other userland writing into its buffer may race to land writes either after the CPU thinks it's got a coherent view, or after its GTT entries have been redirected to point at the scratch page. Either result is unpleasant. Signed-off-by: Eric Anholt --- drivers/gpu/drm/i915/i915_gem.c | 16 ++++++++-------- 1 file changed, 8 insertions(+), 8 deletions(-) (limited to 'drivers/gpu/drm/i915/i915_gem.c') diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c index 954fb699131b..f3758f9fc979 100644 --- a/drivers/gpu/drm/i915/i915_gem.c +++ b/drivers/gpu/drm/i915/i915_gem.c @@ -1915,6 +1915,12 @@ i915_gem_object_unbind(struct drm_gem_object *obj) return -EINVAL; } + /* blow away mappings if mapped through GTT */ + i915_gem_release_mmap(obj); + + if (obj_priv->fence_reg != I915_FENCE_REG_NONE) + i915_gem_clear_fence_reg(obj); + /* Move the object to the CPU domain to ensure that * any possible CPU writes while it's not in the GTT * are flushed when we go to remap it. This will @@ -1928,20 +1934,14 @@ i915_gem_object_unbind(struct drm_gem_object *obj) return ret; } + BUG_ON(obj_priv->active); + if (obj_priv->agp_mem != NULL) { drm_unbind_agp(obj_priv->agp_mem); drm_free_agp(obj_priv->agp_mem, obj->size / PAGE_SIZE); obj_priv->agp_mem = NULL; } - BUG_ON(obj_priv->active); - - /* blow away mappings if mapped through GTT */ - i915_gem_release_mmap(obj); - - if (obj_priv->fence_reg != I915_FENCE_REG_NONE) - i915_gem_clear_fence_reg(obj); - i915_gem_object_put_pages(obj); if (obj_priv->gtt_space) { -- cgit v1.2.3 From e517a5e97080bbe52857bd0d7df9b66602d53c4d Mon Sep 17 00:00:00 2001 From: Eric Anholt Date: Thu, 10 Sep 2009 17:48:48 -0700 Subject: agp/intel: Fix the pre-9xx chipset flush. Ever since we enabled GEM, the pre-9xx chipsets (particularly 865) have had serious stability issues. Back in May a wbinvd was added to the DRM to work around much of the problem. Some failure remained -- easily visible by dragging a window around on an X -retro desktop, or by looking at bugzilla. The chipset flush was on the right track -- hitting the right amount of memory, and it appears to be the only way to flush on these chipsets, but the flush page was mapped uncached. As a result, the writes trying to clear the writeback cache ended up bypassing the cache, and not flushing anything! The wbinvd would flush out other writeback data and often cause the data we wanted to get flushed, but not always. By removing the setting of the page to UC and instead just clflushing the data we write to try to flush it, we get the desired behavior with no wbinvd. This exports clflush_cache_range(), which was lying around and happened to basically match the code I was otherwise going to copy from the DRM.
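For reference, the newly exported clflush_cache_range() is tiny; roughly the following, as it appeared in the pageattr.c of this era (a sketch from the surrounding kernel sources, not part of this patch):

void clflush_cache_range(void *vaddr, unsigned int size)
{
	void *vend = vaddr + size - 1;

	mb();
	for (; vaddr < vend; vaddr += boot_cpu_data.x86_clflush_size)
		clflush(vaddr);
	/* flush any possible final partial cacheline */
	clflush(vend);
	mb();
}

The mb() pairs on either side order the clflushes against both the stores being flushed and whatever the caller does next, which is the property the 1KB write-buffer trick below relies on.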
Signed-off-by: Eric Anholt Signed-off-by: Brice Goglin Cc: stable@kernel.org --- arch/x86/mm/pageattr.c | 1 + drivers/char/agp/intel-agp.c | 30 +++++++++++++++++++++++------- drivers/gpu/drm/i915/i915_gem.c | 10 ---------- 3 files changed, 24 insertions(+), 17 deletions(-) (limited to 'drivers/gpu/drm/i915/i915_gem.c') diff --git a/arch/x86/mm/pageattr.c b/arch/x86/mm/pageattr.c index 7e600c1962db..5866b28eede1 100644 --- a/arch/x86/mm/pageattr.c +++ b/arch/x86/mm/pageattr.c @@ -143,6 +143,7 @@ void clflush_cache_range(void *vaddr, unsigned int size) mb(); } +EXPORT_SYMBOL_GPL(clflush_cache_range); static void __cpa_flush_all(void *arg) { diff --git a/drivers/char/agp/intel-agp.c b/drivers/char/agp/intel-agp.c index c17291715031..e8dc75fc33cc 100644 --- a/drivers/char/agp/intel-agp.c +++ b/drivers/char/agp/intel-agp.c @@ -682,23 +682,39 @@ static void intel_i830_setup_flush(void) if (!intel_private.i8xx_page) return; - /* make page uncached */ - map_page_into_agp(intel_private.i8xx_page); - intel_private.i8xx_flush_page = kmap(intel_private.i8xx_page); if (!intel_private.i8xx_flush_page) intel_i830_fini_flush(); } +static void +do_wbinvd(void *null) +{ + wbinvd(); +} + +/* The chipset_flush interface needs to get data that has already been + * flushed out of the CPU all the way out to main memory, because the GPU + * doesn't snoop those buffers. + * + * The 8xx series doesn't have the same lovely interface for flushing the + * chipset write buffers that the later chips do. According to the 865 + * specs, it's 64 octwords, or 1KB. So, to get those previous things in + * that buffer out, we just fill 1KB and clflush it out, on the assumption + * that it'll push whatever was in there out. It appears to work. + */ static void intel_i830_chipset_flush(struct agp_bridge_data *bridge) { unsigned int *pg = intel_private.i8xx_flush_page; - int i; - for (i = 0; i < 256; i += 2) - *(pg + i) = i; + memset(pg, 0, 1024); - wmb(); + if (cpu_has_clflush) { + clflush_cache_range(pg, 1024); + } else { + if (on_each_cpu(do_wbinvd, NULL, 1) != 0) + printk(KERN_ERR "Timed out waiting for cache flush.\n"); + } } /* The intel i830 automatically initializes the agp aperture during POST. diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c index f3758f9fc979..30ea4b6b0219 100644 --- a/drivers/gpu/drm/i915/i915_gem.c +++ b/drivers/gpu/drm/i915/i915_gem.c @@ -2511,16 +2511,6 @@ i915_gem_clflush_object(struct drm_gem_object *obj) if (obj_priv->pages == NULL) return; - /* XXX: The 865 in particular appears to be weird in how it handles - * cache flushing. We haven't figured it out, but the - * clflush+agp_chipset_flush doesn't appear to successfully get the - * data visible to the PGU, while wbinvd + agp_chipset_flush does. - */ - if (IS_I865G(obj->dev)) { - wbinvd(); - return; - } - drm_clflush_pages(obj_priv->pages, obj->size / PAGE_SIZE); } -- cgit v1.2.3 From 7e61615857c6fb3afbcb43f5c4e97511a923f5a8 Mon Sep 17 00:00:00 2001 From: Chris Wilson Date: Thu, 10 Sep 2009 08:53:04 +0100 Subject: drm/i915: Only destroy a constructed mmap offset drm_ht_remove_item() does not handle removing an absent item and the hlist in particular is incorrectly initialised. The easy remedy is simply to skip calling i915_gem_free_mmap_offset() unless we have actually created the offset and associated ht entry. This also fixes the mishandling of a partially constructed offset which leaves pointers initialized after freeing them along the i915_gem_create_mmap_offset() error paths.
In particular this should fix the oops found here: https://bugs.launchpad.net/ubuntu/+source/xserver-xorg-video-intel/+bug/415357/comments/8 Signed-off-by: Chris Wilson Signed-off-by: Eric Anholt Cc: stable@kernel.org --- drivers/gpu/drm/i915/i915_gem.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'drivers/gpu/drm/i915/i915_gem.c') diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c index 30ea4b6b0219..e0da98602eed 100644 --- a/drivers/gpu/drm/i915/i915_gem.c +++ b/drivers/gpu/drm/i915/i915_gem.c @@ -3834,7 +3834,8 @@ void i915_gem_free_object(struct drm_gem_object *obj) i915_gem_object_unbind(obj); - i915_gem_free_mmap_offset(obj); + if (obj_priv->mmap_offset) + i915_gem_free_mmap_offset(obj); kfree(obj_priv->page_cpu_valid); kfree(obj_priv->bit_17); -- cgit v1.2.3 From ffed1d0920d180962469feb5e14bab7af2e29137 Mon Sep 17 00:00:00 2001 From: Ben Gamari Date: Mon, 14 Sep 2009 17:48:41 -0400 Subject: drm/i915: Check whether chip is wedged in i915_wait_request() i915_wait_request() only checks mm.wedged after it interacts with the hardware, generally causing the driver to lock up waiting for a wedged chip. Make sure we check mm.wedged as the first thing we do. Reported-by: Chris Wilson Signed-off-by: Ben Gamari Signed-off-by: Jesse Barnes --- drivers/gpu/drm/i915/i915_gem.c | 3 +++ 1 file changed, 3 insertions(+) (limited to 'drivers/gpu/drm/i915/i915_gem.c') diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c index e0da98602eed..910051321104 100644 --- a/drivers/gpu/drm/i915/i915_gem.c +++ b/drivers/gpu/drm/i915/i915_gem.c @@ -1751,6 +1751,9 @@ i915_wait_request(struct drm_device *dev, uint32_t seqno) BUG_ON(seqno == 0); + if (dev_priv->mm.wedged) + return -EIO; + if (!i915_seqno_passed(i915_get_gem_seqno(dev), seqno)) { if (IS_IGDNG(dev)) ier = I915_READ(DEIER) | I915_READ(GTIER); -- cgit v1.2.3 From 22be172423b0007a02a06d70db8aeb4d9e64c6b3 Mon Sep 17 00:00:00 2001 From: Ben Gamari Date: Mon, 14 Sep 2009 17:48:43 -0400 Subject: drm/i915: make i915_seqno_passed non-static We'll need it in i915_irq.c for checking whether there are outstanding requests. Also, the function really ought to return a bool, not an int. Signed-off-by: Ben Gamari Signed-off-by: Jesse Barnes --- drivers/gpu/drm/i915/i915_drv.h | 1 + drivers/gpu/drm/i915/i915_gem.c | 2 +- 2 files changed, 2 insertions(+), 1 deletion(-) (limited to 'drivers/gpu/drm/i915/i915_gem.c') diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h index 01b292003af4..933d832aeff7 100644 --- a/drivers/gpu/drm/i915/i915_drv.h +++ b/drivers/gpu/drm/i915/i915_drv.h @@ -709,6 +709,7 @@ int i915_gem_object_unbind(struct drm_gem_object *obj); void i915_gem_release_mmap(struct drm_gem_object *obj); void i915_gem_lastclose(struct drm_device *dev); uint32_t i915_get_gem_seqno(struct drm_device *dev); +bool i915_seqno_passed(uint32_t seq1, uint32_t seq2); int i915_gem_object_get_fence_reg(struct drm_gem_object *obj); int i915_gem_object_put_fence_reg(struct drm_gem_object *obj); void i915_gem_retire_requests(struct drm_device *dev); diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c index 910051321104..c70e91b51f67 100644 --- a/drivers/gpu/drm/i915/i915_gem.c +++ b/drivers/gpu/drm/i915/i915_gem.c @@ -1671,7 +1671,7 @@ out: /** * Returns true if seq1 is later than seq2. 
*/ -static int +bool i915_seqno_passed(uint32_t seq1, uint32_t seq2) { return (int32_t)(seq1 - seq2) >= 0; -- cgit v1.2.3 From f65d94211e2bcba17faf05a6a3809af0e4217767 Mon Sep 17 00:00:00 2001 From: Ben Gamari Date: Mon, 14 Sep 2009 17:48:44 -0400 Subject: drm/i915: Add hangcheck timer We set a periodic timer to check on the GPU, resetting it every time a batch is completed. If the timer elapses, we check acthd. If acthd hasn't changed in two timer periods, we assume the chip is wedged. This is implemented in such a way that it leaves the option open to employ adaptive timer intervals in the future. One could wait until several timer periods have elapsed before declaring the chip dead. If the chip comes back after several periods but before the "dead" threshold, the timer interval or dead threshold could be raised. It is important to note that while checking for active requests, we need to account for the fact that requests are removed from the list (i.e. retired) in a deferred work queue handler. This means that merely checking for an empty request_list is insufficient; the list could be non-empty yet the GPU still idle, causing the hangcheck timer to incorrectly mark the GPU as wedged (it took me a while to figure that out---sigh...) Signed-off-by: Ben Gamari Signed-off-by: Jesse Barnes --- drivers/gpu/drm/i915/i915_dma.c | 3 +++ drivers/gpu/drm/i915/i915_drv.h | 7 ++++++ drivers/gpu/drm/i915/i915_gem.c | 8 +++++-- drivers/gpu/drm/i915/i915_irq.c | 49 +++++++++++++++++++++++++++++++++++++++++ 4 files changed, 65 insertions(+), 2 deletions(-) (limited to 'drivers/gpu/drm/i915/i915_gem.c') diff --git a/drivers/gpu/drm/i915/i915_dma.c b/drivers/gpu/drm/i915/i915_dma.c index 7a73b2941eb7..08a5048335e1 100644 --- a/drivers/gpu/drm/i915/i915_dma.c +++ b/drivers/gpu/drm/i915/i915_dma.c @@ -1447,6 +1447,8 @@ int i915_driver_load(struct drm_device *dev, unsigned long flags) if (!IS_IGDNG(dev)) intel_opregion_init(dev, 0); + setup_timer(&dev_priv->hangcheck_timer, i915_hangcheck_elapsed, + (unsigned long) dev); return 0; out_workqueue_free: @@ -1467,6 +1469,7 @@ int i915_driver_unload(struct drm_device *dev) struct drm_i915_private *dev_priv = dev->dev_private; destroy_workqueue(dev_priv->wq); + del_timer_sync(&dev_priv->hangcheck_timer); io_mapping_free(dev_priv->mm.gtt_mapping); if (dev_priv->mm.gtt_mtrr >= 0) { diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h index 933d832aeff7..afbcaa9866f4 100644 --- a/drivers/gpu/drm/i915/i915_drv.h +++ b/drivers/gpu/drm/i915/i915_drv.h @@ -203,6 +203,12 @@ typedef struct drm_i915_private { unsigned int sr01, adpa, ppcr, dvob, dvoc, lvds; int vblank_pipe; + /* For hangcheck timer */ +#define DRM_I915_HANGCHECK_PERIOD 75 /* in jiffies */ + struct timer_list hangcheck_timer; + int hangcheck_count; + uint32_t last_acthd; + bool cursor_needs_physical; struct drm_mm vram; @@ -620,6 +626,7 @@ extern int i915_emit_box(struct drm_device *dev, int i, int DR1, int DR4); /* i915_irq.c */ +void i915_hangcheck_elapsed(unsigned long data); extern int i915_irq_emit(struct drm_device *dev, void *data, struct drm_file *file_priv); extern int i915_irq_wait(struct drm_device *dev, void *data, diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c index c70e91b51f67..579b3b04ff12 100644 --- a/drivers/gpu/drm/i915/i915_gem.c +++ b/drivers/gpu/drm/i915/i915_gem.c @@ -1584,8 +1584,11 @@ i915_add_request(struct drm_device *dev, struct drm_file *file_priv, } - if (was_empty && !dev_priv->mm.suspended) - 
queue_delayed_work(dev_priv->wq, &dev_priv->mm.retire_work, HZ); + if (!dev_priv->mm.suspended) { + mod_timer(&dev_priv->hangcheck_timer, jiffies + DRM_I915_HANGCHECK_PERIOD); + if (was_empty) + queue_delayed_work(dev_priv->wq, &dev_priv->mm.retire_work, HZ); + } return seqno; } @@ -3896,6 +3899,7 @@ i915_gem_idle(struct drm_device *dev) * We need to replace this with a semaphore, or something. */ dev_priv->mm.suspended = 1; + del_timer(&dev_priv->hangcheck_timer); /* Cancel the retire work handler, wait for it to finish if running */ diff --git a/drivers/gpu/drm/i915/i915_irq.c b/drivers/gpu/drm/i915/i915_irq.c index 6c89f2ff2495..77e42e719d7d 100644 --- a/drivers/gpu/drm/i915/i915_irq.c +++ b/drivers/gpu/drm/i915/i915_irq.c @@ -601,6 +601,8 @@ irqreturn_t i915_driver_irq_handler(DRM_IRQ_ARGS) if (iir & I915_USER_INTERRUPT) { dev_priv->mm.irq_gem_seqno = i915_get_gem_seqno(dev); DRM_WAKEUP(&dev_priv->irq_queue); + dev_priv->hangcheck_count = 0; + mod_timer(&dev_priv->hangcheck_timer, jiffies + DRM_I915_HANGCHECK_PERIOD); } if (pipea_stats & vblank_status) { @@ -880,6 +882,53 @@ int i915_vblank_swap(struct drm_device *dev, void *data, return -EINVAL; } +struct drm_i915_gem_request *i915_get_tail_request(struct drm_device *dev) { + drm_i915_private_t *dev_priv = dev->dev_private; + return list_entry(dev_priv->mm.request_list.prev, struct drm_i915_gem_request, list); +} + +/** + * This is called when the chip hasn't reported back with completed + * batchbuffers in a long time. The first time this is called we simply record + * ACTHD. If ACTHD hasn't changed by the time the hangcheck timer elapses + * again, we assume the chip is wedged and try to fix it. + */ +void i915_hangcheck_elapsed(unsigned long data) +{ + struct drm_device *dev = (struct drm_device *)data; + drm_i915_private_t *dev_priv = dev->dev_private; + uint32_t acthd; + + if (!IS_I965G(dev)) + acthd = I915_READ(ACTHD); + else + acthd = I915_READ(ACTHD_I965); + + /* If all work is done then ACTHD clearly hasn't advanced. */ + if (list_empty(&dev_priv->mm.request_list) || + i915_seqno_passed(i915_get_gem_seqno(dev), i915_get_tail_request(dev)->seqno)) { + dev_priv->hangcheck_count = 0; + return; + } + + if (dev_priv->last_acthd == acthd && dev_priv->hangcheck_count > 0) { + DRM_ERROR("Hangcheck timer elapsed... GPU hung\n"); + dev_priv->mm.wedged = true; /* Hopefully this is atomic */ + i915_handle_error(dev); + return; + } + + /* Reset timer case chip hangs without another request being added */ + mod_timer(&dev_priv->hangcheck_timer, jiffies + DRM_I915_HANGCHECK_PERIOD); + + if (acthd != dev_priv->last_acthd) + dev_priv->hangcheck_count = 0; + else + dev_priv->hangcheck_count++; + + dev_priv->last_acthd = acthd; +} + /* drm_dma.h hooks */ static void igdng_irq_preinstall(struct drm_device *dev) -- cgit v1.2.3 From ba1234d17b3b1fe7087defb191a3c705f208aca6 Mon Sep 17 00:00:00 2001 From: Ben Gamari Date: Mon, 14 Sep 2009 17:48:47 -0400 Subject: drm/i915: Make dev_priv->mm.wedged an atomic_t There is a very real possibility that multiple CPUs will notice that the GPU is wedged. This introduces all sorts of potential race conditions. Make the wedged flag atomic to mitigate this risk. 
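The conversion is mechanical; a minimal before/after sketch using the accessors the patch adopts (fragments for illustration only):

/* before: plain int, giving unordered cross-CPU reads and writes */
dev_priv->mm.wedged = 1;
ret = dev_priv->mm.wedged ? -EIO : 0;

/* after: every reader and writer goes through the atomic API */
atomic_set(&dev_priv->mm.wedged, 1);
ret = atomic_read(&dev_priv->mm.wedged) ? -EIO : 0;

As part of the same change, i915_handle_error() grows an explicit 'wedged' argument and performs the atomic_set() itself, instead of relying on callers to have stored the flag beforehand.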
Signed-off-by: Ben Gamari Signed-off-by: Jesse Barnes --- drivers/gpu/drm/i915/i915_drv.h | 2 +- drivers/gpu/drm/i915/i915_gem.c | 18 +++++++++--------- drivers/gpu/drm/i915/i915_irq.c | 15 ++++++++------- 3 files changed, 18 insertions(+), 17 deletions(-) (limited to 'drivers/gpu/drm/i915/i915_gem.c') diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h index 42142f269765..bcc1be281de6 100644 --- a/drivers/gpu/drm/i915/i915_drv.h +++ b/drivers/gpu/drm/i915/i915_drv.h @@ -450,7 +450,7 @@ typedef struct drm_i915_private { * It prevents command submission from occuring and makes * every pending request fail */ - int wedged; + atomic_t wedged; /** Bit 6 swizzling required for X tiling */ uint32_t bit_6_swizzle_x; diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c index 579b3b04ff12..f0f6f668a61e 100644 --- a/drivers/gpu/drm/i915/i915_gem.c +++ b/drivers/gpu/drm/i915/i915_gem.c @@ -1712,7 +1712,7 @@ i915_gem_retire_requests(struct drm_device *dev) retiring_seqno = request->seqno; if (i915_seqno_passed(seqno, retiring_seqno) || - dev_priv->mm.wedged) { + atomic_read(&dev_priv->mm.wedged)) { i915_gem_retire_request(dev, request); list_del(&request->list); @@ -1754,7 +1754,7 @@ i915_wait_request(struct drm_device *dev, uint32_t seqno) BUG_ON(seqno == 0); - if (dev_priv->mm.wedged) + if (atomic_read(&dev_priv->mm.wedged)) return -EIO; if (!i915_seqno_passed(i915_get_gem_seqno(dev), seqno)) { @@ -1774,11 +1774,11 @@ i915_wait_request(struct drm_device *dev, uint32_t seqno) ret = wait_event_interruptible(dev_priv->irq_queue, i915_seqno_passed(i915_get_gem_seqno(dev), seqno) || - dev_priv->mm.wedged); + atomic_read(&dev_priv->mm.wedged)); i915_user_irq_put(dev); dev_priv->mm.waiting_gem_seqno = 0; } - if (dev_priv->mm.wedged) + if (atomic_read(&dev_priv->mm.wedged)) ret = -EIO; if (ret && ret != -ERESTARTSYS) @@ -3359,7 +3359,7 @@ i915_gem_execbuffer(struct drm_device *dev, void *data, i915_verify_inactive(dev, __FILE__, __LINE__); - if (dev_priv->mm.wedged) { + if (atomic_read(&dev_priv->mm.wedged)) { DRM_ERROR("Execbuf while wedged\n"); mutex_unlock(&dev->struct_mutex); ret = -EIO; @@ -3929,7 +3929,7 @@ i915_gem_idle(struct drm_device *dev) if (last_seqno == cur_seqno) { if (stuck++ > 100) { DRM_ERROR("hardware wedged\n"); - dev_priv->mm.wedged = 1; + atomic_set(&dev_priv->mm.wedged, 1); DRM_WAKEUP(&dev_priv->irq_queue); break; } @@ -3942,7 +3942,7 @@ i915_gem_idle(struct drm_device *dev) i915_gem_retire_requests(dev); spin_lock(&dev_priv->mm.active_list_lock); - if (!dev_priv->mm.wedged) { + if (!atomic_read(&dev_priv->mm.wedged)) { /* Active and flushing should now be empty as we've * waited for a sequence higher than any pending execbuffer */ @@ -4204,9 +4204,9 @@ i915_gem_entervt_ioctl(struct drm_device *dev, void *data, if (drm_core_check_feature(dev, DRIVER_MODESET)) return 0; - if (dev_priv->mm.wedged) { + if (atomic_read(&dev_priv->mm.wedged)) { DRM_ERROR("Reenabling wedged hardware, good luck\n"); - dev_priv->mm.wedged = 0; + atomic_set(&dev_priv->mm.wedged, 0); } mutex_lock(&dev->struct_mutex); diff --git a/drivers/gpu/drm/i915/i915_irq.c b/drivers/gpu/drm/i915/i915_irq.c index 8f5276614ce2..13e664ddb611 100644 --- a/drivers/gpu/drm/i915/i915_irq.c +++ b/drivers/gpu/drm/i915/i915_irq.c @@ -309,12 +309,12 @@ static void i915_error_work_func(struct work_struct *work) DRM_DEBUG("generating error event\n"); kobject_uevent_env(&dev->primary->kdev.kobj, KOBJ_CHANGE, error_event); - if (dev_priv->mm.wedged) { + if 
(atomic_read(&dev_priv->mm.wedged)) { if (IS_I965G(dev)) { DRM_DEBUG("resetting chip\n"); kobject_uevent_env(&dev->primary->kdev.kobj, KOBJ_CHANGE, reset_event); if (!i965_reset(dev, GDRST_RENDER)) { - dev_priv->mm.wedged = 0; + atomic_set(&dev_priv->mm.wedged, 0); kobject_uevent_env(&dev->primary->kdev.kobj, KOBJ_CHANGE, reset_done_event); } } else { @@ -385,7 +385,7 @@ out: * so userspace knows something bad happened (should trigger collection * of a ring dump etc.). */ -static void i915_handle_error(struct drm_device *dev) +static void i915_handle_error(struct drm_device *dev, bool wedged) { struct drm_i915_private *dev_priv = dev->dev_private; u32 eir = I915_READ(EIR); @@ -495,7 +495,9 @@ static void i915_handle_error(struct drm_device *dev) I915_WRITE(IIR, I915_RENDER_COMMAND_PARSER_ERROR_INTERRUPT); } - if (dev_priv->mm.wedged) { + if (wedged) { + atomic_set(&dev_priv->mm.wedged, 1); + /* * Wakeup waiting processes so they don't hang */ @@ -548,7 +550,7 @@ irqreturn_t i915_driver_irq_handler(DRM_IRQ_ARGS) pipeb_stats = I915_READ(PIPEBSTAT); if (iir & I915_RENDER_COMMAND_PARSER_ERROR_INTERRUPT) - i915_handle_error(dev); + i915_handle_error(dev, false); /* * Clear the PIPE(A|B)STAT regs before the IIR @@ -934,8 +936,7 @@ void i915_hangcheck_elapsed(unsigned long data) if (dev_priv->last_acthd == acthd && dev_priv->hangcheck_count > 0) { DRM_ERROR("Hangcheck timer elapsed... GPU hung\n"); - dev_priv->mm.wedged = true; /* Hopefully this is atomic */ - i915_handle_error(dev); + i915_handle_error(dev, true); return; } -- cgit v1.2.3 From 4960aaca14010b9ff92e5726dd178cbd6805d412 Mon Sep 17 00:00:00 2001 From: Chris Wilson Date: Mon, 14 Sep 2009 16:50:25 +0100 Subject: drm/i915: Add buffer to inactive list immediately during fault If we failed to set the domain, the buffer was no longer being tracked on any list. Cc: stable@kernel.org Signed-off-by: Chris Wilson Signed-off-by: Jesse Barnes --- drivers/gpu/drm/i915/i915_gem.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) (limited to 'drivers/gpu/drm/i915/i915_gem.c') diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c index f0f6f668a61e..441088b37d9f 100644 --- a/drivers/gpu/drm/i915/i915_gem.c +++ b/drivers/gpu/drm/i915/i915_gem.c @@ -1160,14 +1160,13 @@ int i915_gem_fault(struct vm_area_struct *vma, struct vm_fault *vmf) mutex_unlock(&dev->struct_mutex); return VM_FAULT_SIGBUS; } + list_add_tail(&obj_priv->list, &dev_priv->mm.inactive_list); ret = i915_gem_object_set_to_gtt_domain(obj, write); if (ret) { mutex_unlock(&dev->struct_mutex); return VM_FAULT_SIGBUS; } - - list_add_tail(&obj_priv->list, &dev_priv->mm.inactive_list); } /* Need a new fence register? */ -- cgit v1.2.3 From e67b8ce1b59006ba41245838db60b6fcda365ba8 Mon Sep 17 00:00:00 2001 From: Chris Wilson Date: Mon, 14 Sep 2009 16:50:26 +0100 Subject: drm/i915: Remove stored gtt_alignment There is no need to store the gtt_alignment: it is either set explicitly according to the hardware requirements (e.g. scanout), or else the minimum alignment is computed on demand.
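With the field gone, callers simply pass an alignment of 0 and let the bind path derive the hardware minimum; condensed from the hunks below plus the existing i915_gem_object_bind_to_gtt() logic (a sketch, not the full function):

ret = i915_gem_object_bind_to_gtt(obj, 0);

/* ...and inside i915_gem_object_bind_to_gtt(): */
if (alignment == 0)
	alignment = i915_gem_get_gtt_alignment(obj);
if (alignment & (i915_gem_get_gtt_alignment(obj) - 1))
	return -EINVAL;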
Signed-off-by: Chris Wilson Signed-off-by: Jesse Barnes --- drivers/gpu/drm/i915/i915_drv.h | 5 +---- drivers/gpu/drm/i915/i915_gem.c | 14 ++------------ 2 files changed, 3 insertions(+), 16 deletions(-) (limited to 'drivers/gpu/drm/i915/i915_gem.c') diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h index a9c671529885..07214694b14f 100644 --- a/drivers/gpu/drm/i915/i915_drv.h +++ b/drivers/gpu/drm/i915/i915_drv.h @@ -510,10 +510,7 @@ struct drm_i915_gem_object { * This is the same as gtt_space->start */ uint32_t gtt_offset; - /** - * Required alignment for the object - */ - uint32_t gtt_alignment; + /** * Fake offset for use by mmap(2) */ diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c index 441088b37d9f..77cc6f57166c 100644 --- a/drivers/gpu/drm/i915/i915_gem.c +++ b/drivers/gpu/drm/i915/i915_gem.c @@ -1155,7 +1155,7 @@ int i915_gem_fault(struct vm_area_struct *vma, struct vm_fault *vmf) /* Now bind it into the GTT if needed */ mutex_lock(&dev->struct_mutex); if (!obj_priv->gtt_space) { - ret = i915_gem_object_bind_to_gtt(obj, obj_priv->gtt_alignment); + ret = i915_gem_object_bind_to_gtt(obj, 0); if (ret) { mutex_unlock(&dev->struct_mutex); return VM_FAULT_SIGBUS; @@ -1398,22 +1398,12 @@ i915_gem_mmap_gtt_ioctl(struct drm_device *dev, void *data, args->offset = obj_priv->mmap_offset; - obj_priv->gtt_alignment = i915_gem_get_gtt_alignment(obj); - - /* Make sure the alignment is correct for fence regs etc */ - if (obj_priv->agp_mem && - (obj_priv->gtt_offset & (obj_priv->gtt_alignment - 1))) { - drm_gem_object_unreference(obj); - mutex_unlock(&dev->struct_mutex); - return -EINVAL; - } - /* * Pull it into the GTT so that we have a page list (makes the * initial fault faster and any subsequent flushing possible). */ if (!obj_priv->agp_mem) { - ret = i915_gem_object_bind_to_gtt(obj, obj_priv->gtt_alignment); + ret = i915_gem_object_bind_to_gtt(obj, 0); if (ret) { drm_gem_object_unreference(obj); mutex_unlock(&dev->struct_mutex); -- cgit v1.2.3 From 31169714fc928aed4e945b959dca2bedd259b9c9 Mon Sep 17 00:00:00 2001 From: Chris Wilson Date: Mon, 14 Sep 2009 16:50:28 +0100 Subject: drm/i915: Register a shrinker to free inactive lists under memory pressure This should help GEM handle memory pressure situations more gracefully. Signed-off-by: Chris Wilson Signed-off-by: Jesse Barnes --- drivers/gpu/drm/i915/i915_drv.c | 3 + drivers/gpu/drm/i915/i915_drv.h | 12 ++++ drivers/gpu/drm/i915/i915_gem.c | 144 ++++++++++++++++++++++++++++++++++++++++ 3 files changed, 159 insertions(+) (limited to 'drivers/gpu/drm/i915/i915_gem.c') diff --git a/drivers/gpu/drm/i915/i915_drv.c b/drivers/gpu/drm/i915/i915_drv.c index 1f9e4503b072..c57c1744cecf 100644 --- a/drivers/gpu/drm/i915/i915_drv.c +++ b/drivers/gpu/drm/i915/i915_drv.c @@ -362,6 +362,8 @@ static int __init i915_init(void) { driver.num_ioctls = i915_max_ioctl; + i915_gem_shrinker_init(); + /* * If CONFIG_DRM_I915_KMS is set, default to KMS unless * explicitly disabled with the module pararmeter.
@@ -388,6 +390,7 @@ static int __init i915_init(void) static void __exit i915_exit(void) { + i915_gem_shrinker_exit(); drm_exit(&driver); } diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h index 07214694b14f..bbcf5fc72666 100644 --- a/drivers/gpu/drm/i915/i915_drv.h +++ b/drivers/gpu/drm/i915/i915_drv.h @@ -368,6 +368,15 @@ typedef struct drm_i915_private { struct io_mapping *gtt_mapping; int gtt_mtrr; + /** + * Membership on list of all loaded devices, used to evict + * inactive buffers under memory pressure. + * + * Modifications should only be done whilst holding the + * shrink_list_lock spinlock. + */ + struct list_head shrink_list; + /** * List of objects currently involved in rendering from the * ringbuffer. @@ -741,6 +750,9 @@ int i915_gem_object_get_pages(struct drm_gem_object *obj); void i915_gem_object_put_pages(struct drm_gem_object *obj); void i915_gem_release(struct drm_device * dev, struct drm_file *file_priv); +void i915_gem_shrinker_init(void); +void i915_gem_shrinker_exit(void); + /* i915_gem_tiling.c */ void i915_gem_detect_bit_6_swizzle(struct drm_device *dev); void i915_gem_object_do_bit_17_swizzle(struct drm_gem_object *obj); diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c index 77cc6f57166c..2fff2e0a976e 100644 --- a/drivers/gpu/drm/i915/i915_gem.c +++ b/drivers/gpu/drm/i915/i915_gem.c @@ -53,6 +53,9 @@ static int i915_gem_phys_pwrite(struct drm_device *dev, struct drm_gem_object *o struct drm_i915_gem_pwrite *args, struct drm_file *file_priv); +static LIST_HEAD(shrink_list); +static DEFINE_SPINLOCK(shrink_list_lock); + int i915_gem_do_init(struct drm_device *dev, unsigned long start, unsigned long end) { @@ -4265,6 +4268,10 @@ i915_gem_load(struct drm_device *dev) i915_gem_retire_work_handler); dev_priv->mm.next_gem_seqno = 1; + spin_lock(&shrink_list_lock); + list_add(&dev_priv->mm.shrink_list, &shrink_list); + spin_unlock(&shrink_list_lock); + /* Old X drivers will take 0-2 for front, back, depth buffers */ dev_priv->fence_reg_start = 3; @@ -4482,3 +4489,140 @@ void i915_gem_release(struct drm_device * dev, struct drm_file *file_priv) list_del_init(i915_file_priv->mm.request_list.next); mutex_unlock(&dev->struct_mutex); } + +/* Immediately discard the backing storage */ +static void +i915_gem_object_truncate(struct drm_gem_object *obj) +{ + struct inode *inode; + + inode = obj->filp->f_path.dentry->d_inode; + + mutex_lock(&inode->i_mutex); + truncate_inode_pages(inode->i_mapping, 0); + mutex_unlock(&inode->i_mutex); +} + +static inline int +i915_gem_object_is_purgeable(struct drm_i915_gem_object *obj_priv) +{ + return !obj_priv->dirty; +} + +static int +i915_gem_shrink(int nr_to_scan, gfp_t gfp_mask) +{ + drm_i915_private_t *dev_priv, *next_dev; + struct drm_i915_gem_object *obj_priv, *next_obj; + int cnt = 0; + int would_deadlock = 1; + + /* "fast-path" to count number of available objects */ + if (nr_to_scan == 0) { + spin_lock(&shrink_list_lock); + list_for_each_entry(dev_priv, &shrink_list, mm.shrink_list) { + struct drm_device *dev = dev_priv->dev; + + if (mutex_trylock(&dev->struct_mutex)) { + list_for_each_entry(obj_priv, + &dev_priv->mm.inactive_list, + list) + cnt++; + mutex_unlock(&dev->struct_mutex); + } + } + spin_unlock(&shrink_list_lock); + + return (cnt / 100) * sysctl_vfs_cache_pressure; + } + + spin_lock(&shrink_list_lock); + + /* first scan for clean buffers */ + list_for_each_entry_safe(dev_priv, next_dev, + &shrink_list, mm.shrink_list) { + struct drm_device *dev = dev_priv->dev; + + 
if (! mutex_trylock(&dev->struct_mutex)) + continue; + + spin_unlock(&shrink_list_lock); + + i915_gem_retire_requests(dev); + + list_for_each_entry_safe(obj_priv, next_obj, + &dev_priv->mm.inactive_list, + list) { + if (i915_gem_object_is_purgeable(obj_priv)) { + struct drm_gem_object *obj = obj_priv->obj; + i915_gem_object_unbind(obj); + i915_gem_object_truncate(obj); + + if (--nr_to_scan <= 0) + break; + } + } + + spin_lock(&shrink_list_lock); + mutex_unlock(&dev->struct_mutex); + + if (nr_to_scan <= 0) + break; + } + + /* second pass, evict/count anything still on the inactive list */ + list_for_each_entry_safe(dev_priv, next_dev, + &shrink_list, mm.shrink_list) { + struct drm_device *dev = dev_priv->dev; + + if (! mutex_trylock(&dev->struct_mutex)) + continue; + + spin_unlock(&shrink_list_lock); + + list_for_each_entry_safe(obj_priv, next_obj, + &dev_priv->mm.inactive_list, + list) { + if (nr_to_scan > 0) { + struct drm_gem_object *obj = obj_priv->obj; + i915_gem_object_unbind(obj); + if (i915_gem_object_is_purgeable(obj_priv)) + i915_gem_object_truncate(obj); + + nr_to_scan--; + } else + cnt++; + } + + spin_lock(&shrink_list_lock); + mutex_unlock(&dev->struct_mutex); + + would_deadlock = 0; + } + + spin_unlock(&shrink_list_lock); + + if (would_deadlock) + return -1; + else if (cnt > 0) + return (cnt / 100) * sysctl_vfs_cache_pressure; + else + return 0; +} + +static struct shrinker shrinker = { + .shrink = i915_gem_shrink, + .seeks = DEFAULT_SEEKS, +}; + +__init void +i915_gem_shrinker_init(void) +{ + register_shrinker(&shrinker); +} + +__exit void +i915_gem_shrinker_exit(void) +{ + unregister_shrinker(&shrinker); +} -- cgit v1.2.3 From 3ef94daae7530b4ebcd2e5f48f1028cd2d2470ba Mon Sep 17 00:00:00 2001 From: Chris Wilson Date: Mon, 14 Sep 2009 16:50:29 +0100 Subject: drm/i915: Add ioctl to set 'purgeability' of objects Similar to the madvise() concept, the application may wish to mark some data as volatile. That is, in the event of memory pressure, the kernel is free to discard such buffers, safe in the knowledge that the application can recreate them on demand, and is simply using these as a cache. Signed-off-by: Chris Wilson Signed-off-by: Jesse Barnes --- drivers/gpu/drm/i915/i915_dma.c | 1 + drivers/gpu/drm/i915/i915_drv.h | 7 ++++ drivers/gpu/drm/i915/i915_gem.c | 81 +++++++++++++++++++++++++++++++++++++---- include/drm/i915_drm.h | 18 +++++++++ 4 files changed, 99 insertions(+), 8 deletions(-) (limited to 'drivers/gpu/drm/i915/i915_gem.c') diff --git a/drivers/gpu/drm/i915/i915_dma.c b/drivers/gpu/drm/i915/i915_dma.c index f47adb4aa59a..250999cdf814 100644 --- a/drivers/gpu/drm/i915/i915_dma.c +++ b/drivers/gpu/drm/i915/i915_dma.c @@ -1616,6 +1616,7 @@ struct drm_ioctl_desc i915_ioctls[] = { DRM_IOCTL_DEF(DRM_I915_GEM_GET_TILING, i915_gem_get_tiling, 0), DRM_IOCTL_DEF(DRM_I915_GEM_GET_APERTURE, i915_gem_get_aperture_ioctl, 0), DRM_IOCTL_DEF(DRM_I915_GET_PIPE_FROM_CRTC_ID, intel_get_pipe_from_crtc_id, 0), + DRM_IOCTL_DEF(DRM_I915_GEM_MADVISE, i915_gem_madvise_ioctl, 0), }; int i915_max_ioctl = DRM_ARRAY_SIZE(i915_ioctls); diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h index bbcf5fc72666..2f89c2136be9 100644 --- a/drivers/gpu/drm/i915/i915_drv.h +++ b/drivers/gpu/drm/i915/i915_drv.h @@ -566,6 +566,11 @@ struct drm_i915_gem_object { * in an execbuffer object list. */ int in_execbuffer; + + /** + * Advice: are the backing pages purgeable? 
+ */ + int madv; }; /** @@ -705,6 +710,8 @@ int i915_gem_busy_ioctl(struct drm_device *dev, void *data, struct drm_file *file_priv); int i915_gem_throttle_ioctl(struct drm_device *dev, void *data, struct drm_file *file_priv); +int i915_gem_madvise_ioctl(struct drm_device *dev, void *data, + struct drm_file *file_priv); int i915_gem_entervt_ioctl(struct drm_device *dev, void *data, struct drm_file *file_priv); int i915_gem_leavevt_ioctl(struct drm_device *dev, void *data, diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c index 2fff2e0a976e..2ab30f251943 100644 --- a/drivers/gpu/drm/i915/i915_gem.c +++ b/drivers/gpu/drm/i915/i915_gem.c @@ -1436,13 +1436,21 @@ i915_gem_object_put_pages(struct drm_gem_object *obj) if (obj_priv->tiling_mode != I915_TILING_NONE) i915_gem_object_save_bit_17_swizzle(obj); - for (i = 0; i < page_count; i++) - if (obj_priv->pages[i] != NULL) { - if (obj_priv->dirty) - set_page_dirty(obj_priv->pages[i]); - mark_page_accessed(obj_priv->pages[i]); - page_cache_release(obj_priv->pages[i]); - } + if (obj_priv->madv == I915_MADV_DONTNEED) + obj_priv->dirty = 0; + + for (i = 0; i < page_count; i++) { + if (obj_priv->pages[i] == NULL) + break; + + if (obj_priv->dirty) + set_page_dirty(obj_priv->pages[i]); + + if (obj_priv->madv == I915_MADV_WILLNEED) + mark_page_accessed(obj_priv->pages[i]); + + page_cache_release(obj_priv->pages[i]); + } obj_priv->dirty = 0; drm_free_large(obj_priv->pages); @@ -2412,6 +2420,12 @@ i915_gem_object_bind_to_gtt(struct drm_gem_object *obj, unsigned alignment) if (dev_priv->mm.suspended) return -EBUSY; + + if (obj_priv->madv == I915_MADV_DONTNEED) { + DRM_ERROR("Attempting to bind a purgeable object\n"); + return -EINVAL; + } + if (alignment == 0) alignment = i915_gem_get_gtt_alignment(obj); if (alignment & (i915_gem_get_gtt_alignment(obj) - 1)) { @@ -3679,6 +3693,13 @@ i915_gem_pin_ioctl(struct drm_device *dev, void *data, } obj_priv = obj->driver_private; + if (obj_priv->madv == I915_MADV_DONTNEED) { + DRM_ERROR("Attempting to pin a I915_MADV_DONTNEED buffer\n"); + drm_gem_object_unreference(obj); + mutex_unlock(&dev->struct_mutex); + return -EINVAL; + } + if (obj_priv->pin_filp != NULL && obj_priv->pin_filp != file_priv) { DRM_ERROR("Already pinned in i915_gem_pin_ioctl(): %d\n", args->handle); @@ -3791,6 +3812,49 @@ i915_gem_throttle_ioctl(struct drm_device *dev, void *data, return i915_gem_ring_throttle(dev, file_priv); } +int +i915_gem_madvise_ioctl(struct drm_device *dev, void *data, + struct drm_file *file_priv) +{ + struct drm_i915_gem_madvise *args = data; + struct drm_gem_object *obj; + struct drm_i915_gem_object *obj_priv; + + switch (args->madv) { + case I915_MADV_DONTNEED: + case I915_MADV_WILLNEED: + break; + default: + return -EINVAL; + } + + obj = drm_gem_object_lookup(dev, file_priv, args->handle); + if (obj == NULL) { + DRM_ERROR("Bad handle in i915_gem_madvise_ioctl(): %d\n", + args->handle); + return -EBADF; + } + + mutex_lock(&dev->struct_mutex); + obj_priv = obj->driver_private; + + if (obj_priv->pin_count) { + drm_gem_object_unreference(obj); + mutex_unlock(&dev->struct_mutex); + + DRM_ERROR("Attempted i915_gem_madvise_ioctl() on a pinned object\n"); + return -EINVAL; + } + + obj_priv->madv = args->madv; + args->retained = obj_priv->gtt_space != NULL; + + drm_gem_object_unreference(obj); + mutex_unlock(&dev->struct_mutex); + + return 0; +} + int i915_gem_init_object(struct drm_gem_object *obj) { struct drm_i915_gem_object *obj_priv; @@ -3815,6 +3879,7 @@ int i915_gem_init_object(struct 
drm_gem_object *obj) obj_priv->fence_reg = I915_FENCE_REG_NONE; INIT_LIST_HEAD(&obj_priv->list); INIT_LIST_HEAD(&obj_priv->fence_list); + obj_priv->madv = I915_MADV_WILLNEED; return 0; } @@ -4506,7 +4571,7 @@ i915_gem_object_truncate(struct drm_gem_object *obj) static inline int i915_gem_object_is_purgeable(struct drm_i915_gem_object *obj_priv) { - return !obj_priv->dirty; + return !obj_priv->dirty || obj_priv->madv == I915_MADV_DONTNEED; } static int diff --git a/include/drm/i915_drm.h b/include/drm/i915_drm.h index 8e1e92583fbc..607c9da061e8 100644 --- a/include/drm/i915_drm.h +++ b/include/drm/i915_drm.h @@ -185,6 +185,7 @@ typedef struct _drm_i915_sarea { #define DRM_I915_GEM_GET_APERTURE 0x23 #define DRM_I915_GEM_MMAP_GTT 0x24 #define DRM_I915_GET_PIPE_FROM_CRTC_ID 0x25 +#define DRM_I915_GEM_MADVISE 0x26 #define DRM_IOCTL_I915_INIT DRM_IOW( DRM_COMMAND_BASE + DRM_I915_INIT, drm_i915_init_t) #define DRM_IOCTL_I915_FLUSH DRM_IO ( DRM_COMMAND_BASE + DRM_I915_FLUSH) @@ -221,6 +222,7 @@ typedef struct _drm_i915_sarea { #define DRM_IOCTL_I915_GEM_GET_TILING DRM_IOWR (DRM_COMMAND_BASE + DRM_I915_GEM_GET_TILING, struct drm_i915_gem_get_tiling) #define DRM_IOCTL_I915_GEM_GET_APERTURE DRM_IOR (DRM_COMMAND_BASE + DRM_I915_GEM_GET_APERTURE, struct drm_i915_gem_get_aperture) #define DRM_IOCTL_I915_GET_PIPE_FROM_CRTC_ID DRM_IOWR(DRM_COMMAND_BASE + DRM_I915_GET_PIPE_FROM_CRTC_ID, struct drm_intel_get_pipe_from_crtc_id) +#define DRM_IOCTL_I915_GEM_MADVISE DRM_IOWR(DRM_COMMAND_BASE + DRM_I915_GEM_MADVISE, struct drm_i915_gem_madvise) /* Allow drivers to submit batchbuffers directly to hardware, relying * on the security mechanisms provided by hardware. @@ -667,4 +669,20 @@ struct drm_i915_get_pipe_from_crtc_id { __u32 pipe; }; +#define I915_MADV_WILLNEED 0 +#define I915_MADV_DONTNEED 1 + +struct drm_i915_gem_madvise { + /** Handle of the buffer to change the backing store advice */ + __u32 handle; + + /* Advice: either the buffer will be needed again in the near future, + * or wont be and could be discarded under memory pressure. + */ + __u32 madv; + + /** Whether the backing store still exists. */ + __u32 retained; +}; + #endif /* _I915_DRM_H_ */ -- cgit v1.2.3 From 07f73f6912667621276b002e33844ef283d98203 Mon Sep 17 00:00:00 2001 From: Chris Wilson Date: Mon, 14 Sep 2009 16:50:30 +0100 Subject: drm/i915: Improve behaviour under memory pressure Because it must take the struct_mutex, the i915 shrinker cannot free the inactive lists if we fail to allocate memory whilst processing a batch buffer, triggering an OOM and an ENOMEM that is reported back to userspace. In order to fare better under such circumstances we need to manually retry a failed allocation after evicting inactive buffers. Doing so involves 3 steps: 1. Marking the backing shm pages as NORETRY. 2. Updating the get_pages() callers to evict something on failure and then retry. 3. Revamping the evict something logic to be smarter about the required buffer size and prefer to use volatile or clean inactive pages. 
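Step 2 boils down to a small wrapper around get_pages(); condensed from the i915_gem_object_get_pages_or_evict() helper added below:

int ret;
gfp_t gfp;

ret = i915_gem_object_get_pages(obj);
if (ret == -ENOMEM) {
	/* Make room in the GTT, then retry the allocation once with
	 * __GFP_NORETRY cleared so the VM may reclaim on our behalf. */
	ret = i915_gem_evict_something(dev, obj->size);
	if (ret)
		return ret;

	gfp = i915_gem_object_get_page_gfp_mask(obj);
	i915_gem_object_set_page_gfp_mask(obj, gfp & ~__GFP_NORETRY);
	ret = i915_gem_object_get_pages(obj);
	i915_gem_object_set_page_gfp_mask(obj, gfp);
}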
Signed-off-by: Chris Wilson Signed-off-by: Jesse Barnes --- drivers/gpu/drm/drm_gem.c | 13 ++ drivers/gpu/drm/i915/i915_gem.c | 313 +++++++++++++++++++++++++++++----------- 2 files changed, 245 insertions(+), 81 deletions(-) (limited to 'drivers/gpu/drm/i915/i915_gem.c') diff --git a/drivers/gpu/drm/drm_gem.c b/drivers/gpu/drm/drm_gem.c index 230c9ffdd5e9..80391995bdec 100644 --- a/drivers/gpu/drm/drm_gem.c +++ b/drivers/gpu/drm/drm_gem.c @@ -142,6 +142,19 @@ drm_gem_object_alloc(struct drm_device *dev, size_t size) if (IS_ERR(obj->filp)) goto free; + /* Basically we want to disable the OOM killer and handle ENOMEM + * ourselves by sacrificing pages from cached buffers. + * XXX shmem_file_[gs]et_gfp_mask() + */ + mapping_set_gfp_mask(obj->filp->f_path.dentry->d_inode->i_mapping, + GFP_HIGHUSER | + __GFP_COLD | + __GFP_FS | + __GFP_RECLAIMABLE | + __GFP_NORETRY | + __GFP_NOWARN | + __GFP_NOMEMALLOC); + kref_init(&obj->refcount); kref_init(&obj->handlecount); obj->size = size; diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c index 2ab30f251943..725b4484a092 100644 --- a/drivers/gpu/drm/i915/i915_gem.c +++ b/drivers/gpu/drm/i915/i915_gem.c @@ -48,7 +48,9 @@ static int i915_gem_object_wait_rendering(struct drm_gem_object *obj); static int i915_gem_object_bind_to_gtt(struct drm_gem_object *obj, unsigned alignment); static void i915_gem_clear_fence_reg(struct drm_gem_object *obj); -static int i915_gem_evict_something(struct drm_device *dev); +static int i915_gem_evict_something(struct drm_device *dev, int min_size); +static int i915_gem_evict_from_list(struct drm_device *dev, + struct list_head *head); static int i915_gem_phys_pwrite(struct drm_device *dev, struct drm_gem_object *obj, struct drm_i915_gem_pwrite *args, struct drm_file *file_priv); @@ -319,6 +321,45 @@ fail_unlock: return ret; } +static inline gfp_t +i915_gem_object_get_page_gfp_mask (struct drm_gem_object *obj) +{ + return mapping_gfp_mask(obj->filp->f_path.dentry->d_inode->i_mapping); +} + +static inline void +i915_gem_object_set_page_gfp_mask (struct drm_gem_object *obj, gfp_t gfp) +{ + mapping_set_gfp_mask(obj->filp->f_path.dentry->d_inode->i_mapping, gfp); +} + +static int +i915_gem_object_get_pages_or_evict(struct drm_gem_object *obj) +{ + int ret; + + ret = i915_gem_object_get_pages(obj); + + /* If we've insufficient memory to map in the pages, attempt + * to make some space by throwing out some old buffers. 
+ */ + if (ret == -ENOMEM) { + struct drm_device *dev = obj->dev; + gfp_t gfp; + + ret = i915_gem_evict_something(dev, obj->size); + if (ret) + return ret; + + gfp = i915_gem_object_get_page_gfp_mask(obj); + i915_gem_object_set_page_gfp_mask(obj, gfp & ~__GFP_NORETRY); + ret = i915_gem_object_get_pages(obj); + i915_gem_object_set_page_gfp_mask (obj, gfp); + } + + return ret; +} + /** * This is the fallback shmem pread path, which allocates temporary storage * in kernel space to copy_to_user into outside of the struct_mutex, so we @@ -370,8 +411,8 @@ i915_gem_shmem_pread_slow(struct drm_device *dev, struct drm_gem_object *obj, mutex_lock(&dev->struct_mutex); - ret = i915_gem_object_get_pages(obj); - if (ret != 0) + ret = i915_gem_object_get_pages_or_evict(obj); + if (ret) goto fail_unlock; ret = i915_gem_object_set_cpu_read_domain_range(obj, args->offset, @@ -845,8 +886,8 @@ i915_gem_shmem_pwrite_slow(struct drm_device *dev, struct drm_gem_object *obj, mutex_lock(&dev->struct_mutex); - ret = i915_gem_object_get_pages(obj); - if (ret != 0) + ret = i915_gem_object_get_pages_or_evict(obj); + if (ret) goto fail_unlock; ret = i915_gem_object_set_to_cpu_domain(obj, 1); @@ -1965,37 +2006,127 @@ i915_gem_object_unbind(struct drm_gem_object *obj) return 0; } +static inline int +i915_gem_object_is_purgeable(struct drm_i915_gem_object *obj_priv) +{ + return !obj_priv->dirty || obj_priv->madv == I915_MADV_DONTNEED; +} + +static struct drm_gem_object * +i915_gem_find_inactive_object(struct drm_device *dev, int min_size) +{ + drm_i915_private_t *dev_priv = dev->dev_private; + struct drm_i915_gem_object *obj_priv; + struct drm_gem_object *best = NULL; + struct drm_gem_object *first = NULL; + + /* Try to find the smallest clean object */ + list_for_each_entry(obj_priv, &dev_priv->mm.inactive_list, list) { + struct drm_gem_object *obj = obj_priv->obj; + if (obj->size >= min_size) { + if (i915_gem_object_is_purgeable(obj_priv) && + (!best || obj->size < best->size)) { + best = obj; + if (best->size == min_size) + return best; + } + if (!first) + first = obj; + } + } + + return best ? 
best : first; +} + +static int +i915_gem_evict_everything(struct drm_device *dev) +{ + drm_i915_private_t *dev_priv = dev->dev_private; + uint32_t seqno; + int ret; + bool lists_empty; + + DRM_INFO("GTT full, evicting everything: " + "%d objects [%d pinned], " + "%d object bytes [%d pinned], " + "%d/%d gtt bytes\n", + atomic_read(&dev->object_count), + atomic_read(&dev->pin_count), + atomic_read(&dev->object_memory), + atomic_read(&dev->pin_memory), + atomic_read(&dev->gtt_memory), + dev->gtt_total); + + spin_lock(&dev_priv->mm.active_list_lock); + lists_empty = (list_empty(&dev_priv->mm.inactive_list) && + list_empty(&dev_priv->mm.flushing_list) && + list_empty(&dev_priv->mm.active_list)); + spin_unlock(&dev_priv->mm.active_list_lock); + + if (lists_empty) { + DRM_ERROR("GTT full, but lists empty!\n"); + return -ENOSPC; + } + + /* Flush everything (on to the inactive lists) and evict */ + i915_gem_flush(dev, I915_GEM_GPU_DOMAINS, I915_GEM_GPU_DOMAINS); + seqno = i915_add_request(dev, NULL, I915_GEM_GPU_DOMAINS); + if (seqno == 0) + return -ENOMEM; + + ret = i915_wait_request(dev, seqno); + if (ret) + return ret; + + ret = i915_gem_evict_from_list(dev, &dev_priv->mm.inactive_list); + if (ret) + return ret; + + spin_lock(&dev_priv->mm.active_list_lock); + lists_empty = (list_empty(&dev_priv->mm.inactive_list) && + list_empty(&dev_priv->mm.flushing_list) && + list_empty(&dev_priv->mm.active_list)); + spin_unlock(&dev_priv->mm.active_list_lock); + BUG_ON(!lists_empty); + + return 0; +} + static int -i915_gem_evict_something(struct drm_device *dev) +i915_gem_evict_something(struct drm_device *dev, int min_size) { drm_i915_private_t *dev_priv = dev->dev_private; struct drm_gem_object *obj; - struct drm_i915_gem_object *obj_priv; - int ret = 0; + int have_waited = 0; + int ret; for (;;) { + i915_gem_retire_requests(dev); + /* If there's an inactive buffer available now, grab it * and be done. */ - if (!list_empty(&dev_priv->mm.inactive_list)) { - obj_priv = list_first_entry(&dev_priv->mm.inactive_list, - struct drm_i915_gem_object, - list); - obj = obj_priv->obj; - BUG_ON(obj_priv->pin_count != 0); + obj = i915_gem_find_inactive_object(dev, min_size); + if (obj) { + struct drm_i915_gem_object *obj_priv; + #if WATCH_LRU DRM_INFO("%s: evicting %p\n", __func__, obj); #endif + obj_priv = obj->driver_private; + BUG_ON(obj_priv->pin_count != 0); BUG_ON(obj_priv->active); /* Wait on the rendering and unbind the buffer. */ - ret = i915_gem_object_unbind(obj); - break; + return i915_gem_object_unbind(obj); } + if (have_waited) + return 0; + /* If we didn't get anything, but the ring is still processing - * things, wait for one of those things to finish and hopefully - * leave us a buffer to evict. + * things, wait for the next to finish and hopefully leave us + * a buffer to evict. */ if (!list_empty(&dev_priv->mm.request_list)) { struct drm_i915_gem_request *request; @@ -2006,16 +2137,10 @@ i915_gem_evict_something(struct drm_device *dev) ret = i915_wait_request(dev, request->seqno); if (ret) - break; + return ret; - /* if waiting caused an object to become inactive, - * then loop around and wait for it. Otherwise, we - * assume that waiting freed and unbound something, - * so there should now be some space in the GTT - */ - if (!list_empty(&dev_priv->mm.inactive_list)) - continue; - break; + have_waited = 1; + continue; } /* If we didn't have anything on the request list but there @@ -2024,6 +2149,9 @@ i915_gem_evict_something(struct drm_device *dev) * will get moved to inactive. 
*/ if (!list_empty(&dev_priv->mm.flushing_list)) { + struct drm_i915_gem_object *obj_priv; + uint32_t seqno; + obj_priv = list_first_entry(&dev_priv->mm.flushing_list, struct drm_i915_gem_object, list); @@ -2032,38 +2160,29 @@ i915_gem_evict_something(struct drm_device *dev) i915_gem_flush(dev, obj->write_domain, obj->write_domain); - i915_add_request(dev, NULL, obj->write_domain); + seqno = i915_add_request(dev, NULL, obj->write_domain); + if (seqno == 0) + return -ENOMEM; + + ret = i915_wait_request(dev, seqno); + if (ret) + return ret; - obj = NULL; + have_waited = 1; continue; } - DRM_ERROR("inactive empty %d request empty %d " - "flushing empty %d\n", - list_empty(&dev_priv->mm.inactive_list), - list_empty(&dev_priv->mm.request_list), - list_empty(&dev_priv->mm.flushing_list)); - /* If we didn't do any of the above, there's nothing to be done - * and we just can't fit it in. + /* If we didn't do any of the above, there's no single buffer + * large enough to swap out for the new one, so just evict + * everything and start again. (This should be rare.) */ - return -ENOSPC; - } - return ret; -} - -static int -i915_gem_evict_everything(struct drm_device *dev) -{ - int ret; - - for (;;) { - ret = i915_gem_evict_something(dev); - if (ret != 0) - break; + if (!list_empty (&dev_priv->mm.inactive_list)) { + DRM_INFO("GTT full, evicting inactive buffers\n"); + return i915_gem_evict_from_list(dev, + &dev_priv->mm.inactive_list); + } else + return i915_gem_evict_everything(dev); } - if (ret == -ENOSPC) - return 0; - return ret; } int @@ -2086,7 +2205,7 @@ i915_gem_object_get_pages(struct drm_gem_object *obj) BUG_ON(obj_priv->pages != NULL); obj_priv->pages = drm_calloc_large(page_count, sizeof(struct page *)); if (obj_priv->pages == NULL) { - DRM_ERROR("Faled to allocate page list\n"); + DRM_ERROR("Failed to allocate page list\n"); obj_priv->pages_refcount--; return -ENOMEM; } @@ -2097,7 +2216,6 @@ i915_gem_object_get_pages(struct drm_gem_object *obj) page = read_mapping_page(mapping, i, NULL); if (IS_ERR(page)) { ret = PTR_ERR(page); - DRM_ERROR("read_mapping_page failed: %d\n", ret); i915_gem_object_put_pages(obj); return ret; } @@ -2416,7 +2534,8 @@ i915_gem_object_bind_to_gtt(struct drm_gem_object *obj, unsigned alignment) drm_i915_private_t *dev_priv = dev->dev_private; struct drm_i915_gem_object *obj_priv = obj->driver_private; struct drm_mm_node *free_space; - int page_count, ret; + bool retry_alloc = false; + int ret; if (dev_priv->mm.suspended) return -EBUSY; @@ -2445,25 +2564,13 @@ i915_gem_object_bind_to_gtt(struct drm_gem_object *obj, unsigned alignment) } } if (obj_priv->gtt_space == NULL) { - bool lists_empty; - /* If the gtt is empty and we're still having trouble * fitting our object in, we're out of memory. 
*/ #if WATCH_LRU DRM_INFO("%s: GTT full, evicting something\n", __func__); #endif - spin_lock(&dev_priv->mm.active_list_lock); - lists_empty = (list_empty(&dev_priv->mm.inactive_list) && - list_empty(&dev_priv->mm.flushing_list) && - list_empty(&dev_priv->mm.active_list)); - spin_unlock(&dev_priv->mm.active_list_lock); - if (lists_empty) { - DRM_ERROR("GTT full, but LRU list empty\n"); - return -ENOSPC; - } - - ret = i915_gem_evict_something(dev); + ret = i915_gem_evict_something(dev, obj->size); if (ret != 0) { if (ret != -ERESTARTSYS) DRM_ERROR("Failed to evict a buffer %d\n", ret); @@ -2476,27 +2583,62 @@ i915_gem_object_bind_to_gtt(struct drm_gem_object *obj, unsigned alignment) DRM_INFO("Binding object of size %zd at 0x%08x\n", obj->size, obj_priv->gtt_offset); #endif + if (retry_alloc) { + i915_gem_object_set_page_gfp_mask (obj, + i915_gem_object_get_page_gfp_mask (obj) & ~__GFP_NORETRY); + } ret = i915_gem_object_get_pages(obj); + if (retry_alloc) { + i915_gem_object_set_page_gfp_mask (obj, + i915_gem_object_get_page_gfp_mask (obj) | __GFP_NORETRY); + } if (ret) { drm_mm_put_block(obj_priv->gtt_space); obj_priv->gtt_space = NULL; + + if (ret == -ENOMEM) { + /* first try to clear up some space from the GTT */ + ret = i915_gem_evict_something(dev, obj->size); + if (ret) { + if (ret != -ERESTARTSYS) + DRM_ERROR("Failed to allocate space for backing pages %d\n", ret); + + /* now try to shrink everyone else */ + if (! retry_alloc) { + retry_alloc = true; + goto search_free; + } + + return ret; + } + + goto search_free; + } + return ret; } - page_count = obj->size / PAGE_SIZE; /* Create an AGP memory structure pointing at our pages, and bind it * into the GTT. */ obj_priv->agp_mem = drm_agp_bind_pages(dev, obj_priv->pages, - page_count, + obj->size >> PAGE_SHIFT, obj_priv->gtt_offset, obj_priv->agp_type); if (obj_priv->agp_mem == NULL) { i915_gem_object_put_pages(obj); drm_mm_put_block(obj_priv->gtt_space); obj_priv->gtt_space = NULL; - return -ENOMEM; + + ret = i915_gem_evict_something(dev, obj->size); + if (ret) { + if (ret != -ERESTARTSYS) + DRM_ERROR("Failed to allocate space to bind AGP: %d\n", ret); + return ret; + } + + goto search_free; } atomic_inc(&dev->gtt_count); atomic_add(obj->size, &dev->gtt_memory); @@ -3423,8 +3565,23 @@ i915_gem_execbuffer(struct drm_device *dev, void *data, /* error other than GTT full, or we've already tried again */ if (ret != -ENOSPC || pin_tries >= 1) { - if (ret != -ERESTARTSYS) - DRM_ERROR("Failed to pin buffers %d\n", ret); + if (ret != -ERESTARTSYS) { + unsigned long long total_size = 0; + for (i = 0; i < args->buffer_count; i++) + total_size += object_list[i]->size; + DRM_ERROR("Failed to pin buffer %d of %d, total %llu bytes: %d\n", + pinned+1, args->buffer_count, + total_size, ret); + DRM_ERROR("%d objects [%d pinned], " + "%d object bytes [%d pinned], " + "%d/%d gtt bytes\n", + atomic_read(&dev->object_count), + atomic_read(&dev->pin_count), + atomic_read(&dev->object_memory), + atomic_read(&dev->pin_memory), + atomic_read(&dev->gtt_memory), + dev->gtt_total); + } goto err; } @@ -3435,7 +3592,7 @@ i915_gem_execbuffer(struct drm_device *dev, void *data, /* evict everyone we can from the aperture */ ret = i915_gem_evict_everything(dev); - if (ret) + if (ret && ret != -ENOSPC) goto err; } @@ -4568,12 +4725,6 @@ i915_gem_object_truncate(struct drm_gem_object *obj) mutex_unlock(&inode->i_mutex); } -static inline int -i915_gem_object_is_purgeable(struct drm_i915_gem_object *obj_priv) -{ - return !obj_priv->dirty || obj_priv->madv == 
I915_MADV_DONTNEED; -} - static int i915_gem_shrink(int nr_to_scan, gfp_t gfp_mask) { -- cgit v1.2.3 From cd0b9fb400ba775737bdc3874c4cbee4047e66d8 Mon Sep 17 00:00:00 2001 From: Chris Wilson Date: Tue, 15 Sep 2009 23:23:18 +0100 Subject: drm/i915: Check that the relocation points to within the target Eric noted a potential concern with the low bits not being strictly used as part of the absolute offset (being instead part of the command stream to the GPU), but in practice that should not be an issue. Signed-off-by: Chris Wilson Tested-by: Andy Whitcroft Cc: Eric Anholt CC: stable@kernel.org Signed-off-by: Jesse Barnes --- drivers/gpu/drm/i915/i915_gem.c | 10 ++++++++++ 1 file changed, 10 insertions(+) (limited to 'drivers/gpu/drm/i915/i915_gem.c') diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c index 725b4484a092..c60ca32f65d2 100644 --- a/drivers/gpu/drm/i915/i915_gem.c +++ b/drivers/gpu/drm/i915/i915_gem.c @@ -3158,6 +3158,16 @@ i915_gem_object_pin_and_relocate(struct drm_gem_object *obj, return -EINVAL; } + if (reloc->delta >= target_obj->size) { + DRM_ERROR("Relocation beyond target object bounds: " + "obj %p target %d delta %d size %d.\n", + obj, reloc->target_handle, + (int) reloc->delta, (int) target_obj->size); + drm_gem_object_unreference(target_obj); + i915_gem_object_unpin(obj); + return -EINVAL; + } + if (reloc->write_domain & I915_GEM_DOMAIN_CPU || reloc->read_domains & I915_GEM_DOMAIN_CPU) { DRM_ERROR("reloc with read/write CPU domains: " -- cgit v1.2.3 From 8542a0bbbbda412560820b4c3b04e8399e2e99c1 Mon Sep 17 00:00:00 2001 From: Chris Wilson Date: Wed, 9 Sep 2009 21:15:15 +0100 Subject: drm/i915: Skip the sanity checks if the current relocation is valid If the presumed_offset as fed to userspace and returned to the kernel from a previous execbuffer is still valid, then we do not need to rewrite the relocation entry and may skip the offset sanity checks. Signed-off-by: Chris Wilson Signed-off-by: Jesse Barnes --- drivers/gpu/drm/i915/i915_gem.c | 92 +++++++++++++++++++++-------------------- 1 file changed, 47 insertions(+), 45 deletions(-) (limited to 'drivers/gpu/drm/i915/i915_gem.c') diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c index c60ca32f65d2..dea9ac069851 100644 --- a/drivers/gpu/drm/i915/i915_gem.c +++ b/drivers/gpu/drm/i915/i915_gem.c @@ -3128,6 +3128,21 @@ i915_gem_object_pin_and_relocate(struct drm_gem_object *obj, } target_obj_priv = target_obj->driver_private; +#if WATCH_RELOC + DRM_INFO("%s: obj %p offset %08x target %d " + "read %08x write %08x gtt %08x " + "presumed %08x delta %08x\n", + __func__, + obj, + (int) reloc->offset, + (int) reloc->target_handle, + (int) reloc->read_domains, + (int) reloc->write_domain, + (int) target_obj_priv->gtt_offset, + (int) reloc->presumed_offset, + reloc->delta); +#endif + /* The target buffer should have appeared before us in the * exec_object list, so it should have a GTT space bound by now. 
*/ @@ -3139,35 +3154,7 @@ i915_gem_object_pin_and_relocate(struct drm_gem_object *obj, return -EINVAL; } - if (reloc->offset > obj->size - 4) { - DRM_ERROR("Relocation beyond object bounds: " - "obj %p target %d offset %d size %d.\n", - obj, reloc->target_handle, - (int) reloc->offset, (int) obj->size); - drm_gem_object_unreference(target_obj); - i915_gem_object_unpin(obj); - return -EINVAL; - } - if (reloc->offset & 3) { - DRM_ERROR("Relocation not 4-byte aligned: " - "obj %p target %d offset %d.\n", - obj, reloc->target_handle, - (int) reloc->offset); - drm_gem_object_unreference(target_obj); - i915_gem_object_unpin(obj); - return -EINVAL; - } - - if (reloc->delta >= target_obj->size) { - DRM_ERROR("Relocation beyond target object bounds: " - "obj %p target %d delta %d size %d.\n", - obj, reloc->target_handle, - (int) reloc->delta, (int) target_obj->size); - drm_gem_object_unreference(target_obj); - i915_gem_object_unpin(obj); - return -EINVAL; - } - + /* Validate that the target is in a valid r/w GPU domain */ if (reloc->write_domain & I915_GEM_DOMAIN_CPU || reloc->read_domains & I915_GEM_DOMAIN_CPU) { DRM_ERROR("reloc with read/write CPU domains: " @@ -3181,7 +3168,6 @@ i915_gem_object_pin_and_relocate(struct drm_gem_object *obj, i915_gem_object_unpin(obj); return -EINVAL; } - if (reloc->write_domain && target_obj->pending_write_domain && reloc->write_domain != target_obj->pending_write_domain) { DRM_ERROR("Write domain conflict: " @@ -3196,21 +3182,6 @@ i915_gem_object_pin_and_relocate(struct drm_gem_object *obj, return -EINVAL; } -#if WATCH_RELOC - DRM_INFO("%s: obj %p offset %08x target %d " - "read %08x write %08x gtt %08x " - "presumed %08x delta %08x\n", - __func__, - obj, - (int) reloc->offset, - (int) reloc->target_handle, - (int) reloc->read_domains, - (int) reloc->write_domain, - (int) target_obj_priv->gtt_offset, - (int) reloc->presumed_offset, - reloc->delta); -#endif - target_obj->pending_read_domains |= reloc->read_domains; target_obj->pending_write_domain |= reloc->write_domain; @@ -3222,6 +3193,37 @@ i915_gem_object_pin_and_relocate(struct drm_gem_object *obj, continue; } + /* Check that the relocation address is valid... */ + if (reloc->offset > obj->size - 4) { + DRM_ERROR("Relocation beyond object bounds: " + "obj %p target %d offset %d size %d.\n", + obj, reloc->target_handle, + (int) reloc->offset, (int) obj->size); + drm_gem_object_unreference(target_obj); + i915_gem_object_unpin(obj); + return -EINVAL; + } + if (reloc->offset & 3) { + DRM_ERROR("Relocation not 4-byte aligned: " + "obj %p target %d offset %d.\n", + obj, reloc->target_handle, + (int) reloc->offset); + drm_gem_object_unreference(target_obj); + i915_gem_object_unpin(obj); + return -EINVAL; + } + + /* and points to somewhere within the target object. */ + if (reloc->delta >= target_obj->size) { + DRM_ERROR("Relocation beyond target object bounds: " + "obj %p target %d delta %d size %d.\n", + obj, reloc->target_handle, + (int) reloc->delta, (int) target_obj->size); + drm_gem_object_unreference(target_obj); + i915_gem_object_unpin(obj); + return -EINVAL; + } + ret = i915_gem_object_set_to_gtt_domain(obj, 1); if (ret != 0) { drm_gem_object_unreference(target_obj); -- cgit v1.2.3 From 1c5d22f76dc721f3acb7a3dadc657a221e487fb7 Mon Sep 17 00:00:00 2001 From: Chris Wilson Date: Tue, 25 Aug 2009 11:15:50 +0100 Subject: drm/i915: Add tracepoints By adding tracepoint equivalents for WATCH_BUF/EXEC we are able to monitor the lifetimes of objects, requests and significant events. 
These events can then be probed using the tracing frameworks, such as systemtap and, in particular, perf. For example to record the stack trace for every GPU stall during a run, use $ perf record -e i915:i915_gem_request_wait_begin -c 1 -g And $ perf report to view the results. [Updated to fix compilation issues.] Cc: Arjan van de Ven Cc: Ben Gamari Signed-off-by: Chris Wilson --- drivers/gpu/drm/i915/Makefile | 1 + drivers/gpu/drm/i915/i915_dma.c | 8 +- drivers/gpu/drm/i915/i915_gem.c | 119 ++++++++++-- drivers/gpu/drm/i915/i915_irq.c | 9 +- drivers/gpu/drm/i915/i915_trace.h | 315 +++++++++++++++++++++++++++++++ drivers/gpu/drm/i915/i915_trace_points.c | 11 ++ 6 files changed, 447 insertions(+), 16 deletions(-) create mode 100644 drivers/gpu/drm/i915/i915_trace.h create mode 100644 drivers/gpu/drm/i915/i915_trace_points.c (limited to 'drivers/gpu/drm/i915/i915_gem.c') diff --git a/drivers/gpu/drm/i915/Makefile b/drivers/gpu/drm/i915/Makefile index 5269dfa5f620..fa7b9be096bc 100644 --- a/drivers/gpu/drm/i915/Makefile +++ b/drivers/gpu/drm/i915/Makefile @@ -9,6 +9,7 @@ i915-y := i915_drv.o i915_dma.o i915_irq.o i915_mem.o \ i915_gem.o \ i915_gem_debug.o \ i915_gem_tiling.o \ + i915_trace_points.o \ intel_display.o \ intel_crt.o \ intel_lvds.o \ diff --git a/drivers/gpu/drm/i915/i915_dma.c b/drivers/gpu/drm/i915/i915_dma.c index 59826c5b8760..ae7ec0390024 100644 --- a/drivers/gpu/drm/i915/i915_dma.c +++ b/drivers/gpu/drm/i915/i915_dma.c @@ -33,6 +33,7 @@ #include "intel_drv.h" #include "i915_drm.h" #include "i915_drv.h" +#include "i915_trace.h" /* Really want an OS-independent resettable timer. Would like to have * this loop run for (eg) 3 sec, but have the timer reset every time @@ -49,14 +50,18 @@ int i915_wait_ring(struct drm_device * dev, int n, const char *caller) u32 last_head = I915_READ(PRB0_HEAD) & HEAD_ADDR; int i; + trace_i915_ring_wait_begin (dev); + for (i = 0; i < 100000; i++) { ring->head = I915_READ(PRB0_HEAD) & HEAD_ADDR; acthd = I915_READ(acthd_reg); ring->space = ring->head - (ring->tail + 8); if (ring->space < 0) ring->space += ring->Size; - if (ring->space >= n) + if (ring->space >= n) { + trace_i915_ring_wait_end (dev); return 0; + } if (dev->primary->master) { struct drm_i915_master_private *master_priv = dev->primary->master->driver_priv; @@ -76,6 +81,7 @@ int i915_wait_ring(struct drm_device * dev, int n, const char *caller) } + trace_i915_ring_wait_end (dev); return -EBUSY; } diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c index dea9ac069851..67e2cd5636ec 100644 --- a/drivers/gpu/drm/i915/i915_gem.c +++ b/drivers/gpu/drm/i915/i915_gem.c @@ -29,6 +29,7 @@ #include "drm.h" #include "i915_drm.h" #include "i915_drv.h" +#include "i915_trace.h" #include "intel_drv.h" #include <linux/swap.h> #include <linux/pci.h> @@ -1618,8 +1619,14 @@ i915_add_request(struct drm_device *dev, struct drm_file *file_priv, if ((obj->write_domain & flush_domains) == obj->write_domain) { + uint32_t old_write_domain = obj->write_domain; + obj->write_domain = 0; i915_gem_object_move_to_active(obj, seqno); + + trace_i915_gem_object_change_domain(obj, + obj->read_domains, + old_write_domain); } } @@ -1667,6 +1674,8 @@ i915_gem_retire_request(struct drm_device *dev, { drm_i915_private_t *dev_priv = dev->dev_private; + trace_i915_gem_request_retire(dev, request->seqno); + /* Move any buffers on the active list that are no longer referenced * by the ringbuffer to the flushing/inactive lists as appropriate. 
*/ @@ -1810,6 +1819,8 @@ i915_wait_request(struct drm_device *dev, uint32_t seqno) i915_driver_irq_postinstall(dev); } + trace_i915_gem_request_wait_begin(dev, seqno); + dev_priv->mm.waiting_gem_seqno = seqno; i915_user_irq_get(dev); ret = wait_event_interruptible(dev_priv->irq_queue, @@ -1818,6 +1829,8 @@ i915_wait_request(struct drm_device *dev, uint32_t seqno) atomic_read(&dev_priv->mm.wedged)); i915_user_irq_put(dev); dev_priv->mm.waiting_gem_seqno = 0; + + trace_i915_gem_request_wait_end(dev, seqno); } if (atomic_read(&dev_priv->mm.wedged)) ret = -EIO; @@ -1850,6 +1863,8 @@ i915_gem_flush(struct drm_device *dev, DRM_INFO("%s: invalidate %08x flush %08x\n", __func__, invalidate_domains, flush_domains); #endif + trace_i915_gem_request_flush(dev, dev_priv->mm.next_gem_seqno, + invalidate_domains, flush_domains); if (flush_domains & I915_GEM_DOMAIN_CPU) drm_agp_chipset_flush(dev); @@ -2003,6 +2018,8 @@ i915_gem_object_unbind(struct drm_gem_object *obj) if (!list_empty(&obj_priv->list)) list_del_init(&obj_priv->list); + trace_i915_gem_object_unbind(obj); + return 0; } @@ -2452,6 +2469,8 @@ i915_gem_object_get_fence_reg(struct drm_gem_object *obj) else i830_write_fence_reg(reg); + trace_i915_gem_object_get_fence(obj, i, obj_priv->tiling_mode); + return 0; } @@ -2650,6 +2669,8 @@ i915_gem_object_bind_to_gtt(struct drm_gem_object *obj, unsigned alignment) BUG_ON(obj->read_domains & I915_GEM_GPU_DOMAINS); BUG_ON(obj->write_domain & I915_GEM_GPU_DOMAINS); + trace_i915_gem_object_bind(obj, obj_priv->gtt_offset); + return 0; } @@ -2665,6 +2686,8 @@ i915_gem_clflush_object(struct drm_gem_object *obj) if (obj_priv->pages == NULL) return; + trace_i915_gem_object_clflush(obj); + drm_clflush_pages(obj_priv->pages, obj->size / PAGE_SIZE); } @@ -2674,21 +2697,29 @@ i915_gem_object_flush_gpu_write_domain(struct drm_gem_object *obj) { struct drm_device *dev = obj->dev; uint32_t seqno; + uint32_t old_write_domain; if ((obj->write_domain & I915_GEM_GPU_DOMAINS) == 0) return; /* Queue the GPU write cache flushing we need. */ + old_write_domain = obj->write_domain; i915_gem_flush(dev, 0, obj->write_domain); seqno = i915_add_request(dev, NULL, obj->write_domain); obj->write_domain = 0; i915_gem_object_move_to_active(obj, seqno); + + trace_i915_gem_object_change_domain(obj, + obj->read_domains, + old_write_domain); } /** Flushes the GTT write domain for the object if it's dirty. */ static void i915_gem_object_flush_gtt_write_domain(struct drm_gem_object *obj) { + uint32_t old_write_domain; + if (obj->write_domain != I915_GEM_DOMAIN_GTT) return; @@ -2696,7 +2727,12 @@ i915_gem_object_flush_gtt_write_domain(struct drm_gem_object *obj) * to it immediately go to main memory as far as we know, so there's * no chipset flush. It also doesn't land in render cache. */ + old_write_domain = obj->write_domain; obj->write_domain = 0; + + trace_i915_gem_object_change_domain(obj, + obj->read_domains, + old_write_domain); } /** Flushes the CPU write domain for the object if it's dirty. 
*/ @@ -2704,13 +2740,19 @@ static void i915_gem_object_flush_cpu_write_domain(struct drm_gem_object *obj) { struct drm_device *dev = obj->dev; + uint32_t old_write_domain; if (obj->write_domain != I915_GEM_DOMAIN_CPU) return; i915_gem_clflush_object(obj); drm_agp_chipset_flush(dev); + old_write_domain = obj->write_domain; obj->write_domain = 0; + + trace_i915_gem_object_change_domain(obj, + obj->read_domains, + old_write_domain); } /** @@ -2723,6 +2765,7 @@ int i915_gem_object_set_to_gtt_domain(struct drm_gem_object *obj, int write) { struct drm_i915_gem_object *obj_priv = obj->driver_private; + uint32_t old_write_domain, old_read_domains; int ret; /* Not valid to be called on unbound objects. */ @@ -2735,6 +2778,9 @@ i915_gem_object_set_to_gtt_domain(struct drm_gem_object *obj, int write) if (ret != 0) return ret; + old_write_domain = obj->write_domain; + old_read_domains = obj->read_domains; + /* If we're writing through the GTT domain, then CPU and GPU caches * will need to be invalidated at next use. */ @@ -2753,6 +2799,10 @@ i915_gem_object_set_to_gtt_domain(struct drm_gem_object *obj, int write) obj_priv->dirty = 1; } + trace_i915_gem_object_change_domain(obj, + old_read_domains, + old_write_domain); + return 0; } @@ -2765,6 +2815,7 @@ i915_gem_object_set_to_gtt_domain(struct drm_gem_object *obj, int write) static int i915_gem_object_set_to_cpu_domain(struct drm_gem_object *obj, int write) { + uint32_t old_write_domain, old_read_domains; int ret; i915_gem_object_flush_gpu_write_domain(obj); @@ -2780,6 +2831,9 @@ i915_gem_object_set_to_cpu_domain(struct drm_gem_object *obj, int write) */ i915_gem_object_set_to_full_cpu_read_domain(obj); + old_write_domain = obj->write_domain; + old_read_domains = obj->read_domains; + /* Flush the CPU cache if it's still invalid. 
*/ if ((obj->read_domains & I915_GEM_DOMAIN_CPU) == 0) { i915_gem_clflush_object(obj); @@ -2800,6 +2854,10 @@ i915_gem_object_set_to_cpu_domain(struct drm_gem_object *obj, int write) obj->write_domain = I915_GEM_DOMAIN_CPU; } + trace_i915_gem_object_change_domain(obj, + old_read_domains, + old_write_domain); + return 0; } @@ -2921,6 +2979,7 @@ i915_gem_object_set_to_gpu_domain(struct drm_gem_object *obj) struct drm_i915_gem_object *obj_priv = obj->driver_private; uint32_t invalidate_domains = 0; uint32_t flush_domains = 0; + uint32_t old_read_domains; BUG_ON(obj->pending_read_domains & I915_GEM_DOMAIN_CPU); BUG_ON(obj->pending_write_domain == I915_GEM_DOMAIN_CPU); @@ -2967,6 +3026,8 @@ i915_gem_object_set_to_gpu_domain(struct drm_gem_object *obj) i915_gem_clflush_object(obj); } + old_read_domains = obj->read_domains; + /* The actual obj->write_domain will be updated with * pending_write_domain after we emit the accumulated flush for all * of our domain changes in execbuffers (which clears objects' @@ -2985,6 +3046,10 @@ i915_gem_object_set_to_gpu_domain(struct drm_gem_object *obj) obj->read_domains, obj->write_domain, dev->invalidate_domains, dev->flush_domains); #endif + + trace_i915_gem_object_change_domain(obj, + old_read_domains, + obj->write_domain); } /** @@ -3037,6 +3102,7 @@ i915_gem_object_set_cpu_read_domain_range(struct drm_gem_object *obj, uint64_t offset, uint64_t size) { struct drm_i915_gem_object *obj_priv = obj->driver_private; + uint32_t old_read_domains; int i, ret; if (offset == 0 && size == obj->size) @@ -3083,8 +3149,13 @@ i915_gem_object_set_cpu_read_domain_range(struct drm_gem_object *obj, */ BUG_ON((obj->write_domain & ~I915_GEM_DOMAIN_CPU) != 0); + old_read_domains = obj->read_domains; obj->read_domains |= I915_GEM_DOMAIN_CPU; + trace_i915_gem_object_change_domain(obj, + old_read_domains, + obj->write_domain); + return 0; } @@ -3282,6 +3353,8 @@ i915_dispatch_gem_execbuffer(struct drm_device *dev, exec_start = (uint32_t) exec_offset + exec->batch_start_offset; exec_len = (uint32_t) exec->batch_len; + trace_i915_gem_request_submit(dev, dev_priv->mm.next_gem_seqno); + count = nbox ? nbox : 1; for (i = 0; i < count; i++) { @@ -3660,8 +3733,12 @@ i915_gem_execbuffer(struct drm_device *dev, void *data, for (i = 0; i < args->buffer_count; i++) { struct drm_gem_object *obj = object_list[i]; + uint32_t old_write_domain = obj->write_domain; obj->write_domain = obj->pending_write_domain; + trace_i915_gem_object_change_domain(obj, + obj->read_domains, + old_write_domain); } i915_verify_inactive(dev, __FILE__, __LINE__); @@ -4050,6 +4127,8 @@ int i915_gem_init_object(struct drm_gem_object *obj) INIT_LIST_HEAD(&obj_priv->fence_list); obj_priv->madv = I915_MADV_WILLNEED; + trace_i915_gem_object_create(obj); + return 0; } @@ -4058,6 +4137,8 @@ void i915_gem_free_object(struct drm_gem_object *obj) struct drm_device *dev = obj->dev; struct drm_i915_gem_object *obj_priv = obj->driver_private; + trace_i915_gem_object_destroy(obj); + while (obj_priv->pin_count > 0) i915_gem_object_unpin(obj); @@ -4186,24 +4267,36 @@ i915_gem_idle(struct drm_device *dev) * the GPU domains and just stuff them onto inactive. 
*/ while (!list_empty(&dev_priv->mm.active_list)) { - struct drm_i915_gem_object *obj_priv; + struct drm_gem_object *obj; + uint32_t old_write_domain; - obj_priv = list_first_entry(&dev_priv->mm.active_list, - struct drm_i915_gem_object, - list); - obj_priv->obj->write_domain &= ~I915_GEM_GPU_DOMAINS; - i915_gem_object_move_to_inactive(obj_priv->obj); + obj = list_first_entry(&dev_priv->mm.active_list, + struct drm_i915_gem_object, + list)->obj; + old_write_domain = obj->write_domain; + obj->write_domain &= ~I915_GEM_GPU_DOMAINS; + i915_gem_object_move_to_inactive(obj); + + trace_i915_gem_object_change_domain(obj, + obj->read_domains, + old_write_domain); } spin_unlock(&dev_priv->mm.active_list_lock); while (!list_empty(&dev_priv->mm.flushing_list)) { - struct drm_i915_gem_object *obj_priv; - - obj_priv = list_first_entry(&dev_priv->mm.flushing_list, - struct drm_i915_gem_object, - list); - obj_priv->obj->write_domain &= ~I915_GEM_GPU_DOMAINS; - i915_gem_object_move_to_inactive(obj_priv->obj); + struct drm_gem_object *obj; + uint32_t old_write_domain; + + obj = list_first_entry(&dev_priv->mm.flushing_list, + struct drm_i915_gem_object, + list)->obj; + old_write_domain = obj->write_domain; + obj->write_domain &= ~I915_GEM_GPU_DOMAINS; + i915_gem_object_move_to_inactive(obj); + + trace_i915_gem_object_change_domain(obj, + obj->read_domains, + old_write_domain); } diff --git a/drivers/gpu/drm/i915/i915_irq.c b/drivers/gpu/drm/i915/i915_irq.c index 13e664ddb611..4dfeec7cdd42 100644 --- a/drivers/gpu/drm/i915/i915_irq.c +++ b/drivers/gpu/drm/i915/i915_irq.c @@ -31,6 +31,7 @@ #include "drm.h" #include "i915_drm.h" #include "i915_drv.h" +#include "i915_trace.h" #include "intel_drv.h" #define MAX_NOPID ((u32)~0) @@ -279,7 +280,9 @@ irqreturn_t igdng_irq_handler(struct drm_device *dev) } if (gt_iir & GT_USER_INTERRUPT) { - dev_priv->mm.irq_gem_seqno = i915_get_gem_seqno(dev); + u32 seqno = i915_get_gem_seqno(dev); + dev_priv->mm.irq_gem_seqno = seqno; + trace_i915_gem_request_complete(dev, seqno); DRM_WAKEUP(&dev_priv->irq_queue); } @@ -622,7 +625,9 @@ irqreturn_t i915_driver_irq_handler(DRM_IRQ_ARGS) } if (iir & I915_USER_INTERRUPT) { - dev_priv->mm.irq_gem_seqno = i915_get_gem_seqno(dev); + u32 seqno = i915_get_gem_seqno(dev); + dev_priv->mm.irq_gem_seqno = seqno; + trace_i915_gem_request_complete(dev, seqno); DRM_WAKEUP(&dev_priv->irq_queue); dev_priv->hangcheck_count = 0; mod_timer(&dev_priv->hangcheck_timer, jiffies + DRM_I915_HANGCHECK_PERIOD); diff --git a/drivers/gpu/drm/i915/i915_trace.h b/drivers/gpu/drm/i915/i915_trace.h new file mode 100644 index 000000000000..5567a40816f3 --- /dev/null +++ b/drivers/gpu/drm/i915/i915_trace.h @@ -0,0 +1,315 @@ +#if !defined(_I915_TRACE_H_) || defined(TRACE_HEADER_MULTI_READ) +#define _I915_TRACE_H_ + +#include <linux/stringify.h> +#include <linux/types.h> +#include <linux/tracepoint.h> + +#include <drm/drmP.h> + +#undef TRACE_SYSTEM +#define TRACE_SYSTEM i915 +#define TRACE_SYSTEM_STRING __stringify(TRACE_SYSTEM) +#define TRACE_INCLUDE_FILE i915_trace + +/* object tracking */ + +TRACE_EVENT(i915_gem_object_create, + + TP_PROTO(struct drm_gem_object *obj), + + TP_ARGS(obj), + + TP_STRUCT__entry( + __field(struct drm_gem_object *, obj) + __field(u32, size) + ), + + TP_fast_assign( + __entry->obj = obj; + __entry->size = obj->size; + ), + + TP_printk("obj=%p, size=%u", __entry->obj, __entry->size) +); + +TRACE_EVENT(i915_gem_object_bind, + + TP_PROTO(struct drm_gem_object *obj, u32 gtt_offset), + + TP_ARGS(obj, gtt_offset), + + TP_STRUCT__entry( + __field(struct drm_gem_object *, obj) + __field(u32, gtt_offset) + ), + + 
TP_fast_assign( + __entry->obj = obj; + __entry->gtt_offset = gtt_offset; + ), + + TP_printk("obj=%p, gtt_offset=%08x", + __entry->obj, __entry->gtt_offset) +); + +TRACE_EVENT(i915_gem_object_clflush, + + TP_PROTO(struct drm_gem_object *obj), + + TP_ARGS(obj), + + TP_STRUCT__entry( + __field(struct drm_gem_object *, obj) + ), + + TP_fast_assign( + __entry->obj = obj; + ), + + TP_printk("obj=%p", __entry->obj) +); + +TRACE_EVENT(i915_gem_object_change_domain, + + TP_PROTO(struct drm_gem_object *obj, uint32_t old_read_domains, uint32_t old_write_domain), + + TP_ARGS(obj, old_read_domains, old_write_domain), + + TP_STRUCT__entry( + __field(struct drm_gem_object *, obj) + __field(u32, read_domains) + __field(u32, write_domain) + ), + + TP_fast_assign( + __entry->obj = obj; + __entry->read_domains = obj->read_domains | (old_read_domains << 16); + __entry->write_domain = obj->write_domain | (old_write_domain << 16); + ), + + TP_printk("obj=%p, read=%04x, write=%04x", + __entry->obj, + __entry->read_domains, __entry->write_domain) +); + +TRACE_EVENT(i915_gem_object_get_fence, + + TP_PROTO(struct drm_gem_object *obj, int fence, int tiling_mode), + + TP_ARGS(obj, fence, tiling_mode), + + TP_STRUCT__entry( + __field(struct drm_gem_object *, obj) + __field(int, fence) + __field(int, tiling_mode) + ), + + TP_fast_assign( + __entry->obj = obj; + __entry->fence = fence; + __entry->tiling_mode = tiling_mode; + ), + + TP_printk("obj=%p, fence=%d, tiling=%d", + __entry->obj, __entry->fence, __entry->tiling_mode) +); + +TRACE_EVENT(i915_gem_object_unbind, + + TP_PROTO(struct drm_gem_object *obj), + + TP_ARGS(obj), + + TP_STRUCT__entry( + __field(struct drm_gem_object *, obj) + ), + + TP_fast_assign( + __entry->obj = obj; + ), + + TP_printk("obj=%p", __entry->obj) +); + +TRACE_EVENT(i915_gem_object_destroy, + + TP_PROTO(struct drm_gem_object *obj), + + TP_ARGS(obj), + + TP_STRUCT__entry( + __field(struct drm_gem_object *, obj) + ), + + TP_fast_assign( + __entry->obj = obj; + ), + + TP_printk("obj=%p", __entry->obj) +); + +/* batch tracing */ + +TRACE_EVENT(i915_gem_request_submit, + + TP_PROTO(struct drm_device *dev, u32 seqno), + + TP_ARGS(dev, seqno), + + TP_STRUCT__entry( + __field(struct drm_device *, dev) + __field(u32, seqno) + ), + + TP_fast_assign( + __entry->dev = dev; + __entry->seqno = seqno; + ), + + TP_printk("dev=%p, seqno=%u", __entry->dev, __entry->seqno) +); + +TRACE_EVENT(i915_gem_request_flush, + + TP_PROTO(struct drm_device *dev, u32 seqno, + u32 flush_domains, u32 invalidate_domains), + + TP_ARGS(dev, seqno, flush_domains, invalidate_domains), + + TP_STRUCT__entry( + __field(struct drm_device *, dev) + __field(u32, seqno) + __field(u32, flush_domains) + __field(u32, invalidate_domains) + ), + + TP_fast_assign( + __entry->dev = dev; + __entry->seqno = seqno; + __entry->flush_domains = flush_domains; + __entry->invalidate_domains = invalidate_domains; + ), + + TP_printk("dev=%p, seqno=%u, flush=%04x, invalidate=%04x", + __entry->dev, __entry->seqno, + __entry->flush_domains, __entry->invalidate_domains) +); + + +TRACE_EVENT(i915_gem_request_complete, + + TP_PROTO(struct drm_device *dev, u32 seqno), + + TP_ARGS(dev, seqno), + + TP_STRUCT__entry( + __field(struct drm_device *, dev) + __field(u32, seqno) + ), + + TP_fast_assign( + __entry->dev = dev; + __entry->seqno = seqno; + ), + + TP_printk("dev=%p, seqno=%u", __entry->dev, __entry->seqno) +); + +TRACE_EVENT(i915_gem_request_retire, + + TP_PROTO(struct drm_device *dev, u32 seqno), + + TP_ARGS(dev, seqno), + + TP_STRUCT__entry( + 
__field(struct drm_device *, dev) + __field(u32, seqno) + ), + + TP_fast_assign( + __entry->dev = dev; + __entry->seqno = seqno; + ), + + TP_printk("dev=%p, seqno=%u", __entry->dev, __entry->seqno) +); + +TRACE_EVENT(i915_gem_request_wait_begin, + + TP_PROTO(struct drm_device *dev, u32 seqno), + + TP_ARGS(dev, seqno), + + TP_STRUCT__entry( + __field(struct drm_device *, dev) + __field(u32, seqno) + ), + + TP_fast_assign( + __entry->dev = dev; + __entry->seqno = seqno; + ), + + TP_printk("dev=%p, seqno=%u", __entry->dev, __entry->seqno) +); + +TRACE_EVENT(i915_gem_request_wait_end, + + TP_PROTO(struct drm_device *dev, u32 seqno), + + TP_ARGS(dev, seqno), + + TP_STRUCT__entry( + __field(struct drm_device *, dev) + __field(u32, seqno) + ), + + TP_fast_assign( + __entry->dev = dev; + __entry->seqno = seqno; + ), + + TP_printk("dev=%p, seqno=%u", __entry->dev, __entry->seqno) +); + +TRACE_EVENT(i915_ring_wait_begin, + + TP_PROTO(struct drm_device *dev), + + TP_ARGS(dev), + + TP_STRUCT__entry( + __field(struct drm_device *, dev) + ), + + TP_fast_assign( + __entry->dev = dev; + ), + + TP_printk("dev=%p", __entry->dev) +); + +TRACE_EVENT(i915_ring_wait_end, + + TP_PROTO(struct drm_device *dev), + + TP_ARGS(dev), + + TP_STRUCT__entry( + __field(struct drm_device *, dev) + ), + + TP_fast_assign( + __entry->dev = dev; + ), + + TP_printk("dev=%p", __entry->dev) +); + +#endif /* _I915_TRACE_H_ */ + +/* This part must be outside protection */ +#undef TRACE_INCLUDE_PATH +#define TRACE_INCLUDE_PATH ../../drivers/gpu/drm/i915 +#include <trace/define_trace.h> diff --git a/drivers/gpu/drm/i915/i915_trace_points.c b/drivers/gpu/drm/i915/i915_trace_points.c new file mode 100644 index 000000000000..ead876eb6ea0 --- /dev/null +++ b/drivers/gpu/drm/i915/i915_trace_points.c @@ -0,0 +1,11 @@ +/* + * Copyright © 2009 Intel Corporation + * + * Authors: + * Chris Wilson + */ + +#include "i915_drv.h" + +#define CREATE_TRACE_POINTS +#include "i915_trace.h" -- cgit v1.2.3 From ab5ee57650165dc342a20d1213d48d585f2a72bd Mon Sep 17 00:00:00 2001 From: Chris Wilson Date: Sun, 20 Sep 2009 19:25:47 +0100 Subject: drm/i915: Clean up evict from list. First, the routine attempted to unlock a mutex it did not own along the error path. Second, the routine should never be called on any list but the inactive one, since we attempt to unbind those objects, so fix the calling semantics. 
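The rule being restored, as a minimal sketch (do_evict_work() is a placeholder here, not a function in the tree): a helper that is entered with dev->struct_mutex held must return with it still held on every path, and only the caller that took the lock may release it.

static int evict_helper(struct drm_device *dev)
{
        int ret;

        /* The caller owns dev->struct_mutex; we only assert that. */
        BUG_ON(!mutex_is_locked(&dev->struct_mutex));

        ret = do_evict_work(dev);
        if (ret) {
                /* Propagate the error with the lock still held. The old
                 * code called mutex_unlock() here, releasing a mutex this
                 * function never acquired. */
                return ret;
        }

        return 0;
}
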
Signed-off-by: Chris Wilson --- drivers/gpu/drm/i915/i915_gem.c | 39 ++++++++++++++------------------------- 1 file changed, 14 insertions(+), 25 deletions(-) (limited to 'drivers/gpu/drm/i915/i915_gem.c') diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c index 67e2cd5636ec..d49ac1c2e396 100644 --- a/drivers/gpu/drm/i915/i915_gem.c +++ b/drivers/gpu/drm/i915/i915_gem.c @@ -50,8 +50,7 @@ static int i915_gem_object_bind_to_gtt(struct drm_gem_object *obj, unsigned alignment); static void i915_gem_clear_fence_reg(struct drm_gem_object *obj); static int i915_gem_evict_something(struct drm_device *dev, int min_size); -static int i915_gem_evict_from_list(struct drm_device *dev, - struct list_head *head); +static int i915_gem_evict_from_inactive_list(struct drm_device *dev); static int i915_gem_phys_pwrite(struct drm_device *dev, struct drm_gem_object *obj, struct drm_i915_gem_pwrite *args, struct drm_file *file_priv); @@ -2095,7 +2094,7 @@ i915_gem_evict_everything(struct drm_device *dev) if (ret) return ret; - ret = i915_gem_evict_from_list(dev, &dev_priv->mm.inactive_list); + ret = i915_gem_evict_from_inactive_list(dev); if (ret) return ret; @@ -2195,8 +2194,7 @@ i915_gem_evict_something(struct drm_device *dev, int min_size) */ if (!list_empty (&dev_priv->mm.inactive_list)) { DRM_INFO("GTT full, evicting inactive buffers\n"); - return i915_gem_evict_from_list(dev, - &dev_priv->mm.inactive_list); + return i915_gem_evict_from_inactive_list(dev); } else return i915_gem_evict_everything(dev); } @@ -4155,36 +4153,27 @@ void i915_gem_free_object(struct drm_gem_object *obj) kfree(obj->driver_private); } -/** Unbinds all objects that are on the given buffer list. */ +/** Unbinds all inactive objects. */ static int -i915_gem_evict_from_list(struct drm_device *dev, struct list_head *head) +i915_gem_evict_from_inactive_list(struct drm_device *dev) { - struct drm_gem_object *obj; - struct drm_i915_gem_object *obj_priv; - int ret; + drm_i915_private_t *dev_priv = dev->dev_private; - while (!list_empty(head)) { - obj_priv = list_first_entry(head, - struct drm_i915_gem_object, - list); - obj = obj_priv->obj; + while (!list_empty(&dev_priv->mm.inactive_list)) { + struct drm_gem_object *obj; + int ret; - if (obj_priv->pin_count != 0) { - DRM_ERROR("Pinned object in unbind list\n"); - mutex_unlock(&dev->struct_mutex); - return -EINVAL; - } + obj = list_first_entry(&dev_priv->mm.inactive_list, + struct drm_i915_gem_object, + list)->obj; ret = i915_gem_object_unbind(obj); if (ret != 0) { - DRM_ERROR("Error unbinding object in LeaveVT: %d\n", - ret); - mutex_unlock(&dev->struct_mutex); + DRM_ERROR("Error unbinding object: %d\n", ret); return ret; } } - return 0; } @@ -4301,7 +4290,7 @@ i915_gem_idle(struct drm_device *dev) /* Move all inactive buffers out of the GTT. */ - ret = i915_gem_evict_from_list(dev, &dev_priv->mm.inactive_list); + ret = i915_gem_evict_from_inactive_list(dev); WARN_ON(!list_empty(&dev_priv->mm.inactive_list)); if (ret) { mutex_unlock(&dev->struct_mutex); -- cgit v1.2.3 From 9a1e2582d8d397500d5241d1543709046e0f05ff Mon Sep 17 00:00:00 2001 From: Chris Wilson Date: Sun, 20 Sep 2009 20:16:50 +0100 Subject: drm/i915: Search harder for a reusable object As evict_something() is called by routines that do not repeatedly search again, try harder in the initial search to find an object that matches the request. 
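The added search, pulled out below as a stand-alone helper for clarity (a sketch; the patch open-codes this loop inside i915_gem_evict_something()):

static struct drm_gem_object *
find_flushing_candidate(drm_i915_private_t *dev_priv, int min_size)
{
        struct drm_i915_gem_object *obj_priv;

        /* Walk the flushing list in LRU order and take the first
         * object large enough to satisfy the request, rather than
         * blindly flushing whatever happens to sit at the head. */
        list_for_each_entry(obj_priv, &dev_priv->mm.flushing_list, list) {
                if (obj_priv->obj->size >= min_size)
                        return obj_priv->obj;
        }

        return NULL;    /* nothing reusable; fall back to evicting more */
}
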
Signed-off-by: Chris Wilson --- drivers/gpu/drm/i915/i915_gem.c | 43 +++++++++++++++++++++-------------------- 1 file changed, 22 insertions(+), 21 deletions(-) (limited to 'drivers/gpu/drm/i915/i915_gem.c') diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c index d49ac1c2e396..874cce62f51a 100644 --- a/drivers/gpu/drm/i915/i915_gem.c +++ b/drivers/gpu/drm/i915/i915_gem.c @@ -2113,7 +2113,6 @@ i915_gem_evict_something(struct drm_device *dev, int min_size) { drm_i915_private_t *dev_priv = dev->dev_private; struct drm_gem_object *obj; - int have_waited = 0; int ret; for (;;) { @@ -2137,9 +2136,6 @@ i915_gem_evict_something(struct drm_device *dev, int min_size) return i915_gem_object_unbind(obj); } - if (have_waited) - return 0; - /* If we didn't get anything, but the ring is still processing * things, wait for the next to finish and hopefully leave us * a buffer to evict. @@ -2155,7 +2151,6 @@ i915_gem_evict_something(struct drm_device *dev, int min_size) if (ret) return ret; - have_waited = 1; continue; } @@ -2166,26 +2161,32 @@ i915_gem_evict_something(struct drm_device *dev, int min_size) */ if (!list_empty(&dev_priv->mm.flushing_list)) { struct drm_i915_gem_object *obj_priv; - uint32_t seqno; - obj_priv = list_first_entry(&dev_priv->mm.flushing_list, - struct drm_i915_gem_object, - list); - obj = obj_priv->obj; + /* Find an object that we can immediately reuse */ + list_for_each_entry(obj_priv, &dev_priv->mm.flushing_list, list) { + obj = obj_priv->obj; + if (obj->size >= min_size) + break; - i915_gem_flush(dev, - obj->write_domain, - obj->write_domain); - seqno = i915_add_request(dev, NULL, obj->write_domain); - if (seqno == 0) - return -ENOMEM; + obj = NULL; + } - ret = i915_wait_request(dev, seqno); - if (ret) - return ret; + if (obj != NULL) { + uint32_t seqno; - have_waited = 1; - continue; + i915_gem_flush(dev, + obj->write_domain, + obj->write_domain); + seqno = i915_add_request(dev, NULL, obj->write_domain); + if (seqno == 0) + return -ENOMEM; + + ret = i915_wait_request(dev, seqno); + if (ret) + return ret; + + continue; + } } /* If we didn't do any of the above, there's no single buffer -- cgit v1.2.3 From a32808c0a1244a52038bb94a3efcdd6a64a31a5b Mon Sep 17 00:00:00 2001 From: Chris Wilson Date: Sun, 20 Sep 2009 21:29:47 +0100 Subject: drm/i915: BUG_ON page refleak during unbind Signed-off-by: Chris Wilson --- drivers/gpu/drm/i915/i915_gem.c | 1 + 1 file changed, 1 insertion(+) (limited to 'drivers/gpu/drm/i915/i915_gem.c') diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c index 874cce62f51a..93b4232ee280 100644 --- a/drivers/gpu/drm/i915/i915_gem.c +++ b/drivers/gpu/drm/i915/i915_gem.c @@ -2004,6 +2004,7 @@ i915_gem_object_unbind(struct drm_gem_object *obj) } i915_gem_object_put_pages(obj); + BUG_ON(obj_priv->pages_refcount); if (obj_priv->gtt_space) { atomic_dec(&dev->gtt_count); -- cgit v1.2.3 From 13a05fd978a110d1efcda4a09e225aa156204ea3 Mon Sep 17 00:00:00 2001 From: Chris Wilson Date: Sun, 20 Sep 2009 23:03:19 +0100 Subject: drm/i915: Whitespace correction for madv Signed-off-by: Chris Wilson --- drivers/gpu/drm/i915/i915_gem.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'drivers/gpu/drm/i915/i915_gem.c') diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c index 93b4232ee280..c8ecc75aa79b 100644 --- a/drivers/gpu/drm/i915/i915_gem.c +++ b/drivers/gpu/drm/i915/i915_gem.c @@ -1478,7 +1478,7 @@ i915_gem_object_put_pages(struct drm_gem_object *obj) 
i915_gem_object_save_bit_17_swizzle(obj); if (obj_priv->madv == I915_MADV_DONTNEED) - obj_priv->dirty = 0; + obj_priv->dirty = 0; for (i = 0; i < page_count; i++) { if (obj_priv->pages[i] == NULL) @@ -1488,7 +1488,7 @@ i915_gem_object_put_pages(struct drm_gem_object *obj) set_page_dirty(obj_priv->pages[i]); if (obj_priv->madv == I915_MADV_WILLNEED) - mark_page_accessed(obj_priv->pages[i]); + mark_page_accessed(obj_priv->pages[i]); page_cache_release(obj_priv->pages[i]); } -- cgit v1.2.3 From 963b483691314ed174ceb883f2b9f13b3ef7fb33 Mon Sep 17 00:00:00 2001 From: Chris Wilson Date: Sun, 20 Sep 2009 23:03:54 +0100 Subject: drm/i915: Do not mis-classify clean objects as purgeable Whilst cleaning up the patches for submission, I mis-classified non-dirty objects as purgeable. This was causing the backing pages for those objects to be evicted under memory-pressure, discarding valid and unreplaceable texture data. Signed-off-by: Chris Wilson --- drivers/gpu/drm/i915/i915_gem.c | 55 +++++++++++++++++++---------------------- 1 file changed, 26 insertions(+), 29 deletions(-) (limited to 'drivers/gpu/drm/i915/i915_gem.c') diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c index c8ecc75aa79b..c69f1fa38cc8 100644 --- a/drivers/gpu/drm/i915/i915_gem.c +++ b/drivers/gpu/drm/i915/i915_gem.c @@ -1530,6 +1530,23 @@ i915_gem_object_move_to_flushing(struct drm_gem_object *obj) obj_priv->last_rendering_seqno = 0; } +/* Immediately discard the backing storage */ +static void +i915_gem_object_truncate(struct drm_gem_object *obj) +{ + struct inode *inode; + + inode = obj->filp->f_path.dentry->d_inode; + if (inode->i_op->truncate) + inode->i_op->truncate (inode); +} + +static inline int +i915_gem_object_is_purgeable(struct drm_i915_gem_object *obj_priv) +{ + return obj_priv->madv == I915_MADV_DONTNEED; +} + static void i915_gem_object_move_to_inactive(struct drm_gem_object *obj) { @@ -2018,17 +2035,14 @@ i915_gem_object_unbind(struct drm_gem_object *obj) if (!list_empty(&obj_priv->list)) list_del_init(&obj_priv->list); + if (i915_gem_object_is_purgeable(obj_priv)) + i915_gem_object_truncate(obj); + trace_i915_gem_object_unbind(obj); return 0; } -static inline int -i915_gem_object_is_purgeable(struct drm_i915_gem_object *obj_priv) -{ - return !obj_priv->dirty || obj_priv->madv == I915_MADV_DONTNEED; -} - static struct drm_gem_object * i915_gem_find_inactive_object(struct drm_device *dev, int min_size) { @@ -2041,7 +2055,8 @@ i915_gem_find_inactive_object(struct drm_device *dev, int min_size) list_for_each_entry(obj_priv, &dev_priv->mm.inactive_list, list) { struct drm_gem_object *obj = obj_priv->obj; if (obj->size >= min_size) { - if (i915_gem_object_is_purgeable(obj_priv) && + if ((!obj_priv->dirty || + i915_gem_object_is_purgeable(obj_priv)) && (!best || obj->size < best->size)) { best = obj; if (best->size == min_size) @@ -4808,19 +4823,6 @@ void i915_gem_release(struct drm_device * dev, struct drm_file *file_priv) mutex_unlock(&dev->struct_mutex); } -/* Immediately discard the backing storage */ -static void -i915_gem_object_truncate(struct drm_gem_object *obj) -{ - struct inode *inode; - - inode = obj->filp->f_path.dentry->d_inode; - - mutex_lock(&inode->i_mutex); - truncate_inode_pages(inode->i_mapping, 0); - mutex_unlock(&inode->i_mutex); -} - static int i915_gem_shrink(int nr_to_scan, gfp_t gfp_mask) { @@ -4866,10 +4868,7 @@ i915_gem_shrink(int nr_to_scan, gfp_t gfp_mask) &dev_priv->mm.inactive_list, list) { if (i915_gem_object_is_purgeable(obj_priv)) { - struct drm_gem_object 
*obj = obj_priv->obj; - i915_gem_object_unbind(obj); - i915_gem_object_truncate(obj); - + i915_gem_object_unbind(obj_priv->obj); if (--nr_to_scan <= 0) break; } @@ -4878,6 +4877,8 @@ i915_gem_shrink(int nr_to_scan, gfp_t gfp_mask) spin_lock(&shrink_list_lock); mutex_unlock(&dev->struct_mutex); + would_deadlock = 0; + if (nr_to_scan <= 0) break; } @@ -4896,11 +4897,7 @@ i915_gem_shrink(int nr_to_scan, gfp_t gfp_mask) &dev_priv->mm.inactive_list, list) { if (nr_to_scan > 0) { - struct drm_gem_object *obj = obj_priv->obj; - i915_gem_object_unbind(obj); - if (i915_gem_object_is_purgeable(obj_priv)) - i915_gem_object_truncate(obj); - + i915_gem_object_unbind(obj_priv->obj); nr_to_scan--; } else cnt++; -- cgit v1.2.3 From 2d7ef395b310e17c86fa6190f21ea1f2eccae5d1 Mon Sep 17 00:00:00 2001 From: Chris Wilson Date: Sun, 20 Sep 2009 23:13:10 +0100 Subject: drm/i915: Immediately discard any backing storage for unneeded objects Signed-off-by: Chris Wilson --- drivers/gpu/drm/i915/i915_gem.c | 5 +++++ 1 file changed, 5 insertions(+) (limited to 'drivers/gpu/drm/i915/i915_gem.c') diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c index c69f1fa38cc8..62ba9c121a14 100644 --- a/drivers/gpu/drm/i915/i915_gem.c +++ b/drivers/gpu/drm/i915/i915_gem.c @@ -4110,6 +4110,11 @@ i915_gem_madvise_ioctl(struct drm_device *dev, void *data, obj_priv->madv = args->madv; args->retained = obj_priv->gtt_space != NULL; + /* if the object is no longer bound, discard its backing storage */ + if (i915_gem_object_is_purgeable(obj_priv) && + obj_priv->gtt_space == NULL) + i915_gem_object_truncate(obj); + drm_gem_object_unreference(obj); mutex_unlock(&dev->struct_mutex); -- cgit v1.2.3 From 9731129c5e3077d0c2da13479f91c3a07e341f70 Mon Sep 17 00:00:00 2001 From: Chris Wilson Date: Mon, 21 Sep 2009 00:22:34 +0100 Subject: drm/i915: Remove eviction debug spam Signed-off-by: Chris Wilson --- drivers/gpu/drm/i915/i915_gem.c | 40 +++++++--------------------------------- 1 file changed, 7 insertions(+), 33 deletions(-) (limited to 'drivers/gpu/drm/i915/i915_gem.c') diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c index 62ba9c121a14..8beec97fa348 100644 --- a/drivers/gpu/drm/i915/i915_gem.c +++ b/drivers/gpu/drm/i915/i915_gem.c @@ -2078,27 +2078,14 @@ i915_gem_evict_everything(struct drm_device *dev) int ret; bool lists_empty; - DRM_INFO("GTT full, evicting everything: " - "%d objects [%d pinned], " - "%d object bytes [%d pinned], " - "%d/%d gtt bytes\n", - atomic_read(&dev->object_count), - atomic_read(&dev->pin_count), - atomic_read(&dev->object_memory), - atomic_read(&dev->pin_memory), - atomic_read(&dev->gtt_memory), - dev->gtt_total); - spin_lock(&dev_priv->mm.active_list_lock); lists_empty = (list_empty(&dev_priv->mm.inactive_list) && list_empty(&dev_priv->mm.flushing_list) && list_empty(&dev_priv->mm.active_list)); spin_unlock(&dev_priv->mm.active_list_lock); - if (lists_empty) { - DRM_ERROR("GTT full, but lists empty!\n"); + if (lists_empty) return -ENOSPC; - } /* Flush everything (on to the inactive lists) and evict */ i915_gem_flush(dev, I915_GEM_GPU_DOMAINS, I915_GEM_GPU_DOMAINS); @@ -2209,10 +2196,9 @@ i915_gem_evict_something(struct drm_device *dev, int min_size) * large enough to swap out for the new one, so just evict * everything and start again. (This should be rare.) 
*/ - if (!list_empty (&dev_priv->mm.inactive_list)) { - DRM_INFO("GTT full, evicting inactive buffers\n"); + if (!list_empty (&dev_priv->mm.inactive_list)) return i915_gem_evict_from_inactive_list(dev); - } else + else return i915_gem_evict_everything(dev); } } @@ -2237,7 +2223,6 @@ i915_gem_object_get_pages(struct drm_gem_object *obj) BUG_ON(obj_priv->pages != NULL); obj_priv->pages = drm_calloc_large(page_count, sizeof(struct page *)); if (obj_priv->pages == NULL) { - DRM_ERROR("Failed to allocate page list\n"); obj_priv->pages_refcount--; return -ENOMEM; } @@ -2605,11 +2590,9 @@ i915_gem_object_bind_to_gtt(struct drm_gem_object *obj, unsigned alignment) DRM_INFO("%s: GTT full, evicting something\n", __func__); #endif ret = i915_gem_evict_something(dev, obj->size); - if (ret != 0) { - if (ret != -ERESTARTSYS) - DRM_ERROR("Failed to evict a buffer %d\n", ret); + if (ret) return ret; - } + goto search_free; } @@ -2634,9 +2617,6 @@ i915_gem_object_bind_to_gtt(struct drm_gem_object *obj, unsigned alignment) /* first try to clear up some space from the GTT */ ret = i915_gem_evict_something(dev, obj->size); if (ret) { - if (ret != -ERESTARTSYS) - DRM_ERROR("Failed to allocate space for backing pages %d\n", ret); - /* now try to shrink everyone else */ if (! retry_alloc) { retry_alloc = true; @@ -2666,11 +2646,8 @@ i915_gem_object_bind_to_gtt(struct drm_gem_object *obj, unsigned alignment) obj_priv->gtt_space = NULL; ret = i915_gem_evict_something(dev, obj->size); - if (ret) { - if (ret != -ERESTARTSYS) - DRM_ERROR("Failed to allocate space to bind AGP: %d\n", ret); + if (ret) return ret; - } goto search_free; } @@ -3870,11 +3847,8 @@ i915_gem_object_pin(struct drm_gem_object *obj, uint32_t alignment) i915_verify_inactive(dev, __FILE__, __LINE__); if (obj_priv->gtt_space == NULL) { ret = i915_gem_object_bind_to_gtt(obj, alignment); - if (ret != 0) { - if (ret != -EBUSY && ret != -ERESTARTSYS) - DRM_ERROR("Failure to bind: %d\n", ret); + if (ret) return ret; - } } /* * Pre-965 chips need a fence register set up in order to -- cgit v1.2.3 From bb6baf76f45708dbba651ed76a7ad94462f30c0b Mon Sep 17 00:00:00 2001 From: Chris Wilson Date: Tue, 22 Sep 2009 14:24:13 +0100 Subject: drm/i915: Track purged state. In order to correctly prevent the invalid reuse of a purged buffer, we need to track such events and warn the user before something bad happens. 
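The purged state is deliberately sticky. A sketch of the transition rule enforced by the madvise ioctl below (sketch_set_madv() is illustrative, not the committed code):

/* madv moves freely between I915_MADV_WILLNEED and I915_MADV_DONTNEED,
 * but once the backing store has been truncated the object stays in
 * __I915_MADV_PURGED forever and userspace sees retained == 0. */
static void sketch_set_madv(struct drm_i915_gem_object *obj_priv, int madv)
{
        if (obj_priv->madv != __I915_MADV_PURGED)
                obj_priv->madv = madv;
}
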
Signed-off-by: Chris Wilson --- drivers/gpu/drm/i915/i915_gem.c | 24 +++++++++++++++--------- include/drm/i915_drm.h | 1 + 2 files changed, 16 insertions(+), 9 deletions(-) (limited to 'drivers/gpu/drm/i915/i915_gem.c') diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c index 8beec97fa348..f4f714e39b7b 100644 --- a/drivers/gpu/drm/i915/i915_gem.c +++ b/drivers/gpu/drm/i915/i915_gem.c @@ -1470,6 +1470,7 @@ i915_gem_object_put_pages(struct drm_gem_object *obj) int i; BUG_ON(obj_priv->pages_refcount == 0); + BUG_ON(obj_priv->madv == __I915_MADV_PURGED); if (--obj_priv->pages_refcount != 0) return; @@ -1534,11 +1535,14 @@ i915_gem_object_move_to_flushing(struct drm_gem_object *obj) static void i915_gem_object_truncate(struct drm_gem_object *obj) { - struct inode *inode; + struct drm_i915_gem_object *obj_priv = obj->driver_private; + struct inode *inode; - inode = obj->filp->f_path.dentry->d_inode; - if (inode->i_op->truncate) - inode->i_op->truncate (inode); + inode = obj->filp->f_path.dentry->d_inode; + if (inode->i_op->truncate) + inode->i_op->truncate (inode); + + obj_priv->madv = __I915_MADV_PURGED; } static inline int @@ -2559,7 +2563,7 @@ i915_gem_object_bind_to_gtt(struct drm_gem_object *obj, unsigned alignment) if (dev_priv->mm.suspended) return -EBUSY; - if (obj_priv->madv == I915_MADV_DONTNEED) { + if (obj_priv->madv != I915_MADV_WILLNEED) { DRM_ERROR("Attempting to bind a purgeable object\n"); return -EINVAL; } @@ -3928,8 +3932,8 @@ i915_gem_pin_ioctl(struct drm_device *dev, void *data, } obj_priv = obj->driver_private; - if (obj_priv->madv == I915_MADV_DONTNEED) { - DRM_ERROR("Attempting to pin a I915_MADV_DONTNEED buffer\n"); + if (obj_priv->madv != I915_MADV_WILLNEED) { + DRM_ERROR("Attempting to pin a purgeable buffer\n"); drm_gem_object_unreference(obj); mutex_unlock(&dev->struct_mutex); return -EINVAL; @@ -4081,14 +4085,16 @@ i915_gem_madvise_ioctl(struct drm_device *dev, void *data, return -EINVAL; } - obj_priv->madv = args->madv; - args->retained = obj_priv->gtt_space != NULL; + if (obj_priv->madv != __I915_MADV_PURGED) + obj_priv->madv = args->madv; /* if the object is no longer bound, discard its backing storage */ if (i915_gem_object_is_purgeable(obj_priv) && obj_priv->gtt_space == NULL) i915_gem_object_truncate(obj); + args->retained = obj_priv->madv != __I915_MADV_PURGED; + drm_gem_object_unreference(obj); mutex_unlock(&dev->struct_mutex); diff --git a/include/drm/i915_drm.h b/include/drm/i915_drm.h index 607c9da061e8..7e0cb1da92e6 100644 --- a/include/drm/i915_drm.h +++ b/include/drm/i915_drm.h @@ -671,6 +671,7 @@ struct drm_i915_get_pipe_from_crtc_id { #define I915_MADV_WILLNEED 0 #define I915_MADV_DONTNEED 1 +#define __I915_MADV_PURGED 2 /* internal state */ struct drm_i915_gem_madvise { /** Handle of the buffer to change the backing store advice */ -- cgit v1.2.3 From ab18282d58ce67ee5cd720d99a91c1a2bbf3e693 Mon Sep 17 00:00:00 2001 From: Chris Wilson Date: Tue, 22 Sep 2009 18:46:17 +0100 Subject: drm/i915: Warn before mmaping a purgeable buffer. Only allow the user to mmap buffers that have not been marked as purgeable. 
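For illustration, the userspace sequence that is now refused up front; a sketch assuming the madvise and mmap_gtt ioctl definitions from i915_drm.h (try_mmap_purgeable() is not part of the patch):

#include <stdint.h>
#include <sys/ioctl.h>
#include "i915_drm.h"

/* Mark a buffer purgeable, then ask for a GTT mapping of it. After
 * this patch the second ioctl fails with EINVAL instead of handing
 * out a mapping whose pages may be discarded underneath the user. */
static int try_mmap_purgeable(int fd, uint32_t handle)
{
        struct drm_i915_gem_madvise madv = {
                .handle = handle,
                .madv = I915_MADV_DONTNEED,
        };
        struct drm_i915_gem_mmap_gtt map = {
                .handle = handle,
        };

        if (ioctl(fd, DRM_IOCTL_I915_GEM_MADVISE, &madv) != 0)
                return -1;

        return ioctl(fd, DRM_IOCTL_I915_GEM_MMAP_GTT, &map);
}
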
Signed-off-by: Chris Wilson --- drivers/gpu/drm/i915/i915_gem.c | 8 ++++++++ 1 file changed, 8 insertions(+) (limited to 'drivers/gpu/drm/i915/i915_gem.c') diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c index f4f714e39b7b..4755ba41bfef 100644 --- a/drivers/gpu/drm/i915/i915_gem.c +++ b/drivers/gpu/drm/i915/i915_gem.c @@ -1431,6 +1431,14 @@ i915_gem_mmap_gtt_ioctl(struct drm_device *dev, void *data, obj_priv = obj->driver_private; + if (obj_priv->madv != I915_MADV_WILLNEED) { + DRM_ERROR("Attempting to mmap a purgeable buffer\n"); + drm_gem_object_unreference(obj); + mutex_unlock(&dev->struct_mutex); + return -EINVAL; + } + + if (!obj_priv->mmap_offset) { ret = i915_gem_create_mmap_offset(obj); if (ret) { -- cgit v1.2.3 From c715089f49844260f1eeae8e3b55af9468ba1325 Mon Sep 17 00:00:00 2001 From: Chris Wilson Date: Wed, 23 Sep 2009 00:43:56 +0100 Subject: drm/i915: Handle ERESTARTSYS during page fault During a page fault and rebinding the buffer there exists a window for a signal to arrive during the i915_wait_request() and trigger a ERESTARTSYS. This used to be handled by returning SIGBUS and thereby killing the application. Try 'cairo-perf-trace & cairo-test-suite' and watch X go boom! The solution as suggested by H. Peter Anvin is to simply return NOPAGE and leave the higher layers to spot we did not fill the page and resubmit the page fault. Signed-off-by: Chris Wilson Cc: stable@kernel.org [anholt: Mostly squash it with another commit] --- drivers/gpu/drm/i915/i915_gem.c | 29 ++++++++++++----------------- 1 file changed, 12 insertions(+), 17 deletions(-) (limited to 'drivers/gpu/drm/i915/i915_gem.c') diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c index 4755ba41bfef..6129b7b4f1a5 100644 --- a/drivers/gpu/drm/i915/i915_gem.c +++ b/drivers/gpu/drm/i915/i915_gem.c @@ -1200,26 +1200,21 @@ int i915_gem_fault(struct vm_area_struct *vma, struct vm_fault *vmf) mutex_lock(&dev->struct_mutex); if (!obj_priv->gtt_space) { ret = i915_gem_object_bind_to_gtt(obj, 0); - if (ret) { - mutex_unlock(&dev->struct_mutex); - return VM_FAULT_SIGBUS; - } + if (ret) + goto unlock; + list_add_tail(&obj_priv->list, &dev_priv->mm.inactive_list); ret = i915_gem_object_set_to_gtt_domain(obj, write); - if (ret) { - mutex_unlock(&dev->struct_mutex); - return VM_FAULT_SIGBUS; - } + if (ret) + goto unlock; } /* Need a new fence register? */ if (obj_priv->tiling_mode != I915_TILING_NONE) { ret = i915_gem_object_get_fence_reg(obj); - if (ret) { - mutex_unlock(&dev->struct_mutex); - return VM_FAULT_SIGBUS; - } + if (ret) + goto unlock; } pfn = ((dev->agp->base + obj_priv->gtt_offset) >> PAGE_SHIFT) + @@ -1227,18 +1222,18 @@ int i915_gem_fault(struct vm_area_struct *vma, struct vm_fault *vmf) /* Finally, remap it using the new GTT offset */ ret = vm_insert_pfn(vma, (unsigned long)vmf->virtual_address, pfn); - +unlock: mutex_unlock(&dev->struct_mutex); switch (ret) { + case 0: + case -ERESTARTSYS: + return VM_FAULT_NOPAGE; case -ENOMEM: case -EAGAIN: return VM_FAULT_OOM; - case -EFAULT: - case -EINVAL: - return VM_FAULT_SIGBUS; default: - return VM_FAULT_NOPAGE; + return VM_FAULT_SIGBUS; } } -- cgit v1.2.3
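The resulting errno-to-fault-code mapping, restated as a named helper for clarity (a sketch of the switch at the end of i915_gem_fault(); the tree keeps it inline):

static int i915_fault_code(int ret)
{
        switch (ret) {
        case 0:
        case -ERESTARTSYS:
                /* No PTE was inserted, so NOPAGE makes the kernel
                 * deliver the pending signal and re-execute the
                 * faulting instruction, which simply retries the
                 * fault once the signal has been handled. */
                return VM_FAULT_NOPAGE;
        case -ENOMEM:
        case -EAGAIN:
                return VM_FAULT_OOM;
        default:
                return VM_FAULT_SIGBUS;
        }
}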