diff options
-rw-r--r-- | drivers/gpu/drm/i915/i915_drv.h | 2 | ||||
-rw-r--r-- | drivers/gpu/drm/i915/i915_gem.c | 58 | ||||
-rw-r--r-- | drivers/gpu/drm/i915/i915_gem_execbuffer.c | 2 | ||||
-rw-r--r-- | drivers/gpu/drm/i915/i915_gem_gtt.c | 2 |
4 files changed, 35 insertions, 29 deletions
diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h index b5df2308f8e6..34d3f2fae8ac 100644 --- a/drivers/gpu/drm/i915/i915_drv.h +++ b/drivers/gpu/drm/i915/i915_drv.h @@ -1841,7 +1841,7 @@ static inline bool i915_terminally_wedged(struct i915_gpu_error *error) } void i915_gem_reset(struct drm_device *dev); -void i915_gem_clflush_object(struct drm_i915_gem_object *obj); +void i915_gem_clflush_object(struct drm_i915_gem_object *obj, bool force); int __must_check i915_gem_object_finish_gpu(struct drm_i915_gem_object *obj); int __must_check i915_gem_init(struct drm_device *dev); int __must_check i915_gem_init_hw(struct drm_device *dev); diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c index b98c3b0e5a02..54d76e9392d8 100644 --- a/drivers/gpu/drm/i915/i915_gem.c +++ b/drivers/gpu/drm/i915/i915_gem.c @@ -37,7 +37,8 @@ #include <linux/dma-buf.h> static void i915_gem_object_flush_gtt_write_domain(struct drm_i915_gem_object *obj); -static void i915_gem_object_flush_cpu_write_domain(struct drm_i915_gem_object *obj); +static void i915_gem_object_flush_cpu_write_domain(struct drm_i915_gem_object *obj, + bool force); static __must_check int i915_gem_object_bind_to_vm(struct drm_i915_gem_object *obj, struct i915_address_space *vm, @@ -67,6 +68,14 @@ static bool cpu_cache_is_coherent(struct drm_device *dev, return HAS_LLC(dev) || level != I915_CACHE_NONE; } +static bool cpu_write_needs_clflush(struct drm_i915_gem_object *obj) +{ + if (!cpu_cache_is_coherent(obj->base.dev, obj->cache_level)) + return true; + + return obj->pin_display; +} + static inline void i915_gem_object_fence_lost(struct drm_i915_gem_object *obj) { if (obj->tiling_mode) @@ -742,8 +751,7 @@ i915_gem_shmem_pwrite(struct drm_device *dev, * write domain and manually flush cachelines (if required). This * optimizes for the case when the gpu will use the data * right away and we therefore have to clflush anyway. */ - if (obj->cache_level == I915_CACHE_NONE) - needs_clflush_after = 1; + needs_clflush_after = cpu_write_needs_clflush(obj); if (i915_gem_obj_bound_any(obj)) { ret = i915_gem_object_set_to_gtt_domain(obj, true); if (ret) @@ -833,7 +841,7 @@ out: */ if (!needs_clflush_after && obj->base.write_domain != I915_GEM_DOMAIN_CPU) { - i915_gem_clflush_object(obj); + i915_gem_clflush_object(obj, obj->pin_display); i915_gem_chipset_flush(dev); } } @@ -911,9 +919,9 @@ i915_gem_pwrite_ioctl(struct drm_device *dev, void *data, goto out; } - if (obj->cache_level == I915_CACHE_NONE && - obj->tiling_mode == I915_TILING_NONE && - obj->base.write_domain != I915_GEM_DOMAIN_CPU) { + if (obj->tiling_mode == I915_TILING_NONE && + obj->base.write_domain != I915_GEM_DOMAIN_CPU && + cpu_write_needs_clflush(obj)) { ret = i915_gem_gtt_pwrite_fast(dev, obj, args, file); /* Note that the gtt paths might fail with non-page-backed user * pointers (e.g. gtt mappings when moving data between @@ -1262,8 +1270,8 @@ i915_gem_sw_finish_ioctl(struct drm_device *dev, void *data, } /* Pinned buffers may be scanout, so flush the cache */ - if (obj->pin_count) - i915_gem_object_flush_cpu_write_domain(obj); + if (obj->pin_display) + i915_gem_object_flush_cpu_write_domain(obj, true); drm_gem_object_unreference(&obj->base); unlock: @@ -1640,7 +1648,7 @@ i915_gem_object_put_pages_gtt(struct drm_i915_gem_object *obj) * hope for the best. */ WARN_ON(ret != -EIO); - i915_gem_clflush_object(obj); + i915_gem_clflush_object(obj, true); obj->base.read_domains = obj->base.write_domain = I915_GEM_DOMAIN_CPU; } @@ -3217,7 +3225,8 @@ err_unpin: } void -i915_gem_clflush_object(struct drm_i915_gem_object *obj) +i915_gem_clflush_object(struct drm_i915_gem_object *obj, + bool force) { /* If we don't have a page list set up, then we're not pinned * to GPU, and we can ignore the cache flush because it'll happen @@ -3241,7 +3250,7 @@ i915_gem_clflush_object(struct drm_i915_gem_object *obj) * snooping behaviour occurs naturally as the result of our domain * tracking. */ - if (obj->cache_level != I915_CACHE_NONE) + if (!force && cpu_cache_is_coherent(obj->base.dev, obj->cache_level)) return; trace_i915_gem_object_clflush(obj); @@ -3278,14 +3287,15 @@ i915_gem_object_flush_gtt_write_domain(struct drm_i915_gem_object *obj) /** Flushes the CPU write domain for the object if it's dirty. */ static void -i915_gem_object_flush_cpu_write_domain(struct drm_i915_gem_object *obj) +i915_gem_object_flush_cpu_write_domain(struct drm_i915_gem_object *obj, + bool force) { uint32_t old_write_domain; if (obj->base.write_domain != I915_GEM_DOMAIN_CPU) return; - i915_gem_clflush_object(obj); + i915_gem_clflush_object(obj, force); i915_gem_chipset_flush(obj->base.dev); old_write_domain = obj->base.write_domain; obj->base.write_domain = 0; @@ -3319,7 +3329,7 @@ i915_gem_object_set_to_gtt_domain(struct drm_i915_gem_object *obj, bool write) if (ret) return ret; - i915_gem_object_flush_cpu_write_domain(obj); + i915_gem_object_flush_cpu_write_domain(obj, false); /* Serialise direct access to this object with the barriers for * coherent writes from the GPU, by effectively invalidating the @@ -3409,7 +3419,11 @@ int i915_gem_object_set_cache_level(struct drm_i915_gem_object *obj, obj, cache_level); } - if (cache_level == I915_CACHE_NONE) { + list_for_each_entry(vma, &obj->vma_list, vma_link) + vma->node.color = cache_level; + obj->cache_level = cache_level; + + if (cpu_write_needs_clflush(obj)) { u32 old_read_domains, old_write_domain; /* If we're coming from LLC cached, then we haven't @@ -3432,9 +3446,6 @@ int i915_gem_object_set_cache_level(struct drm_i915_gem_object *obj, old_write_domain); } - list_for_each_entry(vma, &obj->vma_list, vma_link) - vma->node.color = cache_level; - obj->cache_level = cache_level; i915_gem_verify_gtt(dev); return 0; } @@ -3562,7 +3573,7 @@ i915_gem_object_pin_to_display_plane(struct drm_i915_gem_object *obj, if (ret) goto err_unpin_display; - i915_gem_object_flush_cpu_write_domain(obj); + i915_gem_object_flush_cpu_write_domain(obj, true); old_write_domain = obj->base.write_domain; old_read_domains = obj->base.read_domains; @@ -3634,8 +3645,7 @@ i915_gem_object_set_to_cpu_domain(struct drm_i915_gem_object *obj, bool write) /* Flush the CPU cache if it's still invalid. */ if ((obj->base.read_domains & I915_GEM_DOMAIN_CPU) == 0) { - if (!cpu_cache_is_coherent(obj->base.dev, obj->cache_level)) - i915_gem_clflush_object(obj); + i915_gem_clflush_object(obj, false); obj->base.read_domains |= I915_GEM_DOMAIN_CPU; } @@ -3817,10 +3827,6 @@ i915_gem_pin_ioctl(struct drm_device *dev, void *data, obj->user_pin_count++; obj->pin_filp = file; - /* XXX - flush the CPU caches for pinned objects - * as the X server doesn't manage domains yet - */ - i915_gem_object_flush_cpu_write_domain(obj); args->offset = i915_gem_obj_ggtt_offset(obj); out: drm_gem_object_unreference(&obj->base); diff --git a/drivers/gpu/drm/i915/i915_gem_execbuffer.c b/drivers/gpu/drm/i915/i915_gem_execbuffer.c index 8ccc29ac9629..e999578a021c 100644 --- a/drivers/gpu/drm/i915/i915_gem_execbuffer.c +++ b/drivers/gpu/drm/i915/i915_gem_execbuffer.c @@ -716,7 +716,7 @@ i915_gem_execbuffer_move_to_gpu(struct intel_ring_buffer *ring, return ret; if (obj->base.write_domain & I915_GEM_DOMAIN_CPU) - i915_gem_clflush_object(obj); + i915_gem_clflush_object(obj, false); flush_domains |= obj->base.write_domain; } diff --git a/drivers/gpu/drm/i915/i915_gem_gtt.c b/drivers/gpu/drm/i915/i915_gem_gtt.c index 24fb989593f0..c9420c280cf0 100644 --- a/drivers/gpu/drm/i915/i915_gem_gtt.c +++ b/drivers/gpu/drm/i915/i915_gem_gtt.c @@ -487,7 +487,7 @@ void i915_gem_restore_gtt_mappings(struct drm_device *dev) dev_priv->gtt.base.total / PAGE_SIZE); list_for_each_entry(obj, &dev_priv->mm.bound_list, global_list) { - i915_gem_clflush_object(obj); + i915_gem_clflush_object(obj, obj->pin_display); i915_gem_gtt_bind_object(obj, obj->cache_level); } |