summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
-rw-r--r--drivers/gpu/drm/i915/i915_drv.h2
-rw-r--r--drivers/gpu/drm/i915/i915_gem.c58
-rw-r--r--drivers/gpu/drm/i915/i915_gem_execbuffer.c2
-rw-r--r--drivers/gpu/drm/i915/i915_gem_gtt.c2
4 files changed, 35 insertions, 29 deletions
diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h
index b5df2308f8e6..34d3f2fae8ac 100644
--- a/drivers/gpu/drm/i915/i915_drv.h
+++ b/drivers/gpu/drm/i915/i915_drv.h
@@ -1841,7 +1841,7 @@ static inline bool i915_terminally_wedged(struct i915_gpu_error *error)
}
void i915_gem_reset(struct drm_device *dev);
-void i915_gem_clflush_object(struct drm_i915_gem_object *obj);
+void i915_gem_clflush_object(struct drm_i915_gem_object *obj, bool force);
int __must_check i915_gem_object_finish_gpu(struct drm_i915_gem_object *obj);
int __must_check i915_gem_init(struct drm_device *dev);
int __must_check i915_gem_init_hw(struct drm_device *dev);
diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c
index b98c3b0e5a02..54d76e9392d8 100644
--- a/drivers/gpu/drm/i915/i915_gem.c
+++ b/drivers/gpu/drm/i915/i915_gem.c
@@ -37,7 +37,8 @@
#include <linux/dma-buf.h>
static void i915_gem_object_flush_gtt_write_domain(struct drm_i915_gem_object *obj);
-static void i915_gem_object_flush_cpu_write_domain(struct drm_i915_gem_object *obj);
+static void i915_gem_object_flush_cpu_write_domain(struct drm_i915_gem_object *obj,
+ bool force);
static __must_check int
i915_gem_object_bind_to_vm(struct drm_i915_gem_object *obj,
struct i915_address_space *vm,
@@ -67,6 +68,14 @@ static bool cpu_cache_is_coherent(struct drm_device *dev,
return HAS_LLC(dev) || level != I915_CACHE_NONE;
}
+static bool cpu_write_needs_clflush(struct drm_i915_gem_object *obj)
+{
+ if (!cpu_cache_is_coherent(obj->base.dev, obj->cache_level))
+ return true;
+
+ return obj->pin_display;
+}
+
static inline void i915_gem_object_fence_lost(struct drm_i915_gem_object *obj)
{
if (obj->tiling_mode)
@@ -742,8 +751,7 @@ i915_gem_shmem_pwrite(struct drm_device *dev,
* write domain and manually flush cachelines (if required). This
* optimizes for the case when the gpu will use the data
* right away and we therefore have to clflush anyway. */
- if (obj->cache_level == I915_CACHE_NONE)
- needs_clflush_after = 1;
+ needs_clflush_after = cpu_write_needs_clflush(obj);
if (i915_gem_obj_bound_any(obj)) {
ret = i915_gem_object_set_to_gtt_domain(obj, true);
if (ret)
@@ -833,7 +841,7 @@ out:
*/
if (!needs_clflush_after &&
obj->base.write_domain != I915_GEM_DOMAIN_CPU) {
- i915_gem_clflush_object(obj);
+ i915_gem_clflush_object(obj, obj->pin_display);
i915_gem_chipset_flush(dev);
}
}
@@ -911,9 +919,9 @@ i915_gem_pwrite_ioctl(struct drm_device *dev, void *data,
goto out;
}
- if (obj->cache_level == I915_CACHE_NONE &&
- obj->tiling_mode == I915_TILING_NONE &&
- obj->base.write_domain != I915_GEM_DOMAIN_CPU) {
+ if (obj->tiling_mode == I915_TILING_NONE &&
+ obj->base.write_domain != I915_GEM_DOMAIN_CPU &&
+ cpu_write_needs_clflush(obj)) {
ret = i915_gem_gtt_pwrite_fast(dev, obj, args, file);
/* Note that the gtt paths might fail with non-page-backed user
* pointers (e.g. gtt mappings when moving data between
@@ -1262,8 +1270,8 @@ i915_gem_sw_finish_ioctl(struct drm_device *dev, void *data,
}
/* Pinned buffers may be scanout, so flush the cache */
- if (obj->pin_count)
- i915_gem_object_flush_cpu_write_domain(obj);
+ if (obj->pin_display)
+ i915_gem_object_flush_cpu_write_domain(obj, true);
drm_gem_object_unreference(&obj->base);
unlock:
@@ -1640,7 +1648,7 @@ i915_gem_object_put_pages_gtt(struct drm_i915_gem_object *obj)
* hope for the best.
*/
WARN_ON(ret != -EIO);
- i915_gem_clflush_object(obj);
+ i915_gem_clflush_object(obj, true);
obj->base.read_domains = obj->base.write_domain = I915_GEM_DOMAIN_CPU;
}
@@ -3217,7 +3225,8 @@ err_unpin:
}
void
-i915_gem_clflush_object(struct drm_i915_gem_object *obj)
+i915_gem_clflush_object(struct drm_i915_gem_object *obj,
+ bool force)
{
/* If we don't have a page list set up, then we're not pinned
* to GPU, and we can ignore the cache flush because it'll happen
@@ -3241,7 +3250,7 @@ i915_gem_clflush_object(struct drm_i915_gem_object *obj)
* snooping behaviour occurs naturally as the result of our domain
* tracking.
*/
- if (obj->cache_level != I915_CACHE_NONE)
+ if (!force && cpu_cache_is_coherent(obj->base.dev, obj->cache_level))
return;
trace_i915_gem_object_clflush(obj);
@@ -3278,14 +3287,15 @@ i915_gem_object_flush_gtt_write_domain(struct drm_i915_gem_object *obj)
/** Flushes the CPU write domain for the object if it's dirty. */
static void
-i915_gem_object_flush_cpu_write_domain(struct drm_i915_gem_object *obj)
+i915_gem_object_flush_cpu_write_domain(struct drm_i915_gem_object *obj,
+ bool force)
{
uint32_t old_write_domain;
if (obj->base.write_domain != I915_GEM_DOMAIN_CPU)
return;
- i915_gem_clflush_object(obj);
+ i915_gem_clflush_object(obj, force);
i915_gem_chipset_flush(obj->base.dev);
old_write_domain = obj->base.write_domain;
obj->base.write_domain = 0;
@@ -3319,7 +3329,7 @@ i915_gem_object_set_to_gtt_domain(struct drm_i915_gem_object *obj, bool write)
if (ret)
return ret;
- i915_gem_object_flush_cpu_write_domain(obj);
+ i915_gem_object_flush_cpu_write_domain(obj, false);
/* Serialise direct access to this object with the barriers for
* coherent writes from the GPU, by effectively invalidating the
@@ -3409,7 +3419,11 @@ int i915_gem_object_set_cache_level(struct drm_i915_gem_object *obj,
obj, cache_level);
}
- if (cache_level == I915_CACHE_NONE) {
+ list_for_each_entry(vma, &obj->vma_list, vma_link)
+ vma->node.color = cache_level;
+ obj->cache_level = cache_level;
+
+ if (cpu_write_needs_clflush(obj)) {
u32 old_read_domains, old_write_domain;
/* If we're coming from LLC cached, then we haven't
@@ -3432,9 +3446,6 @@ int i915_gem_object_set_cache_level(struct drm_i915_gem_object *obj,
old_write_domain);
}
- list_for_each_entry(vma, &obj->vma_list, vma_link)
- vma->node.color = cache_level;
- obj->cache_level = cache_level;
i915_gem_verify_gtt(dev);
return 0;
}
@@ -3562,7 +3573,7 @@ i915_gem_object_pin_to_display_plane(struct drm_i915_gem_object *obj,
if (ret)
goto err_unpin_display;
- i915_gem_object_flush_cpu_write_domain(obj);
+ i915_gem_object_flush_cpu_write_domain(obj, true);
old_write_domain = obj->base.write_domain;
old_read_domains = obj->base.read_domains;
@@ -3634,8 +3645,7 @@ i915_gem_object_set_to_cpu_domain(struct drm_i915_gem_object *obj, bool write)
/* Flush the CPU cache if it's still invalid. */
if ((obj->base.read_domains & I915_GEM_DOMAIN_CPU) == 0) {
- if (!cpu_cache_is_coherent(obj->base.dev, obj->cache_level))
- i915_gem_clflush_object(obj);
+ i915_gem_clflush_object(obj, false);
obj->base.read_domains |= I915_GEM_DOMAIN_CPU;
}
@@ -3817,10 +3827,6 @@ i915_gem_pin_ioctl(struct drm_device *dev, void *data,
obj->user_pin_count++;
obj->pin_filp = file;
- /* XXX - flush the CPU caches for pinned objects
- * as the X server doesn't manage domains yet
- */
- i915_gem_object_flush_cpu_write_domain(obj);
args->offset = i915_gem_obj_ggtt_offset(obj);
out:
drm_gem_object_unreference(&obj->base);
diff --git a/drivers/gpu/drm/i915/i915_gem_execbuffer.c b/drivers/gpu/drm/i915/i915_gem_execbuffer.c
index 8ccc29ac9629..e999578a021c 100644
--- a/drivers/gpu/drm/i915/i915_gem_execbuffer.c
+++ b/drivers/gpu/drm/i915/i915_gem_execbuffer.c
@@ -716,7 +716,7 @@ i915_gem_execbuffer_move_to_gpu(struct intel_ring_buffer *ring,
return ret;
if (obj->base.write_domain & I915_GEM_DOMAIN_CPU)
- i915_gem_clflush_object(obj);
+ i915_gem_clflush_object(obj, false);
flush_domains |= obj->base.write_domain;
}
diff --git a/drivers/gpu/drm/i915/i915_gem_gtt.c b/drivers/gpu/drm/i915/i915_gem_gtt.c
index 24fb989593f0..c9420c280cf0 100644
--- a/drivers/gpu/drm/i915/i915_gem_gtt.c
+++ b/drivers/gpu/drm/i915/i915_gem_gtt.c
@@ -487,7 +487,7 @@ void i915_gem_restore_gtt_mappings(struct drm_device *dev)
dev_priv->gtt.base.total / PAGE_SIZE);
list_for_each_entry(obj, &dev_priv->mm.bound_list, global_list) {
- i915_gem_clflush_object(obj);
+ i915_gem_clflush_object(obj, obj->pin_display);
i915_gem_gtt_bind_object(obj, obj->cache_level);
}
OpenPOWER on IntegriCloud