summaryrefslogtreecommitdiffstats
path: root/drivers/gpu/drm/i915/intel_ringbuffer.c
diff options
context:
space:
mode:
Diffstat (limited to 'drivers/gpu/drm/i915/intel_ringbuffer.c')
-rw-r--r--drivers/gpu/drm/i915/intel_ringbuffer.c152
1 files changed, 133 insertions, 19 deletions
diff --git a/drivers/gpu/drm/i915/intel_ringbuffer.c b/drivers/gpu/drm/i915/intel_ringbuffer.c
index 1aef516cc6fa..ecbc5c5dbbbc 100644
--- a/drivers/gpu/drm/i915/intel_ringbuffer.c
+++ b/drivers/gpu/drm/i915/intel_ringbuffer.c
@@ -261,6 +261,83 @@ gen6_render_ring_flush(struct intel_ring_buffer *ring,
return 0;
}
+static int
+gen7_render_ring_cs_stall_wa(struct intel_ring_buffer *ring)
+{
+ int ret;
+
+ ret = intel_ring_begin(ring, 4);
+ if (ret)
+ return ret;
+
+ intel_ring_emit(ring, GFX_OP_PIPE_CONTROL(4));
+ intel_ring_emit(ring, PIPE_CONTROL_CS_STALL |
+ PIPE_CONTROL_STALL_AT_SCOREBOARD);
+ intel_ring_emit(ring, 0);
+ intel_ring_emit(ring, 0);
+ intel_ring_advance(ring);
+
+ return 0;
+}
+
+static int
+gen7_render_ring_flush(struct intel_ring_buffer *ring,
+ u32 invalidate_domains, u32 flush_domains)
+{
+ u32 flags = 0;
+ struct pipe_control *pc = ring->private;
+ u32 scratch_addr = pc->gtt_offset + 128;
+ int ret;
+
+ /*
+ * Ensure that any following seqno writes only happen when the render
+ * cache is indeed flushed.
+ *
+ * Workaround: 4th PIPE_CONTROL command (except the ones with only
+ * read-cache invalidate bits set) must have the CS_STALL bit set. We
+ * don't try to be clever and just set it unconditionally.
+ */
+ flags |= PIPE_CONTROL_CS_STALL;
+
+ /* Just flush everything. Experiments have shown that reducing the
+ * number of bits based on the write domains has little performance
+ * impact.
+ */
+ if (flush_domains) {
+ flags |= PIPE_CONTROL_RENDER_TARGET_CACHE_FLUSH;
+ flags |= PIPE_CONTROL_DEPTH_CACHE_FLUSH;
+ }
+ if (invalidate_domains) {
+ flags |= PIPE_CONTROL_TLB_INVALIDATE;
+ flags |= PIPE_CONTROL_INSTRUCTION_CACHE_INVALIDATE;
+ flags |= PIPE_CONTROL_TEXTURE_CACHE_INVALIDATE;
+ flags |= PIPE_CONTROL_VF_CACHE_INVALIDATE;
+ flags |= PIPE_CONTROL_CONST_CACHE_INVALIDATE;
+ flags |= PIPE_CONTROL_STATE_CACHE_INVALIDATE;
+ /*
+ * TLB invalidate requires a post-sync write.
+ */
+ flags |= PIPE_CONTROL_QW_WRITE;
+
+ /* Workaround: we must issue a pipe_control with CS-stall bit
+ * set before a pipe_control command that has the state cache
+ * invalidate bit set. */
+ gen7_render_ring_cs_stall_wa(ring);
+ }
+
+ ret = intel_ring_begin(ring, 4);
+ if (ret)
+ return ret;
+
+ intel_ring_emit(ring, GFX_OP_PIPE_CONTROL(4));
+ intel_ring_emit(ring, flags);
+ intel_ring_emit(ring, scratch_addr | PIPE_CONTROL_GLOBAL_GTT);
+ intel_ring_emit(ring, 0);
+ intel_ring_advance(ring);
+
+ return 0;
+}
+
static void ring_write_tail(struct intel_ring_buffer *ring,
u32 value)
{
@@ -381,12 +458,12 @@ init_pipe_control(struct intel_ring_buffer *ring)
i915_gem_object_set_cache_level(obj, I915_CACHE_LLC);
- ret = i915_gem_object_pin(obj, 4096, true);
+ ret = i915_gem_object_pin(obj, 4096, true, false);
if (ret)
goto err_unref;
pc->gtt_offset = obj->gtt_offset;
- pc->cpu_page = kmap(obj->pages[0]);
+ pc->cpu_page = kmap(sg_page(obj->pages->sgl));
if (pc->cpu_page == NULL)
goto err_unpin;
@@ -413,7 +490,8 @@ cleanup_pipe_control(struct intel_ring_buffer *ring)
return;
obj = pc->obj;
- kunmap(obj->pages[0]);
+
+ kunmap(sg_page(obj->pages->sgl));
i915_gem_object_unpin(obj);
drm_gem_object_unreference(&obj->base);
@@ -461,7 +539,7 @@ static int init_render_ring(struct intel_ring_buffer *ring)
if (INTEL_INFO(dev)->gen >= 6)
I915_WRITE(INSTPM, _MASKED_BIT_ENABLE(INSTPM_FORCE_ORDERING));
- if (IS_IVYBRIDGE(dev))
+ if (HAS_L3_GPU_CACHE(dev))
I915_WRITE_IMR(ring, ~GEN6_RENDER_L3_PARITY_ERROR);
return ret;
@@ -627,26 +705,24 @@ pc_render_add_request(struct intel_ring_buffer *ring,
}
static u32
-gen6_ring_get_seqno(struct intel_ring_buffer *ring)
+gen6_ring_get_seqno(struct intel_ring_buffer *ring, bool lazy_coherency)
{
- struct drm_device *dev = ring->dev;
-
/* Workaround to force correct ordering between irq and seqno writes on
* ivb (and maybe also on snb) by reading from a CS register (like
* ACTHD) before reading the status page. */
- if (IS_GEN6(dev) || IS_GEN7(dev))
+ if (!lazy_coherency)
intel_ring_get_active_head(ring);
return intel_read_status_page(ring, I915_GEM_HWS_INDEX);
}
static u32
-ring_get_seqno(struct intel_ring_buffer *ring)
+ring_get_seqno(struct intel_ring_buffer *ring, bool lazy_coherency)
{
return intel_read_status_page(ring, I915_GEM_HWS_INDEX);
}
static u32
-pc_render_get_seqno(struct intel_ring_buffer *ring)
+pc_render_get_seqno(struct intel_ring_buffer *ring, bool lazy_coherency)
{
struct pipe_control *pc = ring->private;
return pc->cpu_page[0];
@@ -851,7 +927,7 @@ gen6_ring_get_irq(struct intel_ring_buffer *ring)
spin_lock_irqsave(&dev_priv->irq_lock, flags);
if (ring->irq_refcount++ == 0) {
- if (IS_IVYBRIDGE(dev) && ring->id == RCS)
+ if (HAS_L3_GPU_CACHE(dev) && ring->id == RCS)
I915_WRITE_IMR(ring, ~(ring->irq_enable_mask |
GEN6_RENDER_L3_PARITY_ERROR));
else
@@ -874,7 +950,7 @@ gen6_ring_put_irq(struct intel_ring_buffer *ring)
spin_lock_irqsave(&dev_priv->irq_lock, flags);
if (--ring->irq_refcount == 0) {
- if (IS_IVYBRIDGE(dev) && ring->id == RCS)
+ if (HAS_L3_GPU_CACHE(dev) && ring->id == RCS)
I915_WRITE_IMR(ring, ~GEN6_RENDER_L3_PARITY_ERROR);
else
I915_WRITE_IMR(ring, ~0);
@@ -950,7 +1026,7 @@ static void cleanup_status_page(struct intel_ring_buffer *ring)
if (obj == NULL)
return;
- kunmap(obj->pages[0]);
+ kunmap(sg_page(obj->pages->sgl));
i915_gem_object_unpin(obj);
drm_gem_object_unreference(&obj->base);
ring->status_page.obj = NULL;
@@ -971,13 +1047,13 @@ static int init_status_page(struct intel_ring_buffer *ring)
i915_gem_object_set_cache_level(obj, I915_CACHE_LLC);
- ret = i915_gem_object_pin(obj, 4096, true);
+ ret = i915_gem_object_pin(obj, 4096, true, false);
if (ret != 0) {
goto err_unref;
}
ring->status_page.gfx_addr = obj->gtt_offset;
- ring->status_page.page_addr = kmap(obj->pages[0]);
+ ring->status_page.page_addr = kmap(sg_page(obj->pages->sgl));
if (ring->status_page.page_addr == NULL) {
ret = -ENOMEM;
goto err_unpin;
@@ -1009,7 +1085,6 @@ static int intel_init_ring_buffer(struct drm_device *dev,
ring->dev = dev;
INIT_LIST_HEAD(&ring->active_list);
INIT_LIST_HEAD(&ring->request_list);
- INIT_LIST_HEAD(&ring->gpu_write_list);
ring->size = 32 * PAGE_SIZE;
init_waitqueue_head(&ring->irq_queue);
@@ -1029,7 +1104,7 @@ static int intel_init_ring_buffer(struct drm_device *dev,
ring->obj = obj;
- ret = i915_gem_object_pin(obj, PAGE_SIZE, true);
+ ret = i915_gem_object_pin(obj, PAGE_SIZE, true, false);
if (ret)
goto err_unref;
@@ -1378,7 +1453,9 @@ int intel_init_render_ring_buffer(struct drm_device *dev)
if (INTEL_INFO(dev)->gen >= 6) {
ring->add_request = gen6_add_request;
- ring->flush = gen6_render_ring_flush;
+ ring->flush = gen7_render_ring_flush;
+ if (INTEL_INFO(dev)->gen == 6)
+ ring->flush = gen6_render_ring_flush;
ring->irq_get = gen6_ring_get_irq;
ring->irq_put = gen6_ring_put_irq;
ring->irq_enable_mask = GT_USER_INTERRUPT;
@@ -1480,7 +1557,6 @@ int intel_render_ring_init_dri(struct drm_device *dev, u64 start, u32 size)
ring->dev = dev;
INIT_LIST_HEAD(&ring->active_list);
INIT_LIST_HEAD(&ring->request_list);
- INIT_LIST_HEAD(&ring->gpu_write_list);
ring->size = size;
ring->effective_size = ring->size;
@@ -1573,3 +1649,41 @@ int intel_init_blt_ring_buffer(struct drm_device *dev)
return intel_init_ring_buffer(dev, ring);
}
+
+int
+intel_ring_flush_all_caches(struct intel_ring_buffer *ring)
+{
+ int ret;
+
+ if (!ring->gpu_caches_dirty)
+ return 0;
+
+ ret = ring->flush(ring, 0, I915_GEM_GPU_DOMAINS);
+ if (ret)
+ return ret;
+
+ trace_i915_gem_ring_flush(ring, 0, I915_GEM_GPU_DOMAINS);
+
+ ring->gpu_caches_dirty = false;
+ return 0;
+}
+
+int
+intel_ring_invalidate_all_caches(struct intel_ring_buffer *ring)
+{
+ uint32_t flush_domains;
+ int ret;
+
+ flush_domains = 0;
+ if (ring->gpu_caches_dirty)
+ flush_domains = I915_GEM_GPU_DOMAINS;
+
+ ret = ring->flush(ring, I915_GEM_GPU_DOMAINS, flush_domains);
+ if (ret)
+ return ret;
+
+ trace_i915_gem_ring_flush(ring, I915_GEM_GPU_DOMAINS, flush_domains);
+
+ ring->gpu_caches_dirty = false;
+ return 0;
+}
OpenPOWER on IntegriCloud