summaryrefslogtreecommitdiffstats
path: root/drivers/gpu/drm/i915/intel_lrc.c
diff options
context:
space:
mode:
Diffstat (limited to 'drivers/gpu/drm/i915/intel_lrc.c')
-rw-r--r--drivers/gpu/drm/i915/intel_lrc.c245
1 files changed, 145 insertions, 100 deletions
diff --git a/drivers/gpu/drm/i915/intel_lrc.c b/drivers/gpu/drm/i915/intel_lrc.c
index 7ece2f061b9e..380c0838d8b3 100644
--- a/drivers/gpu/drm/i915/intel_lrc.c
+++ b/drivers/gpu/drm/i915/intel_lrc.c
@@ -137,6 +137,7 @@
#include <drm/i915_drm.h>
#include "i915_drv.h"
#include "i915_gem_render_state.h"
+#include "intel_lrc_reg.h"
#include "intel_mocs.h"
#define RING_EXECLIST_QFULL (1 << 0x2)
@@ -156,55 +157,6 @@
#define GEN8_CTX_STATUS_COMPLETED_MASK \
(GEN8_CTX_STATUS_COMPLETE | GEN8_CTX_STATUS_PREEMPTED)
-#define CTX_LRI_HEADER_0 0x01
-#define CTX_CONTEXT_CONTROL 0x02
-#define CTX_RING_HEAD 0x04
-#define CTX_RING_TAIL 0x06
-#define CTX_RING_BUFFER_START 0x08
-#define CTX_RING_BUFFER_CONTROL 0x0a
-#define CTX_BB_HEAD_U 0x0c
-#define CTX_BB_HEAD_L 0x0e
-#define CTX_BB_STATE 0x10
-#define CTX_SECOND_BB_HEAD_U 0x12
-#define CTX_SECOND_BB_HEAD_L 0x14
-#define CTX_SECOND_BB_STATE 0x16
-#define CTX_BB_PER_CTX_PTR 0x18
-#define CTX_RCS_INDIRECT_CTX 0x1a
-#define CTX_RCS_INDIRECT_CTX_OFFSET 0x1c
-#define CTX_LRI_HEADER_1 0x21
-#define CTX_CTX_TIMESTAMP 0x22
-#define CTX_PDP3_UDW 0x24
-#define CTX_PDP3_LDW 0x26
-#define CTX_PDP2_UDW 0x28
-#define CTX_PDP2_LDW 0x2a
-#define CTX_PDP1_UDW 0x2c
-#define CTX_PDP1_LDW 0x2e
-#define CTX_PDP0_UDW 0x30
-#define CTX_PDP0_LDW 0x32
-#define CTX_LRI_HEADER_2 0x41
-#define CTX_R_PWR_CLK_STATE 0x42
-#define CTX_GPGPU_CSR_BASE_ADDRESS 0x44
-
-#define CTX_REG(reg_state, pos, reg, val) do { \
- (reg_state)[(pos)+0] = i915_mmio_reg_offset(reg); \
- (reg_state)[(pos)+1] = (val); \
-} while (0)
-
-#define ASSIGN_CTX_PDP(ppgtt, reg_state, n) do { \
- const u64 _addr = i915_page_dir_dma_addr((ppgtt), (n)); \
- reg_state[CTX_PDP ## n ## _UDW+1] = upper_32_bits(_addr); \
- reg_state[CTX_PDP ## n ## _LDW+1] = lower_32_bits(_addr); \
-} while (0)
-
-#define ASSIGN_CTX_PML4(ppgtt, reg_state) do { \
- reg_state[CTX_PDP0_UDW + 1] = upper_32_bits(px_dma(&ppgtt->pml4)); \
- reg_state[CTX_PDP0_LDW + 1] = lower_32_bits(px_dma(&ppgtt->pml4)); \
-} while (0)
-
-#define GEN8_CTX_RCS_INDIRECT_CTX_OFFSET_DEFAULT 0x17
-#define GEN9_CTX_RCS_INDIRECT_CTX_OFFSET_DEFAULT 0x26
-#define GEN10_CTX_RCS_INDIRECT_CTX_OFFSET_DEFAULT 0x19
-
/* Typical size of the average request (2 pipecontrols and a MI_BB) */
#define EXECLISTS_REQUEST_SIZE 64 /* bytes */
#define WA_TAIL_DWORDS 2
@@ -504,6 +456,12 @@ static void inject_preempt_context(struct intel_engine_cs *engine)
ce->ring->tail &= (ce->ring->size - 1);
ce->lrc_reg_state[CTX_RING_TAIL+1] = ce->ring->tail;
+ GEM_BUG_ON((ce->lrc_reg_state[CTX_CONTEXT_CONTROL + 1] &
+ _MASKED_BIT_ENABLE(CTX_CTRL_ENGINE_CTX_RESTORE_INHIBIT |
+ CTX_CTRL_ENGINE_CTX_SAVE_INHIBIT)) !=
+ _MASKED_BIT_ENABLE(CTX_CTRL_ENGINE_CTX_RESTORE_INHIBIT |
+ CTX_CTRL_ENGINE_CTX_SAVE_INHIBIT));
+
GEM_TRACE("%s\n", engine->name);
for (n = execlists_num_ports(&engine->execlists); --n; )
elsp_write(0, engine->execlists.elsp);
@@ -778,6 +736,7 @@ static void execlists_submission_tasklet(unsigned long data)
struct intel_engine_execlists * const execlists = &engine->execlists;
struct execlist_port * const port = execlists->port;
struct drm_i915_private *dev_priv = engine->i915;
+ bool fw = false;
/* We can skip acquiring intel_runtime_pm_get() here as it was taken
* on our behalf by the request (see i915_gem_mark_busy()) and it will
@@ -788,8 +747,6 @@ static void execlists_submission_tasklet(unsigned long data)
*/
GEM_BUG_ON(!dev_priv->gt.awake);
- intel_uncore_forcewake_get(dev_priv, execlists->fw_domains);
-
/* Prefer doing test_and_clear_bit() as a two stage operation to avoid
* imposing the cost of a locked atomic transaction when submitting a
* new request (outside of the context-switch interrupt).
@@ -818,6 +775,12 @@ static void execlists_submission_tasklet(unsigned long data)
*/
__clear_bit(ENGINE_IRQ_EXECLIST, &engine->irq_posted);
if (unlikely(execlists->csb_head == -1)) { /* following a reset */
+ if (!fw) {
+ intel_uncore_forcewake_get(dev_priv,
+ execlists->fw_domains);
+ fw = true;
+ }
+
head = readl(dev_priv->regs + i915_mmio_reg_offset(RING_CONTEXT_STATUS_PTR(engine)));
tail = GEN8_CSB_WRITE_PTR(head);
head = GEN8_CSB_READ_PTR(head);
@@ -830,10 +793,10 @@ static void execlists_submission_tasklet(unsigned long data)
head = execlists->csb_head;
tail = READ_ONCE(buf[write_idx]);
}
- GEM_TRACE("%s cs-irq head=%d [%d], tail=%d [%d]\n",
+ GEM_TRACE("%s cs-irq head=%d [%d%s], tail=%d [%d%s]\n",
engine->name,
- head, GEN8_CSB_READ_PTR(readl(dev_priv->regs + i915_mmio_reg_offset(RING_CONTEXT_STATUS_PTR(engine)))),
- tail, GEN8_CSB_WRITE_PTR(readl(dev_priv->regs + i915_mmio_reg_offset(RING_CONTEXT_STATUS_PTR(engine)))));
+ head, GEN8_CSB_READ_PTR(readl(dev_priv->regs + i915_mmio_reg_offset(RING_CONTEXT_STATUS_PTR(engine)))), fw ? "" : "?",
+ tail, GEN8_CSB_WRITE_PTR(readl(dev_priv->regs + i915_mmio_reg_offset(RING_CONTEXT_STATUS_PTR(engine)))), fw ? "" : "?");
while (head != tail) {
struct drm_i915_gem_request *rq;
@@ -943,7 +906,8 @@ static void execlists_submission_tasklet(unsigned long data)
if (!execlists_is_active(execlists, EXECLISTS_ACTIVE_PREEMPT))
execlists_dequeue(engine);
- intel_uncore_forcewake_put(dev_priv, execlists->fw_domains);
+ if (fw)
+ intel_uncore_forcewake_put(dev_priv, execlists->fw_domains);
}
static void insert_request(struct intel_engine_cs *engine,
@@ -1014,7 +978,8 @@ static void execlists_schedule(struct drm_i915_gem_request *request, int prio)
stack.signaler = &request->priotree;
list_add(&stack.dfs_link, &dfs);
- /* Recursively bump all dependent priorities to match the new request.
+ /*
+ * Recursively bump all dependent priorities to match the new request.
*
* A naive approach would be to use recursion:
* static void update_priorities(struct i915_priotree *pt, prio) {
@@ -1031,27 +996,29 @@ static void execlists_schedule(struct drm_i915_gem_request *request, int prio)
* end result is a topological list of requests in reverse order, the
* last element in the list is the request we must execute first.
*/
- list_for_each_entry_safe(dep, p, &dfs, dfs_link) {
+ list_for_each_entry(dep, &dfs, dfs_link) {
struct i915_priotree *pt = dep->signaler;
- /* Within an engine, there can be no cycle, but we may
+ /*
+ * Within an engine, there can be no cycle, but we may
* refer to the same dependency chain multiple times
* (redundant dependencies are not eliminated) and across
* engines.
*/
list_for_each_entry(p, &pt->signalers_list, signal_link) {
- if (i915_gem_request_completed(pt_to_request(p->signaler)))
+ GEM_BUG_ON(p == dep); /* no cycles! */
+
+ if (i915_priotree_signaled(p->signaler))
continue;
GEM_BUG_ON(p->signaler->priority < pt->priority);
if (prio > READ_ONCE(p->signaler->priority))
list_move_tail(&p->dfs_link, &dfs);
}
-
- list_safe_reset_next(dep, p, dfs_link);
}
- /* If we didn't need to bump any existing priorities, and we haven't
+ /*
+ * If we didn't need to bump any existing priorities, and we haven't
* yet submitted this request (i.e. there is no potential race with
* execlists_submit_request()), we can set our own priority and skip
* acquiring the engine locks.
@@ -1125,11 +1092,9 @@ execlists_context_pin(struct intel_engine_cs *engine,
goto out;
GEM_BUG_ON(!ce->pin_count); /* no overflow please! */
- if (!ce->state) {
- ret = execlists_context_deferred_alloc(ctx, engine);
- if (ret)
- goto err;
- }
+ ret = execlists_context_deferred_alloc(ctx, engine);
+ if (ret)
+ goto err;
GEM_BUG_ON(!ce->state);
ret = __context_pin(ctx, ce->state);
@@ -1363,6 +1328,40 @@ static u32 *gen9_init_indirectctx_bb(struct intel_engine_cs *engine, u32 *batch)
return batch;
}
+static u32 *
+gen10_init_indirectctx_bb(struct intel_engine_cs *engine, u32 *batch)
+{
+ int i;
+
+ /*
+ * WaPipeControlBefore3DStateSamplePattern: cnl
+ *
+ * Ensure the engine is idle prior to programming a
+ * 3DSTATE_SAMPLE_PATTERN during a context restore.
+ */
+ batch = gen8_emit_pipe_control(batch,
+ PIPE_CONTROL_CS_STALL,
+ 0);
+ /*
+ * WaPipeControlBefore3DStateSamplePattern says we need 4 dwords for
+ * the PIPE_CONTROL followed by 12 dwords of 0x0, so 16 dwords in
+ * total. However, a PIPE_CONTROL is 6 dwords long, not 4, which is
+ * confusing. Since gen8_emit_pipe_control() already advances the
+ * batch by 6 dwords, we advance the other 10 here, completing a
+ * cacheline. It's not clear if the workaround requires this padding
+ * before other commands, or if it's just the regular padding we would
+ * already have for the workaround bb, so leave it here for now.
+ */
+ for (i = 0; i < 10; i++)
+ *batch++ = MI_NOOP;
+
+ /* Pad to end of cacheline */
+ while ((unsigned long)batch % CACHELINE_BYTES)
+ *batch++ = MI_NOOP;
+
+ return batch;
+}
+
#define CTX_WA_BB_OBJ_SIZE (PAGE_SIZE)
static int lrc_setup_wa_ctx(struct intel_engine_cs *engine)
@@ -1411,12 +1410,14 @@ static int intel_init_workaround_bb(struct intel_engine_cs *engine)
unsigned int i;
int ret;
- if (WARN_ON(engine->id != RCS || !engine->scratch))
+ if (GEM_WARN_ON(engine->id != RCS))
return -EINVAL;
switch (INTEL_GEN(engine->i915)) {
case 10:
- return 0;
+ wa_bb_fn[0] = gen10_init_indirectctx_bb;
+ wa_bb_fn[1] = NULL;
+ break;
case 9:
wa_bb_fn[0] = gen9_init_indirectctx_bb;
wa_bb_fn[1] = NULL;
@@ -1446,7 +1447,8 @@ static int intel_init_workaround_bb(struct intel_engine_cs *engine)
*/
for (i = 0; i < ARRAY_SIZE(wa_bb_fn); i++) {
wa_bb[i]->offset = batch_ptr - batch;
- if (WARN_ON(!IS_ALIGNED(wa_bb[i]->offset, CACHELINE_BYTES))) {
+ if (GEM_WARN_ON(!IS_ALIGNED(wa_bb[i]->offset,
+ CACHELINE_BYTES))) {
ret = -EINVAL;
break;
}
@@ -1472,47 +1474,48 @@ static u8 gtiir[] = {
[VECS] = 3,
};
-static int gen8_init_common_ring(struct intel_engine_cs *engine)
+static void enable_execlists(struct intel_engine_cs *engine)
{
struct drm_i915_private *dev_priv = engine->i915;
- struct intel_engine_execlists * const execlists = &engine->execlists;
- int ret;
- ret = intel_mocs_init_engine(engine);
- if (ret)
- return ret;
+ I915_WRITE(RING_HWSTAM(engine->mmio_base), 0xffffffff);
- intel_engine_reset_breadcrumbs(engine);
- intel_engine_init_hangcheck(engine);
+ /*
+ * Make sure we're not enabling the new 12-deep CSB
+ * FIFO as that requires a slightly updated handling
+ * in the ctx switch irq. Since we're currently only
+ * using only 2 elements of the enhanced execlists the
+ * deeper FIFO it's not needed and it's not worth adding
+ * more statements to the irq handler to support it.
+ */
+ if (INTEL_GEN(dev_priv) >= 11)
+ I915_WRITE(RING_MODE_GEN7(engine),
+ _MASKED_BIT_DISABLE(GEN11_GFX_DISABLE_LEGACY_MODE));
+ else
+ I915_WRITE(RING_MODE_GEN7(engine),
+ _MASKED_BIT_ENABLE(GFX_RUN_LIST_ENABLE));
- I915_WRITE(RING_HWSTAM(engine->mmio_base), 0xffffffff);
- I915_WRITE(RING_MODE_GEN7(engine),
- _MASKED_BIT_ENABLE(GFX_RUN_LIST_ENABLE));
I915_WRITE(RING_HWS_PGA(engine->mmio_base),
engine->status_page.ggtt_offset);
POSTING_READ(RING_HWS_PGA(engine->mmio_base));
- DRM_DEBUG_DRIVER("Execlists enabled for %s\n", engine->name);
+ /* Following the reset, we need to reload the CSB read/write pointers */
+ engine->execlists.csb_head = -1;
+}
- GEM_BUG_ON(engine->id >= ARRAY_SIZE(gtiir));
+static int gen8_init_common_ring(struct intel_engine_cs *engine)
+{
+ struct intel_engine_execlists * const execlists = &engine->execlists;
+ int ret;
- /*
- * Clear any pending interrupt state.
- *
- * We do it twice out of paranoia that some of the IIR are double
- * buffered, and if we only reset it once there may still be
- * an interrupt pending.
- */
- I915_WRITE(GEN8_GT_IIR(gtiir[engine->id]),
- GT_CONTEXT_SWITCH_INTERRUPT << engine->irq_shift);
- I915_WRITE(GEN8_GT_IIR(gtiir[engine->id]),
- GT_CONTEXT_SWITCH_INTERRUPT << engine->irq_shift);
- clear_bit(ENGINE_IRQ_EXECLIST, &engine->irq_posted);
- execlists->csb_head = -1;
- execlists->active = 0;
+ ret = intel_mocs_init_engine(engine);
+ if (ret)
+ return ret;
+
+ intel_engine_reset_breadcrumbs(engine);
+ intel_engine_init_hangcheck(engine);
- execlists->elsp =
- dev_priv->regs + i915_mmio_reg_offset(RING_ELSP(engine));
+ enable_execlists(engine);
/* After a GPU reset, we may have requests to replay */
if (execlists->first)
@@ -1554,6 +1557,31 @@ static int gen9_init_render_ring(struct intel_engine_cs *engine)
return init_workarounds_ring(engine);
}
+static void reset_irq(struct intel_engine_cs *engine)
+{
+ struct drm_i915_private *dev_priv = engine->i915;
+ int i;
+
+ GEM_BUG_ON(engine->id >= ARRAY_SIZE(gtiir));
+
+ /*
+ * Clear any pending interrupt state.
+ *
+ * We do it twice out of paranoia that some of the IIR are double
+ * buffered, and if we only reset it once there may still be
+ * an interrupt pending.
+ */
+ for (i = 0; i < 2; i++) {
+ I915_WRITE(GEN8_GT_IIR(gtiir[engine->id]),
+ GT_CONTEXT_SWITCH_INTERRUPT << engine->irq_shift);
+ POSTING_READ(GEN8_GT_IIR(gtiir[engine->id]));
+ }
+ GEM_BUG_ON(I915_READ(GEN8_GT_IIR(gtiir[engine->id])) &
+ (GT_CONTEXT_SWITCH_INTERRUPT << engine->irq_shift));
+
+ clear_bit(ENGINE_IRQ_EXECLIST, &engine->irq_posted);
+}
+
static void reset_common_ring(struct intel_engine_cs *engine,
struct drm_i915_gem_request *request)
{
@@ -1563,6 +1591,9 @@ static void reset_common_ring(struct intel_engine_cs *engine,
GEM_TRACE("%s seqno=%x\n",
engine->name, request ? request->global_seqno : 0);
+
+ reset_irq(engine);
+
spin_lock_irqsave(&engine->timeline->lock, flags);
/*
@@ -1581,6 +1612,9 @@ static void reset_common_ring(struct intel_engine_cs *engine,
spin_unlock_irqrestore(&engine->timeline->lock, flags);
+ /* Mark all CS interrupts as complete */
+ execlists->active = 0;
+
/* If the request was innocent, we leave the request in the ELSP
* and will try to replay it on restarting. The context image may
* have been corrupted by the reset, in which case we may have
@@ -1912,6 +1946,7 @@ void intel_logical_ring_cleanup(struct intel_engine_cs *engine)
intel_engine_cleanup_common(engine);
lrc_destroy_wa_ctx(engine);
+
engine->i915 = NULL;
dev_priv->engine[engine->id] = NULL;
kfree(engine);
@@ -2001,6 +2036,9 @@ static int logical_ring_init(struct intel_engine_cs *engine)
if (ret)
goto error;
+ engine->execlists.elsp =
+ engine->i915->regs + i915_mmio_reg_offset(RING_ELSP(engine));
+
return 0;
error:
@@ -2142,6 +2180,8 @@ static void execlists_init_reg_state(u32 *regs,
MI_LRI_FORCE_POSTED;
CTX_REG(regs, CTX_CONTEXT_CONTROL, RING_CONTEXT_CONTROL(engine),
+ _MASKED_BIT_DISABLE(CTX_CTRL_ENGINE_CTX_RESTORE_INHIBIT |
+ CTX_CTRL_ENGINE_CTX_SAVE_INHIBIT) |
_MASKED_BIT_ENABLE(CTX_CTRL_INHIBIT_SYN_CTX_SWITCH |
(HAS_RESOURCE_STREAMER(dev_priv) ?
CTX_CTRL_RS_CTX_ENABLE : 0)));
@@ -2261,6 +2301,10 @@ populate_lr_context(struct i915_gem_context *ctx,
if (!engine->default_state)
regs[CTX_CONTEXT_CONTROL + 1] |=
_MASKED_BIT_ENABLE(CTX_CTRL_ENGINE_CTX_RESTORE_INHIBIT);
+ if (ctx->hw_id == PREEMPT_ID)
+ regs[CTX_CONTEXT_CONTROL + 1] |=
+ _MASKED_BIT_ENABLE(CTX_CTRL_ENGINE_CTX_RESTORE_INHIBIT |
+ CTX_CTRL_ENGINE_CTX_SAVE_INHIBIT);
i915_gem_object_unpin_map(ctx_obj);
@@ -2277,7 +2321,8 @@ static int execlists_context_deferred_alloc(struct i915_gem_context *ctx,
struct intel_ring *ring;
int ret;
- WARN_ON(ce->state);
+ if (ce->state)
+ return 0;
context_size = round_up(engine->context_size, I915_GTT_PAGE_SIZE);
OpenPOWER on IntegriCloud