Diffstat (limited to 'drivers/gpu/drm/i915/gt/intel_lrc.c')
-rw-r--r--  drivers/gpu/drm/i915/gt/intel_lrc.c | 486
1 file changed, 364 insertions(+), 122 deletions(-)
diff --git a/drivers/gpu/drm/i915/gt/intel_lrc.c b/drivers/gpu/drm/i915/gt/intel_lrc.c
index d9061d9348cb..d42584439f51 100644
--- a/drivers/gpu/drm/i915/gt/intel_lrc.c
+++ b/drivers/gpu/drm/i915/gt/intel_lrc.c
@@ -136,9 +136,12 @@
#include "gem/i915_gem_context.h"
#include "i915_drv.h"
+#include "i915_perf.h"
+#include "i915_trace.h"
#include "i915_vgpu.h"
#include "intel_engine_pm.h"
#include "intel_gt.h"
+#include "intel_gt_pm.h"
#include "intel_lrc_reg.h"
#include "intel_mocs.h"
#include "intel_reset.h"
@@ -163,6 +166,13 @@
#define CTX_DESC_FORCE_RESTORE BIT_ULL(2)
+#define GEN12_CTX_STATUS_SWITCHED_TO_NEW_QUEUE (0x1) /* lower csb dword */
+#define GEN12_CTX_SWITCH_DETAIL(csb_dw) ((csb_dw) & 0xF) /* upper csb dword */
+#define GEN12_CSB_SW_CTX_ID_MASK GENMASK(25, 15)
+#define GEN12_IDLE_CTX_ID 0x7FF
+#define GEN12_CSB_CTX_VALID(csb_dw) \
+ (FIELD_GET(GEN12_CSB_SW_CTX_ID_MASK, csb_dw) != GEN12_IDLE_CTX_ID)
+
/* Typical size of the average request (2 pipecontrols and a MI_BB) */
#define EXECLISTS_REQUEST_SIZE 64 /* bytes */
#define WA_TAIL_DWORDS 2
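
Aside on the new Gen12 CSB macros: GEN12_CSB_CTX_VALID() treats the all-ones sw context id (0x7FF) as "no context", i.e. an idle slot. A standalone sketch of the decode, with GENMASK()/FIELD_GET() re-implemented for userspace (the kernel macros behave the same for 32-bit fields; the sample values below are made up):

    #include <stdint.h>
    #include <stdio.h>

    /* Userspace stand-ins for the kernel's GENMASK()/FIELD_GET() (32-bit only). */
    #define GENMASK(h, l)   ((~0u << (l)) & (~0u >> (31 - (h))))
    #define FIELD_GET(m, v) (((v) & (m)) / ((m) & -(m)))

    #define GEN12_CSB_SW_CTX_ID_MASK GENMASK(25, 15)
    #define GEN12_IDLE_CTX_ID        0x7FF

    static int csb_ctx_valid(uint32_t dw)
    {
            return FIELD_GET(GEN12_CSB_SW_CTX_ID_MASK, dw) != GEN12_IDLE_CTX_ID;
    }

    int main(void)
    {
            uint32_t idle = (uint32_t)GEN12_IDLE_CTX_ID << 15; /* idle slot */
            uint32_t ctx5 = 5u << 15;                          /* sw ctx id 5 */

            printf("idle=%d ctx5=%d\n", csb_ctx_valid(idle), csb_ctx_valid(ctx5));
            return 0; /* prints: idle=0 ctx5=1 */
    }
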
@@ -216,8 +226,9 @@ static struct virtual_engine *to_virtual_engine(struct intel_engine_cs *engine)
return container_of(engine, struct virtual_engine, base);
}
-static int execlists_context_deferred_alloc(struct intel_context *ce,
- struct intel_engine_cs *engine);
+static int __execlists_context_alloc(struct intel_context *ce,
+ struct intel_engine_cs *engine);
+
static void execlists_init_reg_state(u32 *reg_state,
struct intel_context *ce,
struct intel_engine_cs *engine,
@@ -417,13 +428,17 @@ lrc_descriptor(struct intel_context *ce, struct intel_engine_cs *engine)
BUILD_BUG_ON(MAX_CONTEXT_HW_ID > (BIT(GEN8_CTX_ID_WIDTH)));
BUILD_BUG_ON(GEN11_MAX_CONTEXT_HW_ID > (BIT(GEN11_SW_CTX_ID_WIDTH)));
- desc = ctx->desc_template; /* bits 0-11 */
- GEM_BUG_ON(desc & GENMASK_ULL(63, 12));
+ desc = INTEL_LEGACY_32B_CONTEXT;
+ if (i915_vm_is_4lvl(ce->vm))
+ desc = INTEL_LEGACY_64B_CONTEXT;
+ desc <<= GEN8_CTX_ADDRESSING_MODE_SHIFT;
+
+ desc |= GEN8_CTX_VALID | GEN8_CTX_PRIVILEGE;
+ if (IS_GEN(engine->i915, 8))
+ desc |= GEN8_CTX_L3LLC_COHERENT;
desc |= i915_ggtt_offset(ce->state) + LRC_HEADER_PAGES * PAGE_SIZE;
/* bits 12-31 */
- GEM_BUG_ON(desc & GENMASK_ULL(63, 32));
-
/*
* The following 32bits are copied into the OA reports (dword 2).
* Consider updating oa_get_render_ctx_id in i915_perf.c when changing
@@ -539,26 +554,39 @@ execlists_context_status_change(struct i915_request *rq, unsigned long status)
status, rq);
}
+static inline struct intel_engine_cs *
+__execlists_schedule_in(struct i915_request *rq)
+{
+ struct intel_engine_cs * const engine = rq->engine;
+ struct intel_context * const ce = rq->hw_context;
+
+ intel_context_get(ce);
+
+ intel_gt_pm_get(engine->gt);
+ execlists_context_status_change(rq, INTEL_CONTEXT_SCHEDULE_IN);
+ intel_engine_context_in(engine);
+
+ return engine;
+}
+
static inline struct i915_request *
execlists_schedule_in(struct i915_request *rq, int idx)
{
- struct intel_context *ce = rq->hw_context;
- int count;
+ struct intel_context * const ce = rq->hw_context;
+ struct intel_engine_cs *old;
+ GEM_BUG_ON(!intel_engine_pm_is_awake(rq->engine));
trace_i915_request_in(rq, idx);
- count = intel_context_inflight_count(ce);
- if (!count) {
- intel_context_get(ce);
- ce->inflight = rq->engine;
-
- execlists_context_status_change(rq, INTEL_CONTEXT_SCHEDULE_IN);
- intel_engine_context_in(ce->inflight);
- }
+ old = READ_ONCE(ce->inflight);
+ do {
+ if (!old) {
+ WRITE_ONCE(ce->inflight, __execlists_schedule_in(rq));
+ break;
+ }
+ } while (!try_cmpxchg(&ce->inflight, &old, ptr_inc(old)));
- intel_context_inflight_inc(ce);
GEM_BUG_ON(intel_context_inflight(ce) != rq->engine);
-
return i915_request_get(rq);
}
@@ -572,34 +600,45 @@ static void kick_siblings(struct i915_request *rq, struct intel_context *ce)
}
static inline void
-execlists_schedule_out(struct i915_request *rq)
+__execlists_schedule_out(struct i915_request *rq,
+ struct intel_engine_cs * const engine)
{
- struct intel_context *ce = rq->hw_context;
+ struct intel_context * const ce = rq->hw_context;
- GEM_BUG_ON(!intel_context_inflight_count(ce));
+ intel_engine_context_out(engine);
+ execlists_context_status_change(rq, INTEL_CONTEXT_SCHEDULE_OUT);
+ intel_gt_pm_put(engine->gt);
- trace_i915_request_out(rq);
+ /*
+ * If this is part of a virtual engine, its next request may
+ * have been blocked waiting for access to the active context.
+ * We have to kick all the siblings again in case we need to
+ * switch (e.g. the next request is not runnable on this
+ * engine). Hopefully, we will already have submitted the next
+ * request before the tasklet runs and do not need to rebuild
+ * each virtual tree and kick everyone again.
+ */
+ if (ce->engine != engine)
+ kick_siblings(rq, ce);
- intel_context_inflight_dec(ce);
- if (!intel_context_inflight_count(ce)) {
- intel_engine_context_out(ce->inflight);
- execlists_context_status_change(rq, INTEL_CONTEXT_SCHEDULE_OUT);
+ intel_context_put(ce);
+}
- /*
- * If this is part of a virtual engine, its next request may
- * have been blocked waiting for access to the active context.
- * We have to kick all the siblings again in case we need to
- * switch (e.g. the next request is not runnable on this
- * engine). Hopefully, we will already have submitted the next
- * request before the tasklet runs and do not need to rebuild
- * each virtual tree and kick everyone again.
- */
- ce->inflight = NULL;
- if (rq->engine != ce->engine)
- kick_siblings(rq, ce);
+static inline void
+execlists_schedule_out(struct i915_request *rq)
+{
+ struct intel_context * const ce = rq->hw_context;
+ struct intel_engine_cs *cur, *old;
- intel_context_put(ce);
- }
+ trace_i915_request_out(rq);
+ GEM_BUG_ON(intel_context_inflight(ce) != rq->engine);
+
+ old = READ_ONCE(ce->inflight);
+ do
+ cur = ptr_unmask_bits(old, 2) ? ptr_dec(old) : NULL;
+ while (!try_cmpxchg(&ce->inflight, &old, cur));
+ if (!cur)
+ __execlists_schedule_out(rq, old);
i915_request_put(rq);
}
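
What replaced the old intel_context_inflight_count(): the number of requests in flight now lives in the low bits of ce->inflight itself. The engine pointer is aligned, so bits [1:0] are free, and pointer plus count can be updated together in a single try_cmpxchg(), which is what makes schedule-in/out safe without a lock. A userspace model of the packing, assuming ptr_inc()/ptr_dec()/ptr_unmask_bits() have the obvious meanings (the atomics are elided here):

    #include <stdint.h>
    #include <stdio.h>

    #define PTR_BITS 2
    #define PTR_MASK ((1ul << PTR_BITS) - 1)

    /* Count lives in the low bits; the aligned pointer in the rest. */
    static void *ptr_inc(void *p)   { return (void *)((uintptr_t)p + 1); }
    static void *ptr_dec(void *p)   { return (void *)((uintptr_t)p - 1); }
    static unsigned long ptr_count(void *p) { return (uintptr_t)p & PTR_MASK; }
    static void *ptr_base(void *p)  { return (void *)((uintptr_t)p & ~PTR_MASK); }

    int main(void)
    {
            int engine;                    /* stands in for intel_engine_cs */
            void *inflight = &engine;      /* first schedule_in: count == 0 */

            inflight = ptr_inc(inflight);  /* second request, same context */
            printf("count=%lu engine=%p\n", ptr_count(inflight), ptr_base(inflight));

            /* schedule_out: drop the count, or clear when the last completes */
            inflight = ptr_count(inflight) ? ptr_dec(inflight) : NULL;
            printf("still in flight: %s\n", inflight ? "yes" : "no");
            return 0;
    }
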
@@ -674,6 +713,9 @@ assert_pending_valid(const struct intel_engine_execlists *execlists,
trace_ports(execlists, msg, execlists->pending);
+ if (!execlists->pending[0])
+ return false;
+
if (execlists->pending[execlists_num_ports(execlists)])
return false;
@@ -931,12 +973,24 @@ need_timeslice(struct intel_engine_cs *engine, const struct i915_request *rq)
return hint >= effective_prio(rq);
}
+static int
+switch_prio(struct intel_engine_cs *engine, const struct i915_request *rq)
+{
+ if (list_is_last(&rq->sched.link, &engine->active.requests))
+ return INT_MIN;
+
+ return rq_prio(list_next_entry(rq, sched.link));
+}
+
static bool
-enable_timeslice(struct intel_engine_cs *engine)
+enable_timeslice(const struct intel_engine_execlists *execlists)
{
- struct i915_request *last = last_active(&engine->execlists);
+ const struct i915_request *rq = *execlists->active;
+
+ if (i915_request_completed(rq))
+ return false;
- return last && need_timeslice(engine, last);
+ return execlists->switch_priority_hint >= effective_prio(rq);
}
static void record_preemption(struct intel_engine_execlists *execlists)
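
So the dequeue path records the priority of the request queued behind the submitted pair (switch_prio(), INT_MIN when there is none), and the promotion path later arms the timeslice timer only when that waiter could actually displace the running request. A distilled model of the decision:

    #include <stdbool.h>

    /*
     * Distilled model: hint is the priority of the next queued request
     * (INT_MIN if the queue is empty), captured at submission time;
     * active_prio is the effective priority of the request currently
     * executing on the hardware.
     */
    static bool enable_timeslice_model(int hint, int active_prio,
                                       bool active_completed)
    {
            if (active_completed)
                    return false;       /* nothing left to preempt */

            return hint >= active_prio; /* waiter of equal or higher prio */
    }
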
@@ -1281,14 +1335,16 @@ done:
if (submit) {
*port = execlists_schedule_in(last, port - execlists->pending);
memset(port + 1, 0, (last_port - port) * sizeof(*port));
+ execlists->switch_priority_hint =
+ switch_prio(engine, *execlists->pending);
execlists_submit_ports(engine);
} else {
ring_set_paused(engine, 0);
}
}
-void
-execlists_cancel_port_requests(struct intel_engine_execlists * const execlists)
+static void
+cancel_port_requests(struct intel_engine_execlists * const execlists)
{
struct i915_request * const *port, *rq;
@@ -1322,8 +1378,71 @@ enum csb_step {
CSB_COMPLETE,
};
+/*
+ * Starting with Gen12, the status has a new format:
+ *
+ * bit 0: switched to new queue
+ * bit 1: reserved
+ * bit 2: semaphore wait mode (poll or signal), only valid when
+ * switch detail is set to "wait on semaphore"
+ * bits 3-5: engine class
+ * bits 6-11: engine instance
+ * bits 12-14: reserved
+ * bits 15-25: sw context id of the lrc the GT switched to
+ * bits 26-31: sw counter of the lrc the GT switched to
+ * bits 32-35: context switch detail
+ * - 0: ctx complete
+ * - 1: wait on sync flip
+ * - 2: wait on vblank
+ * - 3: wait on scanline
+ * - 4: wait on semaphore
+ * - 5: context preempted (not on SEMAPHORE_WAIT or
+ * WAIT_FOR_EVENT)
+ * bit 36: reserved
+ * bits 37-43: wait detail (for switch detail 1 to 4)
+ * bits 44-46: reserved
+ * bits 47-57: sw context id of the lrc the GT switched away from
+ * bits 58-63: sw counter of the lrc the GT switched away from
+ */
+static inline enum csb_step
+gen12_csb_parse(const struct intel_engine_execlists *execlists, const u32 *csb)
+{
+ u32 lower_dw = csb[0];
+ u32 upper_dw = csb[1];
+ bool ctx_to_valid = GEN12_CSB_CTX_VALID(lower_dw);
+ bool ctx_away_valid = GEN12_CSB_CTX_VALID(upper_dw);
+ bool new_queue = lower_dw & GEN12_CTX_STATUS_SWITCHED_TO_NEW_QUEUE;
+
+ if (!ctx_away_valid && ctx_to_valid)
+ return CSB_PROMOTE;
+
+ /*
+ * The context switch detail is not guaranteed to be 5 when a preemption
+ * occurs, so we can't just check for that. The check below works for
+ * all the cases we care about, including preemptions of WAIT
+ * instructions and lite-restore. Preempt-to-idle via the CTRL register
+ * would require some extra handling, but we don't support that.
+ */
+ if (new_queue && ctx_away_valid)
+ return CSB_PREEMPT;
+
+ /*
+ * switch detail = 5 is covered by the case above and we do not expect a
+ * context switch on an unsuccessful wait instruction since we always
+ * use polling mode.
+ */
+ GEM_BUG_ON(GEN12_CTX_SWITCH_DETAIL(upper_dw));
+
+ if (*execlists->active) {
+ GEM_BUG_ON(!ctx_away_valid);
+ return CSB_COMPLETE;
+ }
+
+ return CSB_NOP;
+}
+
static inline enum csb_step
-csb_parse(const struct intel_engine_execlists *execlists, const u32 *csb)
+gen8_csb_parse(const struct intel_engine_execlists *execlists, const u32 *csb)
{
unsigned int status = *csb;
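
Decoding two invented Gen12 events through the rules above makes the branches concrete; csb[0] is the lower (status) dword and csb[1] the upper, and the macros match the ones added at the top of this patch (re-declared here to keep the sketch self-contained):

    #include <stdint.h>
    #include <stdio.h>

    #define GENMASK(h, l)   ((~0u << (l)) & (~0u >> (31 - (h))))
    #define FIELD_GET(m, v) (((v) & (m)) / ((m) & -(m)))

    #define GEN12_CTX_STATUS_SWITCHED_TO_NEW_QUEUE 0x1 /* lower dword */
    #define GEN12_CSB_SW_CTX_ID_MASK GENMASK(25, 15)
    #define GEN12_IDLE_CTX_ID        0x7FF
    #define CTX_VALID(dw) \
            (FIELD_GET(GEN12_CSB_SW_CTX_ID_MASK, dw) != GEN12_IDLE_CTX_ID)

    static const char *parse(uint32_t lower, uint32_t upper)
    {
            if (!CTX_VALID(upper) && CTX_VALID(lower))
                    return "CSB_PROMOTE";
            if ((lower & GEN12_CTX_STATUS_SWITCHED_TO_NEW_QUEUE) &&
                CTX_VALID(upper))
                    return "CSB_PREEMPT";
            return "CSB_COMPLETE/CSB_NOP"; /* depends on *execlists->active */
    }

    int main(void)
    {
            /* idle -> ctx 1: a promotion of the pending pair */
            printf("%s\n", parse(1u << 15, (uint32_t)GEN12_IDLE_CTX_ID << 15));
            /* ctx 1 -> ctx 2, "switched to new queue" set: a preemption */
            printf("%s\n", parse((2u << 15) | 1u, 1u << 15));
            return 0; /* prints CSB_PROMOTE, then CSB_PREEMPT */
    }
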
@@ -1346,7 +1465,6 @@ static void process_csb(struct intel_engine_cs *engine)
const u8 num_entries = execlists->csb_size;
u8 head, tail;
- lockdep_assert_held(&engine->active.lock);
GEM_BUG_ON(USES_GUC_SUBMISSION(engine->i915));
/*
@@ -1376,6 +1494,8 @@ static void process_csb(struct intel_engine_cs *engine)
rmb();
do {
+ enum csb_step csb_step;
+
if (++head == num_entries)
head = 0;
@@ -1401,7 +1521,12 @@ static void process_csb(struct intel_engine_cs *engine)
engine->name, head,
buf[2 * head + 0], buf[2 * head + 1]);
- switch (csb_parse(execlists, buf + 2 * head)) {
+ if (INTEL_GEN(engine->i915) >= 12)
+ csb_step = gen12_csb_parse(execlists, buf + 2 * head);
+ else
+ csb_step = gen8_csb_parse(execlists, buf + 2 * head);
+
+ switch (csb_step) {
case CSB_PREEMPT: /* cancel old inflight, prepare for switch */
trace_ports(execlists, "preempted", execlists->active);
@@ -1417,15 +1542,14 @@ static void process_csb(struct intel_engine_cs *engine)
execlists->pending,
execlists_num_ports(execlists) *
sizeof(*execlists->pending));
- execlists->pending[0] = NULL;
-
- trace_ports(execlists, "promoted", execlists->active);
- if (enable_timeslice(engine))
+ if (enable_timeslice(execlists))
mod_timer(&execlists->timer, jiffies + 1);
if (!inject_preempt_hang(execlists))
ring_set_paused(engine, 0);
+
+ WRITE_ONCE(execlists->pending[0], NULL);
break;
case CSB_COMPLETE: /* port0 completed, advanced to port1 */
@@ -1469,8 +1593,6 @@ static void process_csb(struct intel_engine_cs *engine)
static void __execlists_submission_tasklet(struct intel_engine_cs *const engine)
{
lockdep_assert_held(&engine->active.lock);
-
- process_csb(engine);
if (!engine->execlists.pending[0])
execlists_dequeue(engine);
}
@@ -1484,9 +1606,12 @@ static void execlists_submission_tasklet(unsigned long data)
struct intel_engine_cs * const engine = (struct intel_engine_cs *)data;
unsigned long flags;
- spin_lock_irqsave(&engine->active.lock, flags);
- __execlists_submission_tasklet(engine);
- spin_unlock_irqrestore(&engine->active.lock, flags);
+ process_csb(engine);
+ if (!READ_ONCE(engine->execlists.pending[0])) {
+ spin_lock_irqsave(&engine->active.lock, flags);
+ __execlists_submission_tasklet(engine);
+ spin_unlock_irqrestore(&engine->active.lock, flags);
+ }
}
static void execlists_submission_timer(struct timer_list *timer)
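
The structural change in this hunk: process_csb() is single-consumer (only this tasklet advances the CSB head), so it can run outside engine->active.lock, and the spinlock is now taken only when a fresh submission may actually be needed. The shape of that lock-narrowing pattern, sketched with pthreads standing in for the engine spinlock:

    #include <pthread.h>
    #include <stdatomic.h>
    #include <stddef.h>

    struct model {
            pthread_mutex_t lock;      /* plays engine->active.lock */
            _Atomic(void *) pending0;  /* plays execlists->pending[0] */
    };

    static void process_events(struct model *m) { (void)m; /* lock-free: single consumer */ }
    static void dequeue(struct model *m)        { (void)m; /* needs the lock */ }

    static void tasklet(struct model *m)
    {
            process_events(m);
            if (!atomic_load(&m->pending0)) {  /* no pair already queued to HW */
                    pthread_mutex_lock(&m->lock);
                    dequeue(m);
                    pthread_mutex_unlock(&m->lock);
            }
    }

    int main(void)
    {
            struct model m = { .lock = PTHREAD_MUTEX_INITIALIZER };

            atomic_store(&m.pending0, NULL);
            tasklet(&m);
            return 0;
    }
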
@@ -1569,10 +1694,41 @@ static void execlists_context_destroy(struct kref *kref)
intel_context_free(ce);
}
+static void
+set_redzone(void *vaddr, const struct intel_engine_cs *engine)
+{
+ if (!IS_ENABLED(CONFIG_DRM_I915_DEBUG_GEM))
+ return;
+
+ vaddr += LRC_HEADER_PAGES * PAGE_SIZE;
+ vaddr += engine->context_size;
+
+ memset(vaddr, POISON_INUSE, I915_GTT_PAGE_SIZE);
+}
+
+static void
+check_redzone(const void *vaddr, const struct intel_engine_cs *engine)
+{
+ if (!IS_ENABLED(CONFIG_DRM_I915_DEBUG_GEM))
+ return;
+
+ vaddr += LRC_HEADER_PAGES * PAGE_SIZE;
+ vaddr += engine->context_size;
+
+ if (memchr_inv(vaddr, POISON_INUSE, I915_GTT_PAGE_SIZE))
+ dev_err_once(engine->i915->drm.dev,
+ "%s context redzone overwritten!\n",
+ engine->name);
+}
+
static void execlists_context_unpin(struct intel_context *ce)
{
+ check_redzone((void *)ce->lrc_reg_state - LRC_STATE_PN * PAGE_SIZE,
+ ce->engine);
+
i915_gem_context_unpin_hw_id(ce->gem_context);
i915_gem_object_unpin_map(ce->state->obj);
+ intel_ring_reset(ce->ring, ce->ring->tail);
}
static void
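
The redzone added below is plain poison-and-check over one extra page past the context image. A standalone version of the same trick (memchr_inv() is kernel-internal, so it is re-implemented here; 0x5a is POISON_INUSE's value in the kernel's poison.h):

    #include <stdio.h>
    #include <stdlib.h>
    #include <string.h>

    #define POISON_INUSE 0x5a /* matches the kernel's poison.h */
    #define REDZONE      4096 /* one extra page past the context image */

    /* Userspace stand-in for the kernel's memchr_inv(). */
    static const void *memchr_inv(const void *s, int c, size_t n)
    {
            const unsigned char *p = s;

            for (; n--; p++)
                    if (*p != (unsigned char)c)
                            return p;
            return NULL;
    }

    int main(void)
    {
            unsigned char *image = malloc(1024 + REDZONE);

            memset(image + 1024, POISON_INUSE, REDZONE); /* set_redzone() */
            image[1024 + 7] = 0;                         /* simulated overrun */

            if (memchr_inv(image + 1024, POISON_INUSE, REDZONE)) /* check_redzone() */
                    fprintf(stderr, "context redzone overwritten!\n");

            free(image);
            return 0;
    }
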
@@ -1605,9 +1761,6 @@ __execlists_context_pin(struct intel_context *ce,
void *vaddr;
int ret;
- ret = execlists_context_deferred_alloc(ce, engine);
- if (ret)
- goto err;
GEM_BUG_ON(!ce->state);
ret = intel_context_active_acquire(ce);
@@ -1646,6 +1799,11 @@ static int execlists_context_pin(struct intel_context *ce)
return __execlists_context_pin(ce, ce->engine);
}
+static int execlists_context_alloc(struct intel_context *ce)
+{
+ return __execlists_context_alloc(ce, ce->engine);
+}
+
static void execlists_context_reset(struct intel_context *ce)
{
/*
@@ -1669,6 +1827,8 @@ static void execlists_context_reset(struct intel_context *ce)
}
static const struct intel_context_ops execlists_context_ops = {
+ .alloc = execlists_context_alloc,
+
.pin = execlists_context_pin,
.unpin = execlists_context_unpin,
@@ -2065,6 +2225,7 @@ static int intel_init_workaround_bb(struct intel_engine_cs *engine)
return 0;
switch (INTEL_GEN(engine->i915)) {
+ case 12:
case 11:
return 0;
case 10:
@@ -2238,15 +2399,15 @@ static void reset_csb_pointers(struct intel_engine_cs *engine)
static struct i915_request *active_request(struct i915_request *rq)
{
- const struct list_head * const list = &rq->engine->active.requests;
- const struct intel_context * const context = rq->hw_context;
+ const struct list_head * const list = &rq->timeline->requests;
+ const struct intel_context * const ce = rq->hw_context;
struct i915_request *active = NULL;
- list_for_each_entry_from_reverse(rq, list, sched.link) {
+ list_for_each_entry_from_reverse(rq, list, link) {
if (i915_request_completed(rq))
break;
- if (rq->hw_context != context)
+ if (rq->hw_context != ce)
break;
active = rq;
@@ -2280,18 +2441,6 @@ static void __execlists_reset(struct intel_engine_cs *engine, bool stalled)
GEM_BUG_ON(i915_active_is_idle(&ce->active));
GEM_BUG_ON(!i915_vma_is_pinned(ce->state));
rq = active_request(rq);
-
- /*
- * Catch up with any missed context-switch interrupts.
- *
- * Ideally we would just read the remaining CSB entries now that we
- * know the gpu is idle. However, the CSB registers are sometimes^W
- * often trashed across a GPU reset! Instead we have to rely on
- * guessing the missed context-switch events by looking at what
- * requests were completed.
- */
- execlists_cancel_port_requests(execlists);
-
if (!rq) {
ce->ring->head = ce->ring->tail;
goto out_replay;
@@ -2353,6 +2502,7 @@ out_replay:
unwind:
/* Push back any incomplete requests for replay after the reset. */
+ cancel_port_requests(execlists);
__unwind_incomplete_requests(engine);
}
@@ -2652,6 +2802,63 @@ static int gen8_emit_flush_render(struct i915_request *request,
return 0;
}
+static int gen11_emit_flush_render(struct i915_request *request,
+ u32 mode)
+{
+ struct intel_engine_cs *engine = request->engine;
+ const u32 scratch_addr =
+ intel_gt_scratch_offset(engine->gt,
+ INTEL_GT_SCRATCH_FIELD_RENDER_FLUSH);
+
+ if (mode & EMIT_FLUSH) {
+ u32 *cs;
+ u32 flags = 0;
+
+ flags |= PIPE_CONTROL_CS_STALL;
+
+ flags |= PIPE_CONTROL_TILE_CACHE_FLUSH;
+ flags |= PIPE_CONTROL_RENDER_TARGET_CACHE_FLUSH;
+ flags |= PIPE_CONTROL_DEPTH_CACHE_FLUSH;
+ flags |= PIPE_CONTROL_DC_FLUSH_ENABLE;
+ flags |= PIPE_CONTROL_FLUSH_ENABLE;
+ flags |= PIPE_CONTROL_QW_WRITE;
+ flags |= PIPE_CONTROL_GLOBAL_GTT_IVB;
+
+ cs = intel_ring_begin(request, 6);
+ if (IS_ERR(cs))
+ return PTR_ERR(cs);
+
+ cs = gen8_emit_pipe_control(cs, flags, scratch_addr);
+ intel_ring_advance(request, cs);
+ }
+
+ if (mode & EMIT_INVALIDATE) {
+ u32 *cs;
+ u32 flags = 0;
+
+ flags |= PIPE_CONTROL_CS_STALL;
+
+ flags |= PIPE_CONTROL_COMMAND_CACHE_INVALIDATE;
+ flags |= PIPE_CONTROL_TLB_INVALIDATE;
+ flags |= PIPE_CONTROL_INSTRUCTION_CACHE_INVALIDATE;
+ flags |= PIPE_CONTROL_TEXTURE_CACHE_INVALIDATE;
+ flags |= PIPE_CONTROL_VF_CACHE_INVALIDATE;
+ flags |= PIPE_CONTROL_CONST_CACHE_INVALIDATE;
+ flags |= PIPE_CONTROL_STATE_CACHE_INVALIDATE;
+ flags |= PIPE_CONTROL_QW_WRITE;
+ flags |= PIPE_CONTROL_GLOBAL_GTT_IVB;
+
+ cs = intel_ring_begin(request, 6);
+ if (IS_ERR(cs))
+ return PTR_ERR(cs);
+
+ cs = gen8_emit_pipe_control(cs, flags, scratch_addr);
+ intel_ring_advance(request, cs);
+ }
+
+ return 0;
+}
+
/*
* Reserve space for 2 NOOPs at the end of each request to be
* used as a workaround for not being allowed to do lite
@@ -2680,12 +2887,10 @@ static u32 *emit_preempt_busywait(struct i915_request *request, u32 *cs)
return cs;
}
-static u32 *gen8_emit_fini_breadcrumb(struct i915_request *request, u32 *cs)
+static __always_inline u32*
+gen8_emit_fini_breadcrumb_footer(struct i915_request *request,
+ u32 *cs)
{
- cs = gen8_emit_ggtt_write(cs,
- request->fence.seqno,
- request->timeline->hwsp_offset,
- 0);
*cs++ = MI_USER_INTERRUPT;
*cs++ = MI_ARB_ON_OFF | MI_ARB_ENABLE;
@@ -2698,35 +2903,53 @@ static u32 *gen8_emit_fini_breadcrumb(struct i915_request *request, u32 *cs)
return gen8_emit_wa_tail(request, cs);
}
+static u32 *gen8_emit_fini_breadcrumb(struct i915_request *request, u32 *cs)
+{
+ cs = gen8_emit_ggtt_write(cs,
+ request->fence.seqno,
+ request->timeline->hwsp_offset,
+ 0);
+
+ return gen8_emit_fini_breadcrumb_footer(request, cs);
+}
+
static u32 *gen8_emit_fini_breadcrumb_rcs(struct i915_request *request, u32 *cs)
{
- /* XXX flush+write+CS_STALL all in one upsets gem_concurrent_blt:kbl */
cs = gen8_emit_ggtt_write_rcs(cs,
request->fence.seqno,
request->timeline->hwsp_offset,
PIPE_CONTROL_RENDER_TARGET_CACHE_FLUSH |
PIPE_CONTROL_DEPTH_CACHE_FLUSH |
PIPE_CONTROL_DC_FLUSH_ENABLE);
+
+ /* XXX flush+write+CS_STALL all in one upsets gem_concurrent_blt:kbl */
cs = gen8_emit_pipe_control(cs,
PIPE_CONTROL_FLUSH_ENABLE |
PIPE_CONTROL_CS_STALL,
0);
- *cs++ = MI_USER_INTERRUPT;
- *cs++ = MI_ARB_ON_OFF | MI_ARB_ENABLE;
- if (intel_engine_has_semaphores(request->engine))
- cs = emit_preempt_busywait(request, cs);
+ return gen8_emit_fini_breadcrumb_footer(request, cs);
+}
- request->tail = intel_ring_offset(request, cs);
- assert_ring_tail_valid(request->ring, request->tail);
+static u32 *gen11_emit_fini_breadcrumb_rcs(struct i915_request *request,
+ u32 *cs)
+{
+ cs = gen8_emit_ggtt_write_rcs(cs,
+ request->fence.seqno,
+ request->timeline->hwsp_offset,
+ PIPE_CONTROL_CS_STALL |
+ PIPE_CONTROL_TILE_CACHE_FLUSH |
+ PIPE_CONTROL_RENDER_TARGET_CACHE_FLUSH |
+ PIPE_CONTROL_DEPTH_CACHE_FLUSH |
+ PIPE_CONTROL_DC_FLUSH_ENABLE |
+ PIPE_CONTROL_FLUSH_ENABLE);
- return gen8_emit_wa_tail(request, cs);
+ return gen8_emit_fini_breadcrumb_footer(request, cs);
}
static void execlists_park(struct intel_engine_cs *engine)
{
- del_timer_sync(&engine->execlists.timer);
- intel_engine_park(engine);
+ del_timer(&engine->execlists.timer);
}
void intel_execlists_set_default_submission(struct intel_engine_cs *engine)
@@ -2817,11 +3040,23 @@ logical_ring_default_irqs(struct intel_engine_cs *engine)
engine->irq_keep_mask = GT_CONTEXT_SWITCH_INTERRUPT << shift;
}
-int intel_execlists_submission_setup(struct intel_engine_cs *engine)
+static void rcs_submission_override(struct intel_engine_cs *engine)
{
- /* Intentionally left blank. */
- engine->buffer = NULL;
+ switch (INTEL_GEN(engine->i915)) {
+ case 12:
+ case 11:
+ engine->emit_flush = gen11_emit_flush_render;
+ engine->emit_fini_breadcrumb = gen11_emit_fini_breadcrumb_rcs;
+ break;
+ default:
+ engine->emit_flush = gen8_emit_flush_render;
+ engine->emit_fini_breadcrumb = gen8_emit_fini_breadcrumb_rcs;
+ break;
+ }
+}
+int intel_execlists_submission_setup(struct intel_engine_cs *engine)
+{
tasklet_init(&engine->execlists.tasklet,
execlists_submission_tasklet, (unsigned long)engine);
timer_setup(&engine->execlists.timer, execlists_submission_timer, 0);
@@ -2829,10 +3064,8 @@ int intel_execlists_submission_setup(struct intel_engine_cs *engine)
logical_ring_default_vfuncs(engine);
logical_ring_default_irqs(engine);
- if (engine->class == RENDER_CLASS) {
- engine->emit_flush = gen8_emit_flush_render;
- engine->emit_fini_breadcrumb = gen8_emit_fini_breadcrumb_rcs;
- }
+ if (engine->class == RENDER_CLASS)
+ rcs_submission_override(engine);
return 0;
}
@@ -2891,6 +3124,10 @@ static u32 intel_lr_indirect_ctx_offset(struct intel_engine_cs *engine)
default:
MISSING_CASE(INTEL_GEN(engine->i915));
/* fall through */
+ case 12:
+ indirect_ctx_offset =
+ GEN12_CTX_RCS_INDIRECT_CTX_OFFSET_DEFAULT;
+ break;
case 11:
indirect_ctx_offset =
GEN11_CTX_RCS_INDIRECT_CTX_OFFSET_DEFAULT;
@@ -3032,6 +3269,8 @@ populate_lr_context(struct intel_context *ce,
return ret;
}
+ set_redzone(vaddr, engine);
+
if (engine->default_state) {
/*
* We only want to copy over the template context state;
@@ -3069,28 +3308,16 @@ err_unpin_ctx:
return ret;
}
-static struct intel_timeline *
-get_timeline(struct i915_gem_context *ctx, struct intel_gt *gt)
-{
- if (ctx->timeline)
- return intel_timeline_get(ctx->timeline);
- else
- return intel_timeline_create(gt, NULL);
-}
-
-static int execlists_context_deferred_alloc(struct intel_context *ce,
- struct intel_engine_cs *engine)
+static int __execlists_context_alloc(struct intel_context *ce,
+ struct intel_engine_cs *engine)
{
struct drm_i915_gem_object *ctx_obj;
+ struct intel_ring *ring;
struct i915_vma *vma;
u32 context_size;
- struct intel_ring *ring;
- struct intel_timeline *timeline;
int ret;
- if (ce->state)
- return 0;
-
+ GEM_BUG_ON(ce->state);
context_size = round_up(engine->context_size, I915_GTT_PAGE_SIZE);
/*
@@ -3098,6 +3325,8 @@ static int execlists_context_deferred_alloc(struct intel_context *ce,
* for our own use and for sharing with the GuC.
*/
context_size += LRC_HEADER_PAGES * PAGE_SIZE;
+ if (IS_ENABLED(CONFIG_DRM_I915_DEBUG_GEM))
+ context_size += I915_GTT_PAGE_SIZE; /* for redzone */
ctx_obj = i915_gem_object_create_shmem(engine->i915, context_size);
if (IS_ERR(ctx_obj))
@@ -3109,16 +3338,19 @@ static int execlists_context_deferred_alloc(struct intel_context *ce,
goto error_deref_obj;
}
- timeline = get_timeline(ce->gem_context, engine->gt);
- if (IS_ERR(timeline)) {
- ret = PTR_ERR(timeline);
- goto error_deref_obj;
+ if (!ce->timeline) {
+ struct intel_timeline *tl;
+
+ tl = intel_timeline_create(engine->gt, NULL);
+ if (IS_ERR(tl)) {
+ ret = PTR_ERR(tl);
+ goto error_deref_obj;
+ }
+
+ ce->timeline = tl;
}
- ring = intel_engine_create_ring(engine,
- timeline,
- ce->gem_context->ring_size);
- intel_timeline_put(timeline);
+ ring = intel_engine_create_ring(engine, (unsigned long)ce->ring);
if (IS_ERR(ring)) {
ret = PTR_ERR(ring);
goto error_deref_obj;
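
The "(unsigned long)ce->ring" cast above deserves a note: at this point no ring has been allocated yet, and in this series the requested ring size appears to be stashed in the ce->ring pointer field itself until the backend creates the real ring (a __intel_context_ring_size()-style encoding in intel_context.h). A hedged sketch of that size-as-pointer idea, assuming only that ring sizes are powers of two:

    #include <assert.h>
    #include <stdio.h>

    struct intel_ring; /* opaque until the backend allocates it */

    /* Stash a requested size in an as-yet-unused pointer field. */
    static struct intel_ring *ring_size_encode(unsigned long size)
    {
            assert(size && !(size & (size - 1))); /* power-of-two only */
            return (struct intel_ring *)size;
    }

    int main(void)
    {
            /* context setup: ce->ring = encoded size, no allocation yet */
            struct intel_ring *ring = ring_size_encode(16 * 1024); /* SZ_16K */

            /* backend: decode the request when creating the real ring */
            printf("requested ring size: %lu bytes\n", (unsigned long)ring);
            return 0;
    }
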
@@ -3229,6 +3461,8 @@ static void virtual_context_enter(struct intel_context *ce)
for (n = 0; n < ve->num_siblings; n++)
intel_engine_pm_get(ve->siblings[n]);
+
+ intel_timeline_enter(ce->timeline);
}
static void virtual_context_exit(struct intel_context *ce)
@@ -3236,6 +3470,8 @@ static void virtual_context_exit(struct intel_context *ce)
struct virtual_engine *ve = container_of(ce, typeof(*ve), context);
unsigned int n;
+ intel_timeline_exit(ce->timeline);
+
for (n = 0; n < ve->num_siblings; n++)
intel_engine_pm_put(ve->siblings[n]);
}
@@ -3536,6 +3772,12 @@ intel_execlists_create_virtual(struct i915_gem_context *ctx,
ve->base.flags |= I915_ENGINE_IS_VIRTUAL;
+ err = __execlists_context_alloc(&ve->context, siblings[0]);
+ if (err)
+ goto err_put;
+
+ __set_bit(CONTEXT_ALLOC_BIT, &ve->context.flags);
+
return &ve->context;
err_put: