summaryrefslogtreecommitdiffstats
path: root/drivers/gpu/drm/i915/gvt/scheduler.c
diff options
context:
space:
mode:
Diffstat (limited to 'drivers/gpu/drm/i915/gvt/scheduler.c')
-rw-r--r--drivers/gpu/drm/i915/gvt/scheduler.c415
1 files changed, 259 insertions, 156 deletions
diff --git a/drivers/gpu/drm/i915/gvt/scheduler.c b/drivers/gpu/drm/i915/gvt/scheduler.c
index 05b953793316..5b2a7d072ec9 100644
--- a/drivers/gpu/drm/i915/gvt/scheduler.c
+++ b/drivers/gpu/drm/i915/gvt/scheduler.c
@@ -35,6 +35,11 @@
#include <linux/kthread.h>
+#include "gem/i915_gem_context.h"
+#include "gem/i915_gem_pm.h"
+#include "gt/intel_context.h"
+#include "gt/intel_ring.h"
+
#include "i915_drv.h"
#include "gvt.h"
@@ -80,8 +85,8 @@ static void sr_oa_regs(struct intel_vgpu_workload *workload,
u32 *reg_state, bool save)
{
struct drm_i915_private *dev_priv = workload->vgpu->gvt->dev_priv;
- u32 ctx_oactxctrl = dev_priv->perf.oa.ctx_oactxctrl_offset;
- u32 ctx_flexeu0 = dev_priv->perf.oa.ctx_flexeu0_offset;
+ u32 ctx_oactxctrl = dev_priv->perf.ctx_oactxctrl_offset;
+ u32 ctx_flexeu0 = dev_priv->perf.ctx_flexeu0_offset;
int i = 0;
u32 flex_mmio[] = {
i915_mmio_reg_offset(EU_PERF_CNTL0),
@@ -93,7 +98,7 @@ static void sr_oa_regs(struct intel_vgpu_workload *workload,
i915_mmio_reg_offset(EU_PERF_CNTL6),
};
- if (workload->ring_id != RCS)
+ if (workload->ring_id != RCS0)
return;
if (save) {
@@ -149,7 +154,7 @@ static int populate_shadow_context(struct intel_vgpu_workload *workload)
COPY_REG_MASKED(ctx_ctrl);
COPY_REG(ctx_timestamp);
- if (ring_id == RCS) {
+ if (ring_id == RCS0) {
COPY_REG(bb_per_ctx_ptr);
COPY_REG(rcs_indirect_ctx);
COPY_REG(rcs_indirect_ctx_offset);
@@ -177,7 +182,7 @@ static int populate_shadow_context(struct intel_vgpu_workload *workload)
context_page_num = context_page_num >> PAGE_SHIFT;
- if (IS_BROADWELL(gvt->dev_priv) && ring_id == RCS)
+ if (IS_BROADWELL(gvt->dev_priv) && ring_id == RCS0)
context_page_num = 19;
i = 2;
@@ -190,7 +195,7 @@ static int populate_shadow_context(struct intel_vgpu_workload *workload)
return -EFAULT;
}
- page = i915_gem_object_get_page(ctx_obj, LRC_HEADER_PAGES + i);
+ page = i915_gem_object_get_page(ctx_obj, i);
dst = kmap(page);
intel_gvt_hypervisor_read_gpa(vgpu, context_gpa, dst,
I915_GTT_PAGE_SIZE);
@@ -277,17 +282,19 @@ static int shadow_context_status_change(struct notifier_block *nb,
return NOTIFY_OK;
}
-static void shadow_context_descriptor_update(struct intel_context *ce)
+static void
+shadow_context_descriptor_update(struct intel_context *ce,
+ struct intel_vgpu_workload *workload)
{
- u64 desc = 0;
+ u64 desc = ce->lrc_desc;
- desc = ce->lrc_desc;
-
- /* Update bits 0-11 of the context descriptor which includes flags
+ /*
+ * Update bits 0-11 of the context descriptor which includes flags
* like GEN8_CTX_* cached in desc_template
*/
- desc &= U64_MAX << 12;
- desc |= ce->gem_context->desc_template & ((1ULL << 12) - 1);
+ desc &= ~(0x3 << GEN8_CTX_ADDRESSING_MODE_SHIFT);
+ desc |= workload->ctx_desc.addressing_mode <<
+ GEN8_CTX_ADDRESSING_MODE_SHIFT;
ce->lrc_desc = desc;
}
@@ -298,12 +305,29 @@ static int copy_workload_to_ring_buffer(struct intel_vgpu_workload *workload)
struct i915_request *req = workload->req;
void *shadow_ring_buffer_va;
u32 *cs;
+ int err;
- if ((IS_KABYLAKE(req->i915) || IS_BROXTON(req->i915)
- || IS_COFFEELAKE(req->i915))
- && is_inhibit_context(req->hw_context))
+ if (IS_GEN(req->i915, 9) && is_inhibit_context(req->hw_context))
intel_vgpu_restore_inhibit_context(vgpu, req);
+ /*
+ * To track whether a request has started on HW, we can emit a
+ * breadcrumb at the beginning of the request and check its
+ * timeline's HWSP to see if the breadcrumb has advanced past the
+ * start of this request. Actually, the request must have the
+ * init_breadcrumb if its timeline set has_init_bread_crumb, or the
+ * scheduler might get a wrong state of it during reset. Since the
+ * requests from gvt always set the has_init_breadcrumb flag, here
+ * need to do the emit_init_breadcrumb for all the requests.
+ */
+ if (req->engine->emit_init_breadcrumb) {
+ err = req->engine->emit_init_breadcrumb(req);
+ if (err) {
+ gvt_vgpu_err("fail to emit init breadcrumb\n");
+ return err;
+ }
+ }
+
/* allocate shadow ring buffer */
cs = intel_ring_begin(workload->req, workload->rb_len / sizeof(u32));
if (IS_ERR(cs)) {
@@ -338,26 +362,26 @@ static void release_shadow_wa_ctx(struct intel_shadow_wa_ctx *wa_ctx)
wa_ctx->indirect_ctx.shadow_va = NULL;
}
-static int set_context_ppgtt_from_shadow(struct intel_vgpu_workload *workload,
- struct i915_gem_context *ctx)
+static void set_context_ppgtt_from_shadow(struct intel_vgpu_workload *workload,
+ struct i915_gem_context *ctx)
{
struct intel_vgpu_mm *mm = workload->shadow_mm;
- struct i915_hw_ppgtt *ppgtt = ctx->ppgtt;
+ struct i915_ppgtt *ppgtt =
+ i915_vm_to_ppgtt(i915_gem_context_get_vm_rcu(ctx));
int i = 0;
- if (mm->type != INTEL_GVT_MM_PPGTT || !mm->ppgtt_mm.shadowed)
- return -EINVAL;
-
if (mm->ppgtt_mm.root_entry_type == GTT_TYPE_PPGTT_ROOT_L4_ENTRY) {
- px_dma(&ppgtt->pml4) = mm->ppgtt_mm.shadow_pdps[0];
+ px_dma(ppgtt->pd) = mm->ppgtt_mm.shadow_pdps[0];
} else {
for (i = 0; i < GVT_RING_CTX_NR_PDPS; i++) {
- px_dma(ppgtt->pdp.page_directory[i]) =
- mm->ppgtt_mm.shadow_pdps[i];
+ struct i915_page_directory * const pd =
+ i915_pd_entry(ppgtt->pd, i);
+
+ px_dma(pd) = mm->ppgtt_mm.shadow_pdps[i];
}
}
- return 0;
+ i915_vm_put(&ppgtt->vm);
}
static int
@@ -365,26 +389,19 @@ intel_gvt_workload_req_alloc(struct intel_vgpu_workload *workload)
{
struct intel_vgpu *vgpu = workload->vgpu;
struct intel_vgpu_submission *s = &vgpu->submission;
- struct i915_gem_context *shadow_ctx = s->shadow_ctx;
- struct drm_i915_private *dev_priv = vgpu->gvt->dev_priv;
- struct intel_engine_cs *engine = dev_priv->engine[workload->ring_id];
struct i915_request *rq;
- int ret = 0;
-
- lockdep_assert_held(&dev_priv->drm.struct_mutex);
if (workload->req)
- goto out;
+ return 0;
- rq = i915_request_alloc(engine, shadow_ctx);
+ rq = i915_request_create(s->shadow[workload->ring_id]);
if (IS_ERR(rq)) {
gvt_vgpu_err("fail to allocate gem request\n");
- ret = PTR_ERR(rq);
- goto out;
+ return PTR_ERR(rq);
}
+
workload->req = i915_request_get(rq);
-out:
- return ret;
+ return 0;
}
/**
@@ -399,43 +416,22 @@ int intel_gvt_scan_and_shadow_workload(struct intel_vgpu_workload *workload)
{
struct intel_vgpu *vgpu = workload->vgpu;
struct intel_vgpu_submission *s = &vgpu->submission;
- struct i915_gem_context *shadow_ctx = s->shadow_ctx;
- struct drm_i915_private *dev_priv = vgpu->gvt->dev_priv;
- struct intel_engine_cs *engine = dev_priv->engine[workload->ring_id];
- struct intel_context *ce;
int ret;
- lockdep_assert_held(&dev_priv->drm.struct_mutex);
+ lockdep_assert_held(&vgpu->vgpu_lock);
if (workload->shadow)
return 0;
- /* pin shadow context by gvt even the shadow context will be pinned
- * when i915 alloc request. That is because gvt will update the guest
- * context from shadow context when workload is completed, and at that
- * moment, i915 may already unpined the shadow context to make the
- * shadow_ctx pages invalid. So gvt need to pin itself. After update
- * the guest context, gvt can unpin the shadow_ctx safely.
- */
- ce = intel_context_pin(shadow_ctx, engine);
- if (IS_ERR(ce)) {
- gvt_vgpu_err("fail to pin shadow context\n");
- return PTR_ERR(ce);
- }
-
- shadow_ctx->desc_template &= ~(0x3 << GEN8_CTX_ADDRESSING_MODE_SHIFT);
- shadow_ctx->desc_template |= workload->ctx_desc.addressing_mode <<
- GEN8_CTX_ADDRESSING_MODE_SHIFT;
-
if (!test_and_set_bit(workload->ring_id, s->shadow_ctx_desc_updated))
- shadow_context_descriptor_update(ce);
+ shadow_context_descriptor_update(s->shadow[workload->ring_id],
+ workload);
ret = intel_gvt_scan_and_shadow_ringbuffer(workload);
if (ret)
- goto err_unpin;
+ return ret;
- if ((workload->ring_id == RCS) &&
- (workload->wa_ctx.indirect_ctx.size != 0)) {
+ if (workload->ring_id == RCS0 && workload->wa_ctx.indirect_ctx.size) {
ret = intel_gvt_scan_and_shadow_wa_ctx(&workload->wa_ctx);
if (ret)
goto err_shadow;
@@ -445,8 +441,6 @@ int intel_gvt_scan_and_shadow_workload(struct intel_vgpu_workload *workload)
return 0;
err_shadow:
release_shadow_wa_ctx(&workload->wa_ctx);
-err_unpin:
- intel_context_unpin(ce);
return ret;
}
@@ -485,7 +479,7 @@ static int prepare_shadow_batch_buffer(struct intel_vgpu_workload *workload)
bb->obj->base.size);
bb->clflush &= ~CLFLUSH_AFTER;
}
- i915_gem_obj_finish_shmem_access(bb->obj);
+ i915_gem_object_finish_access(bb->obj);
bb->accessing = false;
} else {
@@ -509,18 +503,18 @@ static int prepare_shadow_batch_buffer(struct intel_vgpu_workload *workload)
}
ret = i915_gem_object_set_to_gtt_domain(bb->obj,
- false);
+ false);
if (ret)
goto err;
- i915_gem_obj_finish_shmem_access(bb->obj);
- bb->accessing = false;
-
ret = i915_vma_move_to_active(bb->vma,
workload->req,
0);
if (ret)
goto err;
+
+ i915_gem_object_finish_access(bb->obj);
+ bb->accessing = false;
}
}
return 0;
@@ -574,10 +568,18 @@ static int prepare_shadow_wa_ctx(struct intel_shadow_wa_ctx *wa_ctx)
return 0;
}
-static void release_shadow_batch_buffer(struct intel_vgpu_workload *workload)
+static void update_vreg_in_ctx(struct intel_vgpu_workload *workload)
{
struct intel_vgpu *vgpu = workload->vgpu;
struct drm_i915_private *dev_priv = vgpu->gvt->dev_priv;
+ u32 ring_base;
+
+ ring_base = dev_priv->engine[workload->ring_id]->mmio_base;
+ vgpu_vreg_t(vgpu, RING_START(ring_base)) = workload->rb_start;
+}
+
+static void release_shadow_batch_buffer(struct intel_vgpu_workload *workload)
+{
struct intel_vgpu_shadow_bb *bb, *pos;
if (list_empty(&workload->shadow_bb))
@@ -586,12 +588,10 @@ static void release_shadow_batch_buffer(struct intel_vgpu_workload *workload)
bb = list_first_entry(&workload->shadow_bb,
struct intel_vgpu_shadow_bb, list);
- mutex_lock(&dev_priv->drm.struct_mutex);
-
list_for_each_entry_safe(bb, pos, &workload->shadow_bb, list) {
if (bb->obj) {
if (bb->accessing)
- i915_gem_obj_finish_shmem_access(bb->obj);
+ i915_gem_object_finish_access(bb->obj);
if (bb->va && !IS_ERR(bb->va))
i915_gem_object_unpin_map(bb->obj);
@@ -600,18 +600,18 @@ static void release_shadow_batch_buffer(struct intel_vgpu_workload *workload)
i915_vma_unpin(bb->vma);
i915_vma_close(bb->vma);
}
- __i915_gem_object_release_unless_active(bb->obj);
+ i915_gem_object_put(bb->obj);
}
list_del(&bb->list);
kfree(bb);
}
-
- mutex_unlock(&dev_priv->drm.struct_mutex);
}
static int prepare_workload(struct intel_vgpu_workload *workload)
{
struct intel_vgpu *vgpu = workload->vgpu;
+ struct intel_vgpu_submission *s = &vgpu->submission;
+ int ring = workload->ring_id;
int ret = 0;
ret = intel_vgpu_pin_mm(workload->shadow_mm);
@@ -620,8 +620,16 @@ static int prepare_workload(struct intel_vgpu_workload *workload)
return ret;
}
+ if (workload->shadow_mm->type != INTEL_GVT_MM_PPGTT ||
+ !workload->shadow_mm->ppgtt_mm.shadowed) {
+ gvt_vgpu_err("workload shadow ppgtt isn't ready\n");
+ return -EINVAL;
+ }
+
update_shadow_pdps(workload);
+ set_context_ppgtt_from_shadow(workload, s->shadow[ring]->gem_context);
+
ret = intel_vgpu_sync_oos_pages(workload->vgpu);
if (ret) {
gvt_vgpu_err("fail to vgpu sync oos pages\n");
@@ -671,9 +679,6 @@ err_unpin_mm:
static int dispatch_workload(struct intel_vgpu_workload *workload)
{
struct intel_vgpu *vgpu = workload->vgpu;
- struct drm_i915_private *dev_priv = vgpu->gvt->dev_priv;
- struct intel_vgpu_submission *s = &vgpu->submission;
- struct i915_gem_context *shadow_ctx = s->shadow_ctx;
struct i915_request *rq;
int ring_id = workload->ring_id;
int ret;
@@ -682,13 +687,6 @@ static int dispatch_workload(struct intel_vgpu_workload *workload)
ring_id, workload);
mutex_lock(&vgpu->vgpu_lock);
- mutex_lock(&dev_priv->drm.struct_mutex);
-
- ret = set_context_ppgtt_from_shadow(workload, shadow_ctx);
- if (ret < 0) {
- gvt_vgpu_err("workload shadow ppgtt isn't ready\n");
- goto err_req;
- }
ret = intel_gvt_workload_req_alloc(workload);
if (ret)
@@ -723,7 +721,6 @@ out:
err_req:
if (ret)
workload->status = ret;
- mutex_unlock(&dev_priv->drm.struct_mutex);
mutex_unlock(&vgpu->vgpu_lock);
return ret;
}
@@ -796,14 +793,35 @@ static void update_guest_context(struct intel_vgpu_workload *workload)
void *src;
unsigned long context_gpa, context_page_num;
int i;
+ struct drm_i915_private *dev_priv = gvt->dev_priv;
+ u32 ring_base;
+ u32 head, tail;
+ u16 wrap_count;
gvt_dbg_sched("ring id %d workload lrca %x\n", rq->engine->id,
workload->ctx_desc.lrca);
+ head = workload->rb_head;
+ tail = workload->rb_tail;
+ wrap_count = workload->guest_rb_head >> RB_HEAD_WRAP_CNT_OFF;
+
+ if (tail < head) {
+ if (wrap_count == RB_HEAD_WRAP_CNT_MAX)
+ wrap_count = 0;
+ else
+ wrap_count += 1;
+ }
+
+ head = (wrap_count << RB_HEAD_WRAP_CNT_OFF) | tail;
+
+ ring_base = dev_priv->engine[workload->ring_id]->mmio_base;
+ vgpu_vreg_t(vgpu, RING_TAIL(ring_base)) = tail;
+ vgpu_vreg_t(vgpu, RING_HEAD(ring_base)) = head;
+
context_page_num = rq->engine->context_size;
context_page_num = context_page_num >> PAGE_SHIFT;
- if (IS_BROADWELL(gvt->dev_priv) && rq->engine->id == RCS)
+ if (IS_BROADWELL(gvt->dev_priv) && rq->engine->id == RCS0)
context_page_num = 19;
i = 2;
@@ -817,7 +835,7 @@ static void update_guest_context(struct intel_vgpu_workload *workload)
return;
}
- page = i915_gem_object_get_page(ctx_obj, LRC_HEADER_PAGES + i);
+ page = i915_gem_object_get_page(ctx_obj, i);
src = kmap(page);
intel_gvt_hypervisor_write_gpa(vgpu, context_gpa, src,
I915_GTT_PAGE_SIZE);
@@ -851,16 +869,16 @@ static void update_guest_context(struct intel_vgpu_workload *workload)
}
void intel_vgpu_clean_workloads(struct intel_vgpu *vgpu,
- unsigned long engine_mask)
+ intel_engine_mask_t engine_mask)
{
struct intel_vgpu_submission *s = &vgpu->submission;
struct drm_i915_private *dev_priv = vgpu->gvt->dev_priv;
struct intel_engine_cs *engine;
struct intel_vgpu_workload *pos, *n;
- unsigned int tmp;
+ intel_engine_mask_t tmp;
/* free the unsubmited workloads in the queues. */
- for_each_engine_masked(engine, dev_priv, engine_mask, tmp) {
+ for_each_engine_masked(engine, &dev_priv->gt, engine_mask, tmp) {
list_for_each_entry_safe(pos, n,
&s->workload_q_head[engine->id], list) {
list_del_init(&pos->list);
@@ -903,8 +921,8 @@ static void complete_current_workload(struct intel_gvt *gvt, int ring_id)
workload->status = 0;
}
- if (!workload->status && !(vgpu->resetting_eng &
- ENGINE_MASK(ring_id))) {
+ if (!workload->status &&
+ !(vgpu->resetting_eng & BIT(ring_id))) {
update_guest_context(workload);
for_each_set_bit(event, workload->pending_events,
@@ -912,11 +930,6 @@ static void complete_current_workload(struct intel_gvt *gvt, int ring_id)
intel_vgpu_trigger_virtual_event(vgpu, event);
}
- /* unpin shadow ctx as the shadow_ctx update is done */
- mutex_lock(&rq->i915->drm.struct_mutex);
- intel_context_unpin(rq->hw_context);
- mutex_unlock(&rq->i915->drm.struct_mutex);
-
i915_request_put(fetch_and_zero(&workload->req));
}
@@ -927,7 +940,7 @@ static void complete_current_workload(struct intel_gvt *gvt, int ring_id)
list_del_init(&workload->list);
- if (workload->status || (vgpu->resetting_eng & ENGINE_MASK(ring_id))) {
+ if (workload->status || vgpu->resetting_eng & BIT(ring_id)) {
/* if workload->status is not successful means HW GPU
* has occurred GPU hang or something wrong with i915/GVT,
* and GVT won't inject context switch interrupt to guest.
@@ -941,7 +954,7 @@ static void complete_current_workload(struct intel_gvt *gvt, int ring_id)
* cleaned up during the resetting process later, so doing
* the workload clean up here doesn't have any impact.
**/
- intel_vgpu_clean_workloads(vgpu, ENGINE_MASK(ring_id));
+ intel_vgpu_clean_workloads(vgpu, BIT(ring_id));
}
workload->complete(workload);
@@ -972,6 +985,7 @@ static int workload_thread(void *priv)
int ret;
bool need_force_wake = (INTEL_GEN(gvt->dev_priv) >= 9);
DEFINE_WAIT_FUNC(wait, woken_wake_function);
+ struct intel_runtime_pm *rpm = &gvt->dev_priv->runtime_pm;
kfree(p);
@@ -995,14 +1009,21 @@ static int workload_thread(void *priv)
workload->ring_id, workload,
workload->vgpu->id);
- intel_runtime_pm_get(gvt->dev_priv);
+ intel_runtime_pm_get(rpm);
gvt_dbg_sched("ring id %d will dispatch workload %p\n",
workload->ring_id, workload);
if (need_force_wake)
- intel_uncore_forcewake_get(gvt->dev_priv,
+ intel_uncore_forcewake_get(&gvt->dev_priv->uncore,
FORCEWAKE_ALL);
+ /*
+ * Update the vReg of the vGPU which submitted this
+ * workload. The vGPU may use these registers for checking
+ * the context state. The value comes from GPU commands
+ * in this workload.
+ */
+ update_vreg_in_ctx(workload);
ret = dispatch_workload(workload);
@@ -1023,10 +1044,10 @@ complete:
complete_current_workload(gvt, ring_id);
if (need_force_wake)
- intel_uncore_forcewake_put(gvt->dev_priv,
+ intel_uncore_forcewake_put(&gvt->dev_priv->uncore,
FORCEWAKE_ALL);
- intel_runtime_pm_put_unchecked(gvt->dev_priv);
+ intel_runtime_pm_put_unchecked(rpm);
if (ret && (vgpu_is_vm_unhealthy(ret)))
enter_failsafe_mode(vgpu, GVT_FAILSAFE_GUEST_ERR);
}
@@ -1109,17 +1130,20 @@ err:
}
static void
-i915_context_ppgtt_root_restore(struct intel_vgpu_submission *s)
+i915_context_ppgtt_root_restore(struct intel_vgpu_submission *s,
+ struct i915_ppgtt *ppgtt)
{
- struct i915_hw_ppgtt *i915_ppgtt = s->shadow_ctx->ppgtt;
int i;
- if (i915_vm_is_48bit(&i915_ppgtt->vm))
- px_dma(&i915_ppgtt->pml4) = s->i915_context_pml4;
- else {
- for (i = 0; i < GEN8_3LVL_PDPES; i++)
- px_dma(i915_ppgtt->pdp.page_directory[i]) =
- s->i915_context_pdps[i];
+ if (i915_vm_is_4lvl(&ppgtt->vm)) {
+ px_dma(ppgtt->pd) = s->i915_context_pml4;
+ } else {
+ for (i = 0; i < GEN8_3LVL_PDPES; i++) {
+ struct i915_page_directory * const pd =
+ i915_pd_entry(ppgtt->pd, i);
+
+ px_dma(pd) = s->i915_context_pdps[i];
+ }
}
}
@@ -1133,10 +1157,15 @@ i915_context_ppgtt_root_restore(struct intel_vgpu_submission *s)
void intel_vgpu_clean_submission(struct intel_vgpu *vgpu)
{
struct intel_vgpu_submission *s = &vgpu->submission;
+ struct intel_engine_cs *engine;
+ enum intel_engine_id id;
intel_vgpu_select_submission_ops(vgpu, ALL_ENGINES, 0);
- i915_context_ppgtt_root_restore(s);
- i915_gem_context_put(s->shadow_ctx);
+
+ i915_context_ppgtt_root_restore(s, i915_vm_to_ppgtt(s->shadow[0]->vm));
+ for_each_engine(engine, vgpu->gvt->dev_priv, id)
+ intel_context_unpin(s->shadow[id]);
+
kmem_cache_destroy(s->workloads);
}
@@ -1150,7 +1179,7 @@ void intel_vgpu_clean_submission(struct intel_vgpu *vgpu)
*
*/
void intel_vgpu_reset_submission(struct intel_vgpu *vgpu,
- unsigned long engine_mask)
+ intel_engine_mask_t engine_mask)
{
struct intel_vgpu_submission *s = &vgpu->submission;
@@ -1162,17 +1191,20 @@ void intel_vgpu_reset_submission(struct intel_vgpu *vgpu,
}
static void
-i915_context_ppgtt_root_save(struct intel_vgpu_submission *s)
+i915_context_ppgtt_root_save(struct intel_vgpu_submission *s,
+ struct i915_ppgtt *ppgtt)
{
- struct i915_hw_ppgtt *i915_ppgtt = s->shadow_ctx->ppgtt;
int i;
- if (i915_vm_is_48bit(&i915_ppgtt->vm))
- s->i915_context_pml4 = px_dma(&i915_ppgtt->pml4);
- else {
- for (i = 0; i < GEN8_3LVL_PDPES; i++)
- s->i915_context_pdps[i] =
- px_dma(i915_ppgtt->pdp.page_directory[i]);
+ if (i915_vm_is_4lvl(&ppgtt->vm)) {
+ s->i915_context_pml4 = px_dma(ppgtt->pd);
+ } else {
+ for (i = 0; i < GEN8_3LVL_PDPES; i++) {
+ struct i915_page_directory * const pd =
+ i915_pd_entry(ppgtt->pd, i);
+
+ s->i915_context_pdps[i] = px_dma(pd);
+ }
}
}
@@ -1188,17 +1220,48 @@ i915_context_ppgtt_root_save(struct intel_vgpu_submission *s)
*/
int intel_vgpu_setup_submission(struct intel_vgpu *vgpu)
{
+ struct drm_i915_private *i915 = vgpu->gvt->dev_priv;
struct intel_vgpu_submission *s = &vgpu->submission;
- enum intel_engine_id i;
struct intel_engine_cs *engine;
+ struct i915_gem_context *ctx;
+ struct i915_ppgtt *ppgtt;
+ enum intel_engine_id i;
int ret;
- s->shadow_ctx = i915_gem_context_create_gvt(
- &vgpu->gvt->dev_priv->drm);
- if (IS_ERR(s->shadow_ctx))
- return PTR_ERR(s->shadow_ctx);
+ ctx = i915_gem_context_create_kernel(i915, I915_PRIORITY_MAX);
+ if (IS_ERR(ctx))
+ return PTR_ERR(ctx);
+
+ i915_gem_context_set_force_single_submission(ctx);
+
+ ppgtt = i915_vm_to_ppgtt(i915_gem_context_get_vm_rcu(ctx));
+ i915_context_ppgtt_root_save(s, ppgtt);
- i915_context_ppgtt_root_save(s);
+ for_each_engine(engine, i915, i) {
+ struct intel_context *ce;
+
+ INIT_LIST_HEAD(&s->workload_q_head[i]);
+ s->shadow[i] = ERR_PTR(-EINVAL);
+
+ ce = intel_context_create(ctx, engine);
+ if (IS_ERR(ce)) {
+ ret = PTR_ERR(ce);
+ goto out_shadow_ctx;
+ }
+
+ if (!USES_GUC_SUBMISSION(i915)) { /* Max ring buffer size */
+ const unsigned int ring_size = 512 * SZ_4K;
+
+ ce->ring = __intel_context_ring_size(ring_size);
+ }
+
+ ret = intel_context_pin(ce);
+ intel_context_put(ce);
+ if (ret)
+ goto out_shadow_ctx;
+
+ s->shadow[i] = ce;
+ }
bitmap_zero(s->shadow_ctx_desc_updated, I915_NUM_ENGINES);
@@ -1214,16 +1277,24 @@ int intel_vgpu_setup_submission(struct intel_vgpu *vgpu)
goto out_shadow_ctx;
}
- for_each_engine(engine, vgpu->gvt->dev_priv, i)
- INIT_LIST_HEAD(&s->workload_q_head[i]);
-
atomic_set(&s->running_workload_num, 0);
bitmap_zero(s->tlb_handle_pending, I915_NUM_ENGINES);
+ i915_vm_put(&ppgtt->vm);
+ i915_gem_context_put(ctx);
return 0;
out_shadow_ctx:
- i915_gem_context_put(s->shadow_ctx);
+ i915_context_ppgtt_root_restore(s, ppgtt);
+ for_each_engine(engine, i915, i) {
+ if (IS_ERR(s->shadow[i]))
+ break;
+
+ intel_context_unpin(s->shadow[i]);
+ intel_context_put(s->shadow[i]);
+ }
+ i915_vm_put(&ppgtt->vm);
+ i915_gem_context_put(ctx);
return ret;
}
@@ -1240,7 +1311,7 @@ out_shadow_ctx:
*
*/
int intel_vgpu_select_submission_ops(struct intel_vgpu *vgpu,
- unsigned long engine_mask,
+ intel_engine_mask_t engine_mask,
unsigned int interface)
{
struct intel_vgpu_submission *s = &vgpu->submission;
@@ -1344,7 +1415,7 @@ static int prepare_mm(struct intel_vgpu_workload *workload)
struct execlist_ctx_descriptor_format *desc = &workload->ctx_desc;
struct intel_vgpu_mm *mm;
struct intel_vgpu *vgpu = workload->vgpu;
- intel_gvt_gtt_type_t root_entry_type;
+ enum intel_gvt_gtt_type root_entry_type;
u64 pdps[GVT_RING_CTX_NR_PDPS];
switch (desc->addressing_mode) {
@@ -1372,9 +1443,6 @@ static int prepare_mm(struct intel_vgpu_workload *workload)
#define same_context(a, b) (((a)->context_id == (b)->context_id) && \
((a)->lrca == (b)->lrca))
-#define get_last_workload(q) \
- (list_empty(q) ? NULL : container_of(q->prev, \
- struct intel_vgpu_workload, list))
/**
* intel_vgpu_create_workload - create a vGPU workload
* @vgpu: a vGPU
@@ -1394,11 +1462,12 @@ intel_vgpu_create_workload(struct intel_vgpu *vgpu, int ring_id,
{
struct intel_vgpu_submission *s = &vgpu->submission;
struct list_head *q = workload_q_head(vgpu, ring_id);
- struct intel_vgpu_workload *last_workload = get_last_workload(q);
+ struct intel_vgpu_workload *last_workload = NULL;
struct intel_vgpu_workload *workload = NULL;
struct drm_i915_private *dev_priv = vgpu->gvt->dev_priv;
u64 ring_context_gpa;
u32 head, tail, start, ctl, ctx_ctl, per_ctx, indirect_ctx;
+ u32 guest_head;
int ret;
ring_context_gpa = intel_vgpu_gma_to_gpa(vgpu->gtt.ggtt_mm,
@@ -1414,18 +1483,25 @@ intel_vgpu_create_workload(struct intel_vgpu *vgpu, int ring_id,
intel_gvt_hypervisor_read_gpa(vgpu, ring_context_gpa +
RING_CTX_OFF(ring_tail.val), &tail, 4);
+ guest_head = head;
+
head &= RB_HEAD_OFF_MASK;
tail &= RB_TAIL_OFF_MASK;
- if (last_workload && same_context(&last_workload->ctx_desc, desc)) {
- gvt_dbg_el("ring id %d cur workload == last\n", ring_id);
- gvt_dbg_el("ctx head %x real head %lx\n", head,
- last_workload->rb_tail);
- /*
- * cannot use guest context head pointer here,
- * as it might not be updated at this time
- */
- head = last_workload->rb_tail;
+ list_for_each_entry_reverse(last_workload, q, list) {
+
+ if (same_context(&last_workload->ctx_desc, desc)) {
+ gvt_dbg_el("ring id %d cur workload == last\n",
+ ring_id);
+ gvt_dbg_el("ctx head %x real head %lx\n", head,
+ last_workload->rb_tail);
+ /*
+ * cannot use guest context head pointer here,
+ * as it might not be updated at this time
+ */
+ head = last_workload->rb_tail;
+ break;
+ }
}
gvt_dbg_el("ring id %d begin a new workload\n", ring_id);
@@ -1438,6 +1514,12 @@ intel_vgpu_create_workload(struct intel_vgpu *vgpu, int ring_id,
intel_gvt_hypervisor_read_gpa(vgpu, ring_context_gpa +
RING_CTX_OFF(ctx_ctrl.val), &ctx_ctl, 4);
+ if (!intel_gvt_ggtt_validate_range(vgpu, start,
+ _RING_CTL_BUF_SIZE(ctl))) {
+ gvt_vgpu_err("context contain invalid rb at: 0x%x\n", start);
+ return ERR_PTR(-EINVAL);
+ }
+
workload = alloc_workload(vgpu);
if (IS_ERR(workload))
return workload;
@@ -1446,11 +1528,12 @@ intel_vgpu_create_workload(struct intel_vgpu *vgpu, int ring_id,
workload->ctx_desc = *desc;
workload->ring_context_gpa = ring_context_gpa;
workload->rb_head = head;
+ workload->guest_rb_head = guest_head;
workload->rb_tail = tail;
workload->rb_start = start;
workload->rb_ctl = ctl;
- if (ring_id == RCS) {
+ if (ring_id == RCS0) {
intel_gvt_hypervisor_read_gpa(vgpu, ring_context_gpa +
RING_CTX_OFF(bb_per_ctx_ptr.val), &per_ctx, 4);
intel_gvt_hypervisor_read_gpa(vgpu, ring_context_gpa +
@@ -1461,9 +1544,31 @@ intel_vgpu_create_workload(struct intel_vgpu *vgpu, int ring_id,
workload->wa_ctx.indirect_ctx.size =
(indirect_ctx & INDIRECT_CTX_SIZE_MASK) *
CACHELINE_BYTES;
+
+ if (workload->wa_ctx.indirect_ctx.size != 0) {
+ if (!intel_gvt_ggtt_validate_range(vgpu,
+ workload->wa_ctx.indirect_ctx.guest_gma,
+ workload->wa_ctx.indirect_ctx.size)) {
+ gvt_vgpu_err("invalid wa_ctx at: 0x%lx\n",
+ workload->wa_ctx.indirect_ctx.guest_gma);
+ kmem_cache_free(s->workloads, workload);
+ return ERR_PTR(-EINVAL);
+ }
+ }
+
workload->wa_ctx.per_ctx.guest_gma =
per_ctx & PER_CTX_ADDR_MASK;
workload->wa_ctx.per_ctx.valid = per_ctx & 1;
+ if (workload->wa_ctx.per_ctx.valid) {
+ if (!intel_gvt_ggtt_validate_range(vgpu,
+ workload->wa_ctx.per_ctx.guest_gma,
+ CACHELINE_BYTES)) {
+ gvt_vgpu_err("invalid per_ctx at: 0x%lx\n",
+ workload->wa_ctx.per_ctx.guest_gma);
+ kmem_cache_free(s->workloads, workload);
+ return ERR_PTR(-EINVAL);
+ }
+ }
}
gvt_dbg_el("workload %p ring id %d head %x tail %x start %x ctl %x\n",
@@ -1479,11 +1584,9 @@ intel_vgpu_create_workload(struct intel_vgpu *vgpu, int ring_id,
* as there is only one pre-allocated buf-obj for shadow.
*/
if (list_empty(workload_q_head(vgpu, ring_id))) {
- intel_runtime_pm_get(dev_priv);
- mutex_lock(&dev_priv->drm.struct_mutex);
+ intel_runtime_pm_get(&dev_priv->runtime_pm);
ret = intel_gvt_scan_and_shadow_workload(workload);
- mutex_unlock(&dev_priv->drm.struct_mutex);
- intel_runtime_pm_put_unchecked(dev_priv);
+ intel_runtime_pm_put_unchecked(&dev_priv->runtime_pm);
}
if (ret) {
OpenPOWER on IntegriCloud