diff options
Diffstat (limited to 'drivers/gpu/drm/i915/gvt/scheduler.c')
| -rw-r--r-- | drivers/gpu/drm/i915/gvt/scheduler.c | 415 |
1 files changed, 259 insertions, 156 deletions
diff --git a/drivers/gpu/drm/i915/gvt/scheduler.c b/drivers/gpu/drm/i915/gvt/scheduler.c index 05b953793316..5b2a7d072ec9 100644 --- a/drivers/gpu/drm/i915/gvt/scheduler.c +++ b/drivers/gpu/drm/i915/gvt/scheduler.c @@ -35,6 +35,11 @@ #include <linux/kthread.h> +#include "gem/i915_gem_context.h" +#include "gem/i915_gem_pm.h" +#include "gt/intel_context.h" +#include "gt/intel_ring.h" + #include "i915_drv.h" #include "gvt.h" @@ -80,8 +85,8 @@ static void sr_oa_regs(struct intel_vgpu_workload *workload, u32 *reg_state, bool save) { struct drm_i915_private *dev_priv = workload->vgpu->gvt->dev_priv; - u32 ctx_oactxctrl = dev_priv->perf.oa.ctx_oactxctrl_offset; - u32 ctx_flexeu0 = dev_priv->perf.oa.ctx_flexeu0_offset; + u32 ctx_oactxctrl = dev_priv->perf.ctx_oactxctrl_offset; + u32 ctx_flexeu0 = dev_priv->perf.ctx_flexeu0_offset; int i = 0; u32 flex_mmio[] = { i915_mmio_reg_offset(EU_PERF_CNTL0), @@ -93,7 +98,7 @@ static void sr_oa_regs(struct intel_vgpu_workload *workload, i915_mmio_reg_offset(EU_PERF_CNTL6), }; - if (workload->ring_id != RCS) + if (workload->ring_id != RCS0) return; if (save) { @@ -149,7 +154,7 @@ static int populate_shadow_context(struct intel_vgpu_workload *workload) COPY_REG_MASKED(ctx_ctrl); COPY_REG(ctx_timestamp); - if (ring_id == RCS) { + if (ring_id == RCS0) { COPY_REG(bb_per_ctx_ptr); COPY_REG(rcs_indirect_ctx); COPY_REG(rcs_indirect_ctx_offset); @@ -177,7 +182,7 @@ static int populate_shadow_context(struct intel_vgpu_workload *workload) context_page_num = context_page_num >> PAGE_SHIFT; - if (IS_BROADWELL(gvt->dev_priv) && ring_id == RCS) + if (IS_BROADWELL(gvt->dev_priv) && ring_id == RCS0) context_page_num = 19; i = 2; @@ -190,7 +195,7 @@ static int populate_shadow_context(struct intel_vgpu_workload *workload) return -EFAULT; } - page = i915_gem_object_get_page(ctx_obj, LRC_HEADER_PAGES + i); + page = i915_gem_object_get_page(ctx_obj, i); dst = kmap(page); intel_gvt_hypervisor_read_gpa(vgpu, context_gpa, dst, I915_GTT_PAGE_SIZE); @@ -277,17 +282,19 @@ static int shadow_context_status_change(struct notifier_block *nb, return NOTIFY_OK; } -static void shadow_context_descriptor_update(struct intel_context *ce) +static void +shadow_context_descriptor_update(struct intel_context *ce, + struct intel_vgpu_workload *workload) { - u64 desc = 0; + u64 desc = ce->lrc_desc; - desc = ce->lrc_desc; - - /* Update bits 0-11 of the context descriptor which includes flags + /* + * Update bits 0-11 of the context descriptor which includes flags * like GEN8_CTX_* cached in desc_template */ - desc &= U64_MAX << 12; - desc |= ce->gem_context->desc_template & ((1ULL << 12) - 1); + desc &= ~(0x3 << GEN8_CTX_ADDRESSING_MODE_SHIFT); + desc |= workload->ctx_desc.addressing_mode << + GEN8_CTX_ADDRESSING_MODE_SHIFT; ce->lrc_desc = desc; } @@ -298,12 +305,29 @@ static int copy_workload_to_ring_buffer(struct intel_vgpu_workload *workload) struct i915_request *req = workload->req; void *shadow_ring_buffer_va; u32 *cs; + int err; - if ((IS_KABYLAKE(req->i915) || IS_BROXTON(req->i915) - || IS_COFFEELAKE(req->i915)) - && is_inhibit_context(req->hw_context)) + if (IS_GEN(req->i915, 9) && is_inhibit_context(req->hw_context)) intel_vgpu_restore_inhibit_context(vgpu, req); + /* + * To track whether a request has started on HW, we can emit a + * breadcrumb at the beginning of the request and check its + * timeline's HWSP to see if the breadcrumb has advanced past the + * start of this request. Actually, the request must have the + * init_breadcrumb if its timeline set has_init_bread_crumb, or the + * scheduler might get a wrong state of it during reset. Since the + * requests from gvt always set the has_init_breadcrumb flag, here + * need to do the emit_init_breadcrumb for all the requests. + */ + if (req->engine->emit_init_breadcrumb) { + err = req->engine->emit_init_breadcrumb(req); + if (err) { + gvt_vgpu_err("fail to emit init breadcrumb\n"); + return err; + } + } + /* allocate shadow ring buffer */ cs = intel_ring_begin(workload->req, workload->rb_len / sizeof(u32)); if (IS_ERR(cs)) { @@ -338,26 +362,26 @@ static void release_shadow_wa_ctx(struct intel_shadow_wa_ctx *wa_ctx) wa_ctx->indirect_ctx.shadow_va = NULL; } -static int set_context_ppgtt_from_shadow(struct intel_vgpu_workload *workload, - struct i915_gem_context *ctx) +static void set_context_ppgtt_from_shadow(struct intel_vgpu_workload *workload, + struct i915_gem_context *ctx) { struct intel_vgpu_mm *mm = workload->shadow_mm; - struct i915_hw_ppgtt *ppgtt = ctx->ppgtt; + struct i915_ppgtt *ppgtt = + i915_vm_to_ppgtt(i915_gem_context_get_vm_rcu(ctx)); int i = 0; - if (mm->type != INTEL_GVT_MM_PPGTT || !mm->ppgtt_mm.shadowed) - return -EINVAL; - if (mm->ppgtt_mm.root_entry_type == GTT_TYPE_PPGTT_ROOT_L4_ENTRY) { - px_dma(&ppgtt->pml4) = mm->ppgtt_mm.shadow_pdps[0]; + px_dma(ppgtt->pd) = mm->ppgtt_mm.shadow_pdps[0]; } else { for (i = 0; i < GVT_RING_CTX_NR_PDPS; i++) { - px_dma(ppgtt->pdp.page_directory[i]) = - mm->ppgtt_mm.shadow_pdps[i]; + struct i915_page_directory * const pd = + i915_pd_entry(ppgtt->pd, i); + + px_dma(pd) = mm->ppgtt_mm.shadow_pdps[i]; } } - return 0; + i915_vm_put(&ppgtt->vm); } static int @@ -365,26 +389,19 @@ intel_gvt_workload_req_alloc(struct intel_vgpu_workload *workload) { struct intel_vgpu *vgpu = workload->vgpu; struct intel_vgpu_submission *s = &vgpu->submission; - struct i915_gem_context *shadow_ctx = s->shadow_ctx; - struct drm_i915_private *dev_priv = vgpu->gvt->dev_priv; - struct intel_engine_cs *engine = dev_priv->engine[workload->ring_id]; struct i915_request *rq; - int ret = 0; - - lockdep_assert_held(&dev_priv->drm.struct_mutex); if (workload->req) - goto out; + return 0; - rq = i915_request_alloc(engine, shadow_ctx); + rq = i915_request_create(s->shadow[workload->ring_id]); if (IS_ERR(rq)) { gvt_vgpu_err("fail to allocate gem request\n"); - ret = PTR_ERR(rq); - goto out; + return PTR_ERR(rq); } + workload->req = i915_request_get(rq); -out: - return ret; + return 0; } /** @@ -399,43 +416,22 @@ int intel_gvt_scan_and_shadow_workload(struct intel_vgpu_workload *workload) { struct intel_vgpu *vgpu = workload->vgpu; struct intel_vgpu_submission *s = &vgpu->submission; - struct i915_gem_context *shadow_ctx = s->shadow_ctx; - struct drm_i915_private *dev_priv = vgpu->gvt->dev_priv; - struct intel_engine_cs *engine = dev_priv->engine[workload->ring_id]; - struct intel_context *ce; int ret; - lockdep_assert_held(&dev_priv->drm.struct_mutex); + lockdep_assert_held(&vgpu->vgpu_lock); if (workload->shadow) return 0; - /* pin shadow context by gvt even the shadow context will be pinned - * when i915 alloc request. That is because gvt will update the guest - * context from shadow context when workload is completed, and at that - * moment, i915 may already unpined the shadow context to make the - * shadow_ctx pages invalid. So gvt need to pin itself. After update - * the guest context, gvt can unpin the shadow_ctx safely. - */ - ce = intel_context_pin(shadow_ctx, engine); - if (IS_ERR(ce)) { - gvt_vgpu_err("fail to pin shadow context\n"); - return PTR_ERR(ce); - } - - shadow_ctx->desc_template &= ~(0x3 << GEN8_CTX_ADDRESSING_MODE_SHIFT); - shadow_ctx->desc_template |= workload->ctx_desc.addressing_mode << - GEN8_CTX_ADDRESSING_MODE_SHIFT; - if (!test_and_set_bit(workload->ring_id, s->shadow_ctx_desc_updated)) - shadow_context_descriptor_update(ce); + shadow_context_descriptor_update(s->shadow[workload->ring_id], + workload); ret = intel_gvt_scan_and_shadow_ringbuffer(workload); if (ret) - goto err_unpin; + return ret; - if ((workload->ring_id == RCS) && - (workload->wa_ctx.indirect_ctx.size != 0)) { + if (workload->ring_id == RCS0 && workload->wa_ctx.indirect_ctx.size) { ret = intel_gvt_scan_and_shadow_wa_ctx(&workload->wa_ctx); if (ret) goto err_shadow; @@ -445,8 +441,6 @@ int intel_gvt_scan_and_shadow_workload(struct intel_vgpu_workload *workload) return 0; err_shadow: release_shadow_wa_ctx(&workload->wa_ctx); -err_unpin: - intel_context_unpin(ce); return ret; } @@ -485,7 +479,7 @@ static int prepare_shadow_batch_buffer(struct intel_vgpu_workload *workload) bb->obj->base.size); bb->clflush &= ~CLFLUSH_AFTER; } - i915_gem_obj_finish_shmem_access(bb->obj); + i915_gem_object_finish_access(bb->obj); bb->accessing = false; } else { @@ -509,18 +503,18 @@ static int prepare_shadow_batch_buffer(struct intel_vgpu_workload *workload) } ret = i915_gem_object_set_to_gtt_domain(bb->obj, - false); + false); if (ret) goto err; - i915_gem_obj_finish_shmem_access(bb->obj); - bb->accessing = false; - ret = i915_vma_move_to_active(bb->vma, workload->req, 0); if (ret) goto err; + + i915_gem_object_finish_access(bb->obj); + bb->accessing = false; } } return 0; @@ -574,10 +568,18 @@ static int prepare_shadow_wa_ctx(struct intel_shadow_wa_ctx *wa_ctx) return 0; } -static void release_shadow_batch_buffer(struct intel_vgpu_workload *workload) +static void update_vreg_in_ctx(struct intel_vgpu_workload *workload) { struct intel_vgpu *vgpu = workload->vgpu; struct drm_i915_private *dev_priv = vgpu->gvt->dev_priv; + u32 ring_base; + + ring_base = dev_priv->engine[workload->ring_id]->mmio_base; + vgpu_vreg_t(vgpu, RING_START(ring_base)) = workload->rb_start; +} + +static void release_shadow_batch_buffer(struct intel_vgpu_workload *workload) +{ struct intel_vgpu_shadow_bb *bb, *pos; if (list_empty(&workload->shadow_bb)) @@ -586,12 +588,10 @@ static void release_shadow_batch_buffer(struct intel_vgpu_workload *workload) bb = list_first_entry(&workload->shadow_bb, struct intel_vgpu_shadow_bb, list); - mutex_lock(&dev_priv->drm.struct_mutex); - list_for_each_entry_safe(bb, pos, &workload->shadow_bb, list) { if (bb->obj) { if (bb->accessing) - i915_gem_obj_finish_shmem_access(bb->obj); + i915_gem_object_finish_access(bb->obj); if (bb->va && !IS_ERR(bb->va)) i915_gem_object_unpin_map(bb->obj); @@ -600,18 +600,18 @@ static void release_shadow_batch_buffer(struct intel_vgpu_workload *workload) i915_vma_unpin(bb->vma); i915_vma_close(bb->vma); } - __i915_gem_object_release_unless_active(bb->obj); + i915_gem_object_put(bb->obj); } list_del(&bb->list); kfree(bb); } - - mutex_unlock(&dev_priv->drm.struct_mutex); } static int prepare_workload(struct intel_vgpu_workload *workload) { struct intel_vgpu *vgpu = workload->vgpu; + struct intel_vgpu_submission *s = &vgpu->submission; + int ring = workload->ring_id; int ret = 0; ret = intel_vgpu_pin_mm(workload->shadow_mm); @@ -620,8 +620,16 @@ static int prepare_workload(struct intel_vgpu_workload *workload) return ret; } + if (workload->shadow_mm->type != INTEL_GVT_MM_PPGTT || + !workload->shadow_mm->ppgtt_mm.shadowed) { + gvt_vgpu_err("workload shadow ppgtt isn't ready\n"); + return -EINVAL; + } + update_shadow_pdps(workload); + set_context_ppgtt_from_shadow(workload, s->shadow[ring]->gem_context); + ret = intel_vgpu_sync_oos_pages(workload->vgpu); if (ret) { gvt_vgpu_err("fail to vgpu sync oos pages\n"); @@ -671,9 +679,6 @@ err_unpin_mm: static int dispatch_workload(struct intel_vgpu_workload *workload) { struct intel_vgpu *vgpu = workload->vgpu; - struct drm_i915_private *dev_priv = vgpu->gvt->dev_priv; - struct intel_vgpu_submission *s = &vgpu->submission; - struct i915_gem_context *shadow_ctx = s->shadow_ctx; struct i915_request *rq; int ring_id = workload->ring_id; int ret; @@ -682,13 +687,6 @@ static int dispatch_workload(struct intel_vgpu_workload *workload) ring_id, workload); mutex_lock(&vgpu->vgpu_lock); - mutex_lock(&dev_priv->drm.struct_mutex); - - ret = set_context_ppgtt_from_shadow(workload, shadow_ctx); - if (ret < 0) { - gvt_vgpu_err("workload shadow ppgtt isn't ready\n"); - goto err_req; - } ret = intel_gvt_workload_req_alloc(workload); if (ret) @@ -723,7 +721,6 @@ out: err_req: if (ret) workload->status = ret; - mutex_unlock(&dev_priv->drm.struct_mutex); mutex_unlock(&vgpu->vgpu_lock); return ret; } @@ -796,14 +793,35 @@ static void update_guest_context(struct intel_vgpu_workload *workload) void *src; unsigned long context_gpa, context_page_num; int i; + struct drm_i915_private *dev_priv = gvt->dev_priv; + u32 ring_base; + u32 head, tail; + u16 wrap_count; gvt_dbg_sched("ring id %d workload lrca %x\n", rq->engine->id, workload->ctx_desc.lrca); + head = workload->rb_head; + tail = workload->rb_tail; + wrap_count = workload->guest_rb_head >> RB_HEAD_WRAP_CNT_OFF; + + if (tail < head) { + if (wrap_count == RB_HEAD_WRAP_CNT_MAX) + wrap_count = 0; + else + wrap_count += 1; + } + + head = (wrap_count << RB_HEAD_WRAP_CNT_OFF) | tail; + + ring_base = dev_priv->engine[workload->ring_id]->mmio_base; + vgpu_vreg_t(vgpu, RING_TAIL(ring_base)) = tail; + vgpu_vreg_t(vgpu, RING_HEAD(ring_base)) = head; + context_page_num = rq->engine->context_size; context_page_num = context_page_num >> PAGE_SHIFT; - if (IS_BROADWELL(gvt->dev_priv) && rq->engine->id == RCS) + if (IS_BROADWELL(gvt->dev_priv) && rq->engine->id == RCS0) context_page_num = 19; i = 2; @@ -817,7 +835,7 @@ static void update_guest_context(struct intel_vgpu_workload *workload) return; } - page = i915_gem_object_get_page(ctx_obj, LRC_HEADER_PAGES + i); + page = i915_gem_object_get_page(ctx_obj, i); src = kmap(page); intel_gvt_hypervisor_write_gpa(vgpu, context_gpa, src, I915_GTT_PAGE_SIZE); @@ -851,16 +869,16 @@ static void update_guest_context(struct intel_vgpu_workload *workload) } void intel_vgpu_clean_workloads(struct intel_vgpu *vgpu, - unsigned long engine_mask) + intel_engine_mask_t engine_mask) { struct intel_vgpu_submission *s = &vgpu->submission; struct drm_i915_private *dev_priv = vgpu->gvt->dev_priv; struct intel_engine_cs *engine; struct intel_vgpu_workload *pos, *n; - unsigned int tmp; + intel_engine_mask_t tmp; /* free the unsubmited workloads in the queues. */ - for_each_engine_masked(engine, dev_priv, engine_mask, tmp) { + for_each_engine_masked(engine, &dev_priv->gt, engine_mask, tmp) { list_for_each_entry_safe(pos, n, &s->workload_q_head[engine->id], list) { list_del_init(&pos->list); @@ -903,8 +921,8 @@ static void complete_current_workload(struct intel_gvt *gvt, int ring_id) workload->status = 0; } - if (!workload->status && !(vgpu->resetting_eng & - ENGINE_MASK(ring_id))) { + if (!workload->status && + !(vgpu->resetting_eng & BIT(ring_id))) { update_guest_context(workload); for_each_set_bit(event, workload->pending_events, @@ -912,11 +930,6 @@ static void complete_current_workload(struct intel_gvt *gvt, int ring_id) intel_vgpu_trigger_virtual_event(vgpu, event); } - /* unpin shadow ctx as the shadow_ctx update is done */ - mutex_lock(&rq->i915->drm.struct_mutex); - intel_context_unpin(rq->hw_context); - mutex_unlock(&rq->i915->drm.struct_mutex); - i915_request_put(fetch_and_zero(&workload->req)); } @@ -927,7 +940,7 @@ static void complete_current_workload(struct intel_gvt *gvt, int ring_id) list_del_init(&workload->list); - if (workload->status || (vgpu->resetting_eng & ENGINE_MASK(ring_id))) { + if (workload->status || vgpu->resetting_eng & BIT(ring_id)) { /* if workload->status is not successful means HW GPU * has occurred GPU hang or something wrong with i915/GVT, * and GVT won't inject context switch interrupt to guest. @@ -941,7 +954,7 @@ static void complete_current_workload(struct intel_gvt *gvt, int ring_id) * cleaned up during the resetting process later, so doing * the workload clean up here doesn't have any impact. **/ - intel_vgpu_clean_workloads(vgpu, ENGINE_MASK(ring_id)); + intel_vgpu_clean_workloads(vgpu, BIT(ring_id)); } workload->complete(workload); @@ -972,6 +985,7 @@ static int workload_thread(void *priv) int ret; bool need_force_wake = (INTEL_GEN(gvt->dev_priv) >= 9); DEFINE_WAIT_FUNC(wait, woken_wake_function); + struct intel_runtime_pm *rpm = &gvt->dev_priv->runtime_pm; kfree(p); @@ -995,14 +1009,21 @@ static int workload_thread(void *priv) workload->ring_id, workload, workload->vgpu->id); - intel_runtime_pm_get(gvt->dev_priv); + intel_runtime_pm_get(rpm); gvt_dbg_sched("ring id %d will dispatch workload %p\n", workload->ring_id, workload); if (need_force_wake) - intel_uncore_forcewake_get(gvt->dev_priv, + intel_uncore_forcewake_get(&gvt->dev_priv->uncore, FORCEWAKE_ALL); + /* + * Update the vReg of the vGPU which submitted this + * workload. The vGPU may use these registers for checking + * the context state. The value comes from GPU commands + * in this workload. + */ + update_vreg_in_ctx(workload); ret = dispatch_workload(workload); @@ -1023,10 +1044,10 @@ complete: complete_current_workload(gvt, ring_id); if (need_force_wake) - intel_uncore_forcewake_put(gvt->dev_priv, + intel_uncore_forcewake_put(&gvt->dev_priv->uncore, FORCEWAKE_ALL); - intel_runtime_pm_put_unchecked(gvt->dev_priv); + intel_runtime_pm_put_unchecked(rpm); if (ret && (vgpu_is_vm_unhealthy(ret))) enter_failsafe_mode(vgpu, GVT_FAILSAFE_GUEST_ERR); } @@ -1109,17 +1130,20 @@ err: } static void -i915_context_ppgtt_root_restore(struct intel_vgpu_submission *s) +i915_context_ppgtt_root_restore(struct intel_vgpu_submission *s, + struct i915_ppgtt *ppgtt) { - struct i915_hw_ppgtt *i915_ppgtt = s->shadow_ctx->ppgtt; int i; - if (i915_vm_is_48bit(&i915_ppgtt->vm)) - px_dma(&i915_ppgtt->pml4) = s->i915_context_pml4; - else { - for (i = 0; i < GEN8_3LVL_PDPES; i++) - px_dma(i915_ppgtt->pdp.page_directory[i]) = - s->i915_context_pdps[i]; + if (i915_vm_is_4lvl(&ppgtt->vm)) { + px_dma(ppgtt->pd) = s->i915_context_pml4; + } else { + for (i = 0; i < GEN8_3LVL_PDPES; i++) { + struct i915_page_directory * const pd = + i915_pd_entry(ppgtt->pd, i); + + px_dma(pd) = s->i915_context_pdps[i]; + } } } @@ -1133,10 +1157,15 @@ i915_context_ppgtt_root_restore(struct intel_vgpu_submission *s) void intel_vgpu_clean_submission(struct intel_vgpu *vgpu) { struct intel_vgpu_submission *s = &vgpu->submission; + struct intel_engine_cs *engine; + enum intel_engine_id id; intel_vgpu_select_submission_ops(vgpu, ALL_ENGINES, 0); - i915_context_ppgtt_root_restore(s); - i915_gem_context_put(s->shadow_ctx); + + i915_context_ppgtt_root_restore(s, i915_vm_to_ppgtt(s->shadow[0]->vm)); + for_each_engine(engine, vgpu->gvt->dev_priv, id) + intel_context_unpin(s->shadow[id]); + kmem_cache_destroy(s->workloads); } @@ -1150,7 +1179,7 @@ void intel_vgpu_clean_submission(struct intel_vgpu *vgpu) * */ void intel_vgpu_reset_submission(struct intel_vgpu *vgpu, - unsigned long engine_mask) + intel_engine_mask_t engine_mask) { struct intel_vgpu_submission *s = &vgpu->submission; @@ -1162,17 +1191,20 @@ void intel_vgpu_reset_submission(struct intel_vgpu *vgpu, } static void -i915_context_ppgtt_root_save(struct intel_vgpu_submission *s) +i915_context_ppgtt_root_save(struct intel_vgpu_submission *s, + struct i915_ppgtt *ppgtt) { - struct i915_hw_ppgtt *i915_ppgtt = s->shadow_ctx->ppgtt; int i; - if (i915_vm_is_48bit(&i915_ppgtt->vm)) - s->i915_context_pml4 = px_dma(&i915_ppgtt->pml4); - else { - for (i = 0; i < GEN8_3LVL_PDPES; i++) - s->i915_context_pdps[i] = - px_dma(i915_ppgtt->pdp.page_directory[i]); + if (i915_vm_is_4lvl(&ppgtt->vm)) { + s->i915_context_pml4 = px_dma(ppgtt->pd); + } else { + for (i = 0; i < GEN8_3LVL_PDPES; i++) { + struct i915_page_directory * const pd = + i915_pd_entry(ppgtt->pd, i); + + s->i915_context_pdps[i] = px_dma(pd); + } } } @@ -1188,17 +1220,48 @@ i915_context_ppgtt_root_save(struct intel_vgpu_submission *s) */ int intel_vgpu_setup_submission(struct intel_vgpu *vgpu) { + struct drm_i915_private *i915 = vgpu->gvt->dev_priv; struct intel_vgpu_submission *s = &vgpu->submission; - enum intel_engine_id i; struct intel_engine_cs *engine; + struct i915_gem_context *ctx; + struct i915_ppgtt *ppgtt; + enum intel_engine_id i; int ret; - s->shadow_ctx = i915_gem_context_create_gvt( - &vgpu->gvt->dev_priv->drm); - if (IS_ERR(s->shadow_ctx)) - return PTR_ERR(s->shadow_ctx); + ctx = i915_gem_context_create_kernel(i915, I915_PRIORITY_MAX); + if (IS_ERR(ctx)) + return PTR_ERR(ctx); + + i915_gem_context_set_force_single_submission(ctx); + + ppgtt = i915_vm_to_ppgtt(i915_gem_context_get_vm_rcu(ctx)); + i915_context_ppgtt_root_save(s, ppgtt); - i915_context_ppgtt_root_save(s); + for_each_engine(engine, i915, i) { + struct intel_context *ce; + + INIT_LIST_HEAD(&s->workload_q_head[i]); + s->shadow[i] = ERR_PTR(-EINVAL); + + ce = intel_context_create(ctx, engine); + if (IS_ERR(ce)) { + ret = PTR_ERR(ce); + goto out_shadow_ctx; + } + + if (!USES_GUC_SUBMISSION(i915)) { /* Max ring buffer size */ + const unsigned int ring_size = 512 * SZ_4K; + + ce->ring = __intel_context_ring_size(ring_size); + } + + ret = intel_context_pin(ce); + intel_context_put(ce); + if (ret) + goto out_shadow_ctx; + + s->shadow[i] = ce; + } bitmap_zero(s->shadow_ctx_desc_updated, I915_NUM_ENGINES); @@ -1214,16 +1277,24 @@ int intel_vgpu_setup_submission(struct intel_vgpu *vgpu) goto out_shadow_ctx; } - for_each_engine(engine, vgpu->gvt->dev_priv, i) - INIT_LIST_HEAD(&s->workload_q_head[i]); - atomic_set(&s->running_workload_num, 0); bitmap_zero(s->tlb_handle_pending, I915_NUM_ENGINES); + i915_vm_put(&ppgtt->vm); + i915_gem_context_put(ctx); return 0; out_shadow_ctx: - i915_gem_context_put(s->shadow_ctx); + i915_context_ppgtt_root_restore(s, ppgtt); + for_each_engine(engine, i915, i) { + if (IS_ERR(s->shadow[i])) + break; + + intel_context_unpin(s->shadow[i]); + intel_context_put(s->shadow[i]); + } + i915_vm_put(&ppgtt->vm); + i915_gem_context_put(ctx); return ret; } @@ -1240,7 +1311,7 @@ out_shadow_ctx: * */ int intel_vgpu_select_submission_ops(struct intel_vgpu *vgpu, - unsigned long engine_mask, + intel_engine_mask_t engine_mask, unsigned int interface) { struct intel_vgpu_submission *s = &vgpu->submission; @@ -1344,7 +1415,7 @@ static int prepare_mm(struct intel_vgpu_workload *workload) struct execlist_ctx_descriptor_format *desc = &workload->ctx_desc; struct intel_vgpu_mm *mm; struct intel_vgpu *vgpu = workload->vgpu; - intel_gvt_gtt_type_t root_entry_type; + enum intel_gvt_gtt_type root_entry_type; u64 pdps[GVT_RING_CTX_NR_PDPS]; switch (desc->addressing_mode) { @@ -1372,9 +1443,6 @@ static int prepare_mm(struct intel_vgpu_workload *workload) #define same_context(a, b) (((a)->context_id == (b)->context_id) && \ ((a)->lrca == (b)->lrca)) -#define get_last_workload(q) \ - (list_empty(q) ? NULL : container_of(q->prev, \ - struct intel_vgpu_workload, list)) /** * intel_vgpu_create_workload - create a vGPU workload * @vgpu: a vGPU @@ -1394,11 +1462,12 @@ intel_vgpu_create_workload(struct intel_vgpu *vgpu, int ring_id, { struct intel_vgpu_submission *s = &vgpu->submission; struct list_head *q = workload_q_head(vgpu, ring_id); - struct intel_vgpu_workload *last_workload = get_last_workload(q); + struct intel_vgpu_workload *last_workload = NULL; struct intel_vgpu_workload *workload = NULL; struct drm_i915_private *dev_priv = vgpu->gvt->dev_priv; u64 ring_context_gpa; u32 head, tail, start, ctl, ctx_ctl, per_ctx, indirect_ctx; + u32 guest_head; int ret; ring_context_gpa = intel_vgpu_gma_to_gpa(vgpu->gtt.ggtt_mm, @@ -1414,18 +1483,25 @@ intel_vgpu_create_workload(struct intel_vgpu *vgpu, int ring_id, intel_gvt_hypervisor_read_gpa(vgpu, ring_context_gpa + RING_CTX_OFF(ring_tail.val), &tail, 4); + guest_head = head; + head &= RB_HEAD_OFF_MASK; tail &= RB_TAIL_OFF_MASK; - if (last_workload && same_context(&last_workload->ctx_desc, desc)) { - gvt_dbg_el("ring id %d cur workload == last\n", ring_id); - gvt_dbg_el("ctx head %x real head %lx\n", head, - last_workload->rb_tail); - /* - * cannot use guest context head pointer here, - * as it might not be updated at this time - */ - head = last_workload->rb_tail; + list_for_each_entry_reverse(last_workload, q, list) { + + if (same_context(&last_workload->ctx_desc, desc)) { + gvt_dbg_el("ring id %d cur workload == last\n", + ring_id); + gvt_dbg_el("ctx head %x real head %lx\n", head, + last_workload->rb_tail); + /* + * cannot use guest context head pointer here, + * as it might not be updated at this time + */ + head = last_workload->rb_tail; + break; + } } gvt_dbg_el("ring id %d begin a new workload\n", ring_id); @@ -1438,6 +1514,12 @@ intel_vgpu_create_workload(struct intel_vgpu *vgpu, int ring_id, intel_gvt_hypervisor_read_gpa(vgpu, ring_context_gpa + RING_CTX_OFF(ctx_ctrl.val), &ctx_ctl, 4); + if (!intel_gvt_ggtt_validate_range(vgpu, start, + _RING_CTL_BUF_SIZE(ctl))) { + gvt_vgpu_err("context contain invalid rb at: 0x%x\n", start); + return ERR_PTR(-EINVAL); + } + workload = alloc_workload(vgpu); if (IS_ERR(workload)) return workload; @@ -1446,11 +1528,12 @@ intel_vgpu_create_workload(struct intel_vgpu *vgpu, int ring_id, workload->ctx_desc = *desc; workload->ring_context_gpa = ring_context_gpa; workload->rb_head = head; + workload->guest_rb_head = guest_head; workload->rb_tail = tail; workload->rb_start = start; workload->rb_ctl = ctl; - if (ring_id == RCS) { + if (ring_id == RCS0) { intel_gvt_hypervisor_read_gpa(vgpu, ring_context_gpa + RING_CTX_OFF(bb_per_ctx_ptr.val), &per_ctx, 4); intel_gvt_hypervisor_read_gpa(vgpu, ring_context_gpa + @@ -1461,9 +1544,31 @@ intel_vgpu_create_workload(struct intel_vgpu *vgpu, int ring_id, workload->wa_ctx.indirect_ctx.size = (indirect_ctx & INDIRECT_CTX_SIZE_MASK) * CACHELINE_BYTES; + + if (workload->wa_ctx.indirect_ctx.size != 0) { + if (!intel_gvt_ggtt_validate_range(vgpu, + workload->wa_ctx.indirect_ctx.guest_gma, + workload->wa_ctx.indirect_ctx.size)) { + gvt_vgpu_err("invalid wa_ctx at: 0x%lx\n", + workload->wa_ctx.indirect_ctx.guest_gma); + kmem_cache_free(s->workloads, workload); + return ERR_PTR(-EINVAL); + } + } + workload->wa_ctx.per_ctx.guest_gma = per_ctx & PER_CTX_ADDR_MASK; workload->wa_ctx.per_ctx.valid = per_ctx & 1; + if (workload->wa_ctx.per_ctx.valid) { + if (!intel_gvt_ggtt_validate_range(vgpu, + workload->wa_ctx.per_ctx.guest_gma, + CACHELINE_BYTES)) { + gvt_vgpu_err("invalid per_ctx at: 0x%lx\n", + workload->wa_ctx.per_ctx.guest_gma); + kmem_cache_free(s->workloads, workload); + return ERR_PTR(-EINVAL); + } + } } gvt_dbg_el("workload %p ring id %d head %x tail %x start %x ctl %x\n", @@ -1479,11 +1584,9 @@ intel_vgpu_create_workload(struct intel_vgpu *vgpu, int ring_id, * as there is only one pre-allocated buf-obj for shadow. */ if (list_empty(workload_q_head(vgpu, ring_id))) { - intel_runtime_pm_get(dev_priv); - mutex_lock(&dev_priv->drm.struct_mutex); + intel_runtime_pm_get(&dev_priv->runtime_pm); ret = intel_gvt_scan_and_shadow_workload(workload); - mutex_unlock(&dev_priv->drm.struct_mutex); - intel_runtime_pm_put_unchecked(dev_priv); + intel_runtime_pm_put_unchecked(&dev_priv->runtime_pm); } if (ret) { |

