Diffstat (limited to 'drivers/gpu/drm/i915/gvt')
-rw-r--r--  drivers/gpu/drm/i915/gvt/aperture_gm.c |  26
-rw-r--r--  drivers/gpu/drm/i915/gvt/cfg_space.c   |  51
-rw-r--r--  drivers/gpu/drm/i915/gvt/cmd_parser.c  |  39
-rw-r--r--  drivers/gpu/drm/i915/gvt/display.c     |   3
-rw-r--r--  drivers/gpu/drm/i915/gvt/execlist.c    | 133
-rw-r--r--  drivers/gpu/drm/i915/gvt/gtt.c         |  11
-rw-r--r--  drivers/gpu/drm/i915/gvt/gvt.c         |   2
-rw-r--r--  drivers/gpu/drm/i915/gvt/gvt.h         |  14
-rw-r--r--  drivers/gpu/drm/i915/gvt/handlers.c    |  92
-rw-r--r--  drivers/gpu/drm/i915/gvt/kvmgt.c       |  44
-rw-r--r--  drivers/gpu/drm/i915/gvt/mmio.c        |  47
-rw-r--r--  drivers/gpu/drm/i915/gvt/mmio.h        |   2
-rw-r--r--  drivers/gpu/drm/i915/gvt/render.c      |   2
-rw-r--r--  drivers/gpu/drm/i915/gvt/scheduler.c   | 164
-rw-r--r--  drivers/gpu/drm/i915/gvt/scheduler.h   |   4
15 files changed, 445 insertions(+), 189 deletions(-)
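The largest functional change in this diff is that ring-buffer shadowing is split into two stages: at scan time (cmd_parser.c) the guest ring contents are copied into a per-vGPU reserved buffer, and only after the i915 request has been allocated (scheduler.c, intel_gvt_generate_request) are they written into the request's ring. Below is a condensed sketch of that flow; the helper names and flattened parameters are illustrative only, while the APIs they call (krealloc, intel_ring_begin, intel_ring_advance) are the ones the patch itself uses.

/*
 * Illustrative sketch of the two-stage shadow copy.  The helper names and
 * flattened parameters are hypothetical; the kernel APIs called here are
 * those used by the patch.
 */
#define RESERVE_RING_BUFFER_SIZE	((1 * PAGE_SIZE) / 8)	/* 512 bytes per ring */

/* Stage 1 (cmd_parser.c): stash the guest ring buffer in host memory,
 * growing the per-ring reserve buffer when a workload needs more room. */
static int shadow_to_reserve_buffer(void **reserve_va, int *reserve_sz,
				    unsigned long rb_len)
{
	if (rb_len > *reserve_sz) {
		void *va = krealloc(*reserve_va, rb_len, GFP_KERNEL);

		if (!va)
			return -ENOMEM;
		*reserve_va = va;
		*reserve_sz = rb_len;
	}
	/* the real code now fills *reserve_va via copy_gma_to_hva(), in one
	 * or two chunks depending on whether the guest ring has wrapped */
	return 0;
}

/* Stage 2 (scheduler.c): once the gem request exists, reserve space in its
 * ring and copy the stashed contents across. */
static int copy_reserve_to_ring(struct drm_i915_gem_request *req,
				void *reserve_va, unsigned long rb_len)
{
	u32 *cs = intel_ring_begin(req, rb_len / sizeof(u32));

	if (IS_ERR(cs))
		return PTR_ERR(cs);

	memcpy(cs, reserve_va, rb_len);
	cs += rb_len / sizeof(u32);
	intel_ring_advance(req, cs);
	return 0;
}

Deferring intel_ring_begin() until after request allocation means a failed scan no longer leaves reserved ring space behind, which is what makes the stepwise error unwinding added to prepare_execlist_workload() possible.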
diff --git a/drivers/gpu/drm/i915/gvt/aperture_gm.c b/drivers/gpu/drm/i915/gvt/aperture_gm.c index ca3d1925beda..7c9ec4f4f36c 100644 --- a/drivers/gpu/drm/i915/gvt/aperture_gm.c +++ b/drivers/gpu/drm/i915/gvt/aperture_gm.c @@ -173,8 +173,8 @@ static void free_vgpu_fence(struct intel_vgpu *vgpu) _clear_vgpu_fence(vgpu); for (i = 0; i < vgpu_fence_sz(vgpu); i++) { reg = vgpu->fence.regs[i]; - list_add_tail(®->link, - &dev_priv->mm.fence_list); + i915_unreserve_fence(reg); + vgpu->fence.regs[i] = NULL; } mutex_unlock(&dev_priv->drm.struct_mutex); @@ -187,24 +187,19 @@ static int alloc_vgpu_fence(struct intel_vgpu *vgpu) struct drm_i915_private *dev_priv = gvt->dev_priv; struct drm_i915_fence_reg *reg; int i; - struct list_head *pos, *q; intel_runtime_pm_get(dev_priv); /* Request fences from host */ mutex_lock(&dev_priv->drm.struct_mutex); - i = 0; - list_for_each_safe(pos, q, &dev_priv->mm.fence_list) { - reg = list_entry(pos, struct drm_i915_fence_reg, link); - if (reg->pin_count || reg->vma) - continue; - list_del(pos); + + for (i = 0; i < vgpu_fence_sz(vgpu); i++) { + reg = i915_reserve_fence(dev_priv); + if (IS_ERR(reg)) + goto out_free_fence; + vgpu->fence.regs[i] = reg; - if (++i == vgpu_fence_sz(vgpu)) - break; } - if (i != vgpu_fence_sz(vgpu)) - goto out_free_fence; _clear_vgpu_fence(vgpu); @@ -212,13 +207,14 @@ static int alloc_vgpu_fence(struct intel_vgpu *vgpu) intel_runtime_pm_put(dev_priv); return 0; out_free_fence: + gvt_vgpu_err("Failed to alloc fences\n"); /* Return fences to host, if fail */ for (i = 0; i < vgpu_fence_sz(vgpu); i++) { reg = vgpu->fence.regs[i]; if (!reg) continue; - list_add_tail(®->link, - &dev_priv->mm.fence_list); + i915_unreserve_fence(reg); + vgpu->fence.regs[i] = NULL; } mutex_unlock(&dev_priv->drm.struct_mutex); intel_runtime_pm_put(dev_priv); diff --git a/drivers/gpu/drm/i915/gvt/cfg_space.c b/drivers/gpu/drm/i915/gvt/cfg_space.c index ff3154fe6588..4ce2e6bd0680 100644 --- a/drivers/gpu/drm/i915/gvt/cfg_space.c +++ b/drivers/gpu/drm/i915/gvt/cfg_space.c @@ -101,7 +101,7 @@ int intel_vgpu_emulate_cfg_read(struct intel_vgpu *vgpu, unsigned int offset, if (WARN_ON(bytes > 4)) return -EINVAL; - if (WARN_ON(offset + bytes > INTEL_GVT_MAX_CFG_SPACE_SZ)) + if (WARN_ON(offset + bytes > vgpu->gvt->device_info.cfg_space_size)) return -EINVAL; memcpy(p_data, vgpu_cfg_space(vgpu) + offset, bytes); @@ -110,13 +110,25 @@ int intel_vgpu_emulate_cfg_read(struct intel_vgpu *vgpu, unsigned int offset, static int map_aperture(struct intel_vgpu *vgpu, bool map) { - u64 first_gfn, first_mfn; + phys_addr_t aperture_pa = vgpu_aperture_pa_base(vgpu); + unsigned long aperture_sz = vgpu_aperture_sz(vgpu); + u64 first_gfn; u64 val; int ret; if (map == vgpu->cfg_space.bar[INTEL_GVT_PCI_BAR_APERTURE].tracked) return 0; + if (map) { + vgpu->gm.aperture_va = memremap(aperture_pa, aperture_sz, + MEMREMAP_WC); + if (!vgpu->gm.aperture_va) + return -ENOMEM; + } else { + memunmap(vgpu->gm.aperture_va); + vgpu->gm.aperture_va = NULL; + } + val = vgpu_cfg_space(vgpu)[PCI_BASE_ADDRESS_2]; if (val & PCI_BASE_ADDRESS_MEM_TYPE_64) val = *(u64 *)(vgpu_cfg_space(vgpu) + PCI_BASE_ADDRESS_2); @@ -124,14 +136,16 @@ static int map_aperture(struct intel_vgpu *vgpu, bool map) val = *(u32 *)(vgpu_cfg_space(vgpu) + PCI_BASE_ADDRESS_2); first_gfn = (val + vgpu_aperture_offset(vgpu)) >> PAGE_SHIFT; - first_mfn = vgpu_aperture_pa_base(vgpu) >> PAGE_SHIFT; ret = intel_gvt_hypervisor_map_gfn_to_mfn(vgpu, first_gfn, - first_mfn, - vgpu_aperture_sz(vgpu) >> - PAGE_SHIFT, map); - if (ret) + aperture_pa >> 
PAGE_SHIFT, + aperture_sz >> PAGE_SHIFT, + map); + if (ret) { + memunmap(vgpu->gm.aperture_va); + vgpu->gm.aperture_va = NULL; return ret; + } vgpu->cfg_space.bar[INTEL_GVT_PCI_BAR_APERTURE].tracked = map; return 0; @@ -194,6 +208,20 @@ static int emulate_pci_command_write(struct intel_vgpu *vgpu, return 0; } +static int emulate_pci_rom_bar_write(struct intel_vgpu *vgpu, + unsigned int offset, void *p_data, unsigned int bytes) +{ + u32 *pval = (u32 *)(vgpu_cfg_space(vgpu) + offset); + u32 new = *(u32 *)(p_data); + + if ((new & PCI_ROM_ADDRESS_MASK) == PCI_ROM_ADDRESS_MASK) + /* We don't have rom, return size of 0. */ + *pval = 0; + else + vgpu_pci_cfg_mem_write(vgpu, offset, p_data, bytes); + return 0; +} + static int emulate_pci_bar_write(struct intel_vgpu *vgpu, unsigned int offset, void *p_data, unsigned int bytes) { @@ -275,7 +303,7 @@ int intel_vgpu_emulate_cfg_write(struct intel_vgpu *vgpu, unsigned int offset, if (WARN_ON(bytes > 4)) return -EINVAL; - if (WARN_ON(offset + bytes > INTEL_GVT_MAX_CFG_SPACE_SZ)) + if (WARN_ON(offset + bytes > vgpu->gvt->device_info.cfg_space_size)) return -EINVAL; /* First check if it's PCI_COMMAND */ @@ -286,6 +314,11 @@ int intel_vgpu_emulate_cfg_write(struct intel_vgpu *vgpu, unsigned int offset, } switch (rounddown(offset, 4)) { + case PCI_ROM_ADDRESS: + if (WARN_ON(!IS_ALIGNED(offset, 4))) + return -EINVAL; + return emulate_pci_rom_bar_write(vgpu, offset, p_data, bytes); + case PCI_BASE_ADDRESS_0 ... PCI_BASE_ADDRESS_5: if (WARN_ON(!IS_ALIGNED(offset, 4))) return -EINVAL; @@ -361,6 +394,8 @@ void intel_vgpu_init_cfg_space(struct intel_vgpu *vgpu, pci_resource_len(gvt->dev_priv->drm.pdev, 0); vgpu->cfg_space.bar[INTEL_GVT_PCI_BAR_APERTURE].size = pci_resource_len(gvt->dev_priv->drm.pdev, 2); + + memset(vgpu_cfg_space(vgpu) + PCI_ROM_ADDRESS, 0, 4); } /** diff --git a/drivers/gpu/drm/i915/gvt/cmd_parser.c b/drivers/gpu/drm/i915/gvt/cmd_parser.c index d4726a3358a4..85d4c57870fb 100644 --- a/drivers/gpu/drm/i915/gvt/cmd_parser.c +++ b/drivers/gpu/drm/i915/gvt/cmd_parser.c @@ -1576,11 +1576,11 @@ static int batch_buffer_needs_scan(struct parser_exec_state *s) return 1; } -static uint32_t find_bb_size(struct parser_exec_state *s) +static int find_bb_size(struct parser_exec_state *s) { unsigned long gma = 0; struct cmd_info *info; - uint32_t bb_size = 0; + int bb_size = 0; uint32_t cmd_len = 0; bool met_bb_end = false; struct intel_vgpu *vgpu = s->vgpu; @@ -1628,7 +1628,7 @@ static int perform_bb_shadow(struct parser_exec_state *s) struct intel_shadow_bb_entry *entry_obj; struct intel_vgpu *vgpu = s->vgpu; unsigned long gma = 0; - uint32_t bb_size; + int bb_size; void *dst = NULL; int ret = 0; @@ -1637,6 +1637,8 @@ static int perform_bb_shadow(struct parser_exec_state *s) /* get the size of the batch buffer */ bb_size = find_bb_size(s); + if (bb_size < 0) + return -EINVAL; /* allocate shadow batch buffer */ entry_obj = kmalloc(sizeof(*entry_obj), GFP_KERNEL); @@ -2603,7 +2605,8 @@ static int shadow_workload_ring_buffer(struct intel_vgpu_workload *workload) { struct intel_vgpu *vgpu = workload->vgpu; unsigned long gma_head, gma_tail, gma_top, guest_rb_size; - u32 *cs; + void *shadow_ring_buffer_va; + int ring_id = workload->ring_id; int ret; guest_rb_size = _RING_CTL_BUF_SIZE(workload->rb_ctl); @@ -2616,34 +2619,42 @@ static int shadow_workload_ring_buffer(struct intel_vgpu_workload *workload) gma_tail = workload->rb_start + workload->rb_tail; gma_top = workload->rb_start + guest_rb_size; - /* allocate shadow ring buffer */ - cs = 
intel_ring_begin(workload->req, workload->rb_len / sizeof(u32)); - if (IS_ERR(cs)) - return PTR_ERR(cs); + if (workload->rb_len > vgpu->reserve_ring_buffer_size[ring_id]) { + void *va = vgpu->reserve_ring_buffer_va[ring_id]; + /* realloc the new ring buffer if needed */ + vgpu->reserve_ring_buffer_va[ring_id] = + krealloc(va, workload->rb_len, GFP_KERNEL); + if (!vgpu->reserve_ring_buffer_va[ring_id]) { + gvt_vgpu_err("fail to alloc reserve ring buffer\n"); + return -ENOMEM; + } + vgpu->reserve_ring_buffer_size[ring_id] = workload->rb_len; + } + + shadow_ring_buffer_va = vgpu->reserve_ring_buffer_va[ring_id]; /* get shadow ring buffer va */ - workload->shadow_ring_buffer_va = cs; + workload->shadow_ring_buffer_va = shadow_ring_buffer_va; /* head > tail --> copy head <-> top */ if (gma_head > gma_tail) { ret = copy_gma_to_hva(vgpu, vgpu->gtt.ggtt_mm, - gma_head, gma_top, cs); + gma_head, gma_top, shadow_ring_buffer_va); if (ret < 0) { gvt_vgpu_err("fail to copy guest ring buffer\n"); return ret; } - cs += ret / sizeof(u32); + shadow_ring_buffer_va += ret; gma_head = workload->rb_start; } /* copy head or start <-> tail */ - ret = copy_gma_to_hva(vgpu, vgpu->gtt.ggtt_mm, gma_head, gma_tail, cs); + ret = copy_gma_to_hva(vgpu, vgpu->gtt.ggtt_mm, gma_head, gma_tail, + shadow_ring_buffer_va); if (ret < 0) { gvt_vgpu_err("fail to copy guest ring buffer\n"); return ret; } - cs += ret / sizeof(u32); - intel_ring_advance(workload->req, cs); return 0; } diff --git a/drivers/gpu/drm/i915/gvt/display.c b/drivers/gpu/drm/i915/gvt/display.c index 3c318439a659..309f3fa6794a 100644 --- a/drivers/gpu/drm/i915/gvt/display.c +++ b/drivers/gpu/drm/i915/gvt/display.c @@ -266,6 +266,8 @@ static void emulate_monitor_status_change(struct intel_vgpu *vgpu) /* Clear host CRT status, so guest couldn't detect this host CRT. 
*/ if (IS_BROADWELL(dev_priv)) vgpu_vreg(vgpu, PCH_ADPA) &= ~ADPA_CRT_HOTPLUG_MONITOR_MASK; + + vgpu_vreg(vgpu, PIPECONF(PIPE_A)) |= PIPECONF_ENABLE; } static void clean_virtual_dp_monitor(struct intel_vgpu *vgpu, int port_num) @@ -307,6 +309,7 @@ static int setup_virtual_dp_monitor(struct intel_vgpu *vgpu, int port_num, port->type = type; emulate_monitor_status_change(vgpu); + return 0; } diff --git a/drivers/gpu/drm/i915/gvt/execlist.c b/drivers/gpu/drm/i915/gvt/execlist.c index e5320b4eb698..940cdaaa3f24 100644 --- a/drivers/gpu/drm/i915/gvt/execlist.c +++ b/drivers/gpu/drm/i915/gvt/execlist.c @@ -368,7 +368,7 @@ static void free_workload(struct intel_vgpu_workload *workload) #define get_desc_from_elsp_dwords(ed, i) \ ((struct execlist_ctx_descriptor_format *)&((ed)->data[i * 2])) -static void prepare_shadow_batch_buffer(struct intel_vgpu_workload *workload) +static int prepare_shadow_batch_buffer(struct intel_vgpu_workload *workload) { const int gmadr_bytes = workload->vgpu->gvt->device_info.gmadr_bytes_in_cmd; struct intel_shadow_bb_entry *entry_obj; @@ -379,7 +379,7 @@ static void prepare_shadow_batch_buffer(struct intel_vgpu_workload *workload) vma = i915_gem_object_ggtt_pin(entry_obj->obj, NULL, 0, 4, 0); if (IS_ERR(vma)) { - return; + return PTR_ERR(vma); } /* FIXME: we are not tracking our pinned VMA leaving it @@ -392,6 +392,7 @@ static void prepare_shadow_batch_buffer(struct intel_vgpu_workload *workload) if (gmadr_bytes == 8) entry_obj->bb_start_cmd_va[2] = 0; } + return 0; } static int update_wa_ctx_2_shadow_ctx(struct intel_shadow_wa_ctx *wa_ctx) @@ -420,7 +421,7 @@ static int update_wa_ctx_2_shadow_ctx(struct intel_shadow_wa_ctx *wa_ctx) return 0; } -static void prepare_shadow_wa_ctx(struct intel_shadow_wa_ctx *wa_ctx) +static int prepare_shadow_wa_ctx(struct intel_shadow_wa_ctx *wa_ctx) { struct i915_vma *vma; unsigned char *per_ctx_va = @@ -428,12 +429,12 @@ static void prepare_shadow_wa_ctx(struct intel_shadow_wa_ctx *wa_ctx) wa_ctx->indirect_ctx.size; if (wa_ctx->indirect_ctx.size == 0) - return; + return 0; vma = i915_gem_object_ggtt_pin(wa_ctx->indirect_ctx.obj, NULL, 0, CACHELINE_BYTES, 0); if (IS_ERR(vma)) { - return; + return PTR_ERR(vma); } /* FIXME: we are not tracking our pinned VMA leaving it @@ -447,26 +448,7 @@ static void prepare_shadow_wa_ctx(struct intel_shadow_wa_ctx *wa_ctx) memset(per_ctx_va, 0, CACHELINE_BYTES); update_wa_ctx_2_shadow_ctx(wa_ctx); -} - -static int prepare_execlist_workload(struct intel_vgpu_workload *workload) -{ - struct intel_vgpu *vgpu = workload->vgpu; - struct execlist_ctx_descriptor_format ctx[2]; - int ring_id = workload->ring_id; - - intel_vgpu_pin_mm(workload->shadow_mm); - intel_vgpu_sync_oos_pages(workload->vgpu); - intel_vgpu_flush_post_shadow(workload->vgpu); - prepare_shadow_batch_buffer(workload); - prepare_shadow_wa_ctx(&workload->wa_ctx); - if (!workload->emulate_schedule_in) - return 0; - - ctx[0] = *get_desc_from_elsp_dwords(&workload->elsp_dwords, 1); - ctx[1] = *get_desc_from_elsp_dwords(&workload->elsp_dwords, 0); - - return emulate_execlist_schedule_in(&vgpu->execlist[ring_id], ctx); + return 0; } static void release_shadow_batch_buffer(struct intel_vgpu_workload *workload) @@ -489,13 +471,68 @@ static void release_shadow_batch_buffer(struct intel_vgpu_workload *workload) } } -static void release_shadow_wa_ctx(struct intel_shadow_wa_ctx *wa_ctx) +static int prepare_execlist_workload(struct intel_vgpu_workload *workload) { - if (!wa_ctx->indirect_ctx.obj) - return; + struct intel_vgpu *vgpu = workload->vgpu; + 
struct execlist_ctx_descriptor_format ctx[2]; + int ring_id = workload->ring_id; + int ret; + + ret = intel_vgpu_pin_mm(workload->shadow_mm); + if (ret) { + gvt_vgpu_err("fail to vgpu pin mm\n"); + goto out; + } + + ret = intel_vgpu_sync_oos_pages(workload->vgpu); + if (ret) { + gvt_vgpu_err("fail to vgpu sync oos pages\n"); + goto err_unpin_mm; + } + + ret = intel_vgpu_flush_post_shadow(workload->vgpu); + if (ret) { + gvt_vgpu_err("fail to flush post shadow\n"); + goto err_unpin_mm; + } + + ret = intel_gvt_generate_request(workload); + if (ret) { + gvt_vgpu_err("fail to generate request\n"); + goto err_unpin_mm; + } + + ret = prepare_shadow_batch_buffer(workload); + if (ret) { + gvt_vgpu_err("fail to prepare_shadow_batch_buffer\n"); + goto err_unpin_mm; + } + + ret = prepare_shadow_wa_ctx(&workload->wa_ctx); + if (ret) { + gvt_vgpu_err("fail to prepare_shadow_wa_ctx\n"); + goto err_shadow_batch; + } + + if (!workload->emulate_schedule_in) + return 0; - i915_gem_object_unpin_map(wa_ctx->indirect_ctx.obj); - i915_gem_object_put(wa_ctx->indirect_ctx.obj); + ctx[0] = *get_desc_from_elsp_dwords(&workload->elsp_dwords, 1); + ctx[1] = *get_desc_from_elsp_dwords(&workload->elsp_dwords, 0); + + ret = emulate_execlist_schedule_in(&vgpu->execlist[ring_id], ctx); + if (!ret) + goto out; + else + gvt_vgpu_err("fail to emulate execlist schedule in\n"); + + release_shadow_wa_ctx(&workload->wa_ctx); +err_shadow_batch: + release_shadow_batch_buffer(workload); +err_unpin_mm: + intel_vgpu_unpin_mm(workload->shadow_mm); +out: + return ret; } static int complete_execlist_workload(struct intel_vgpu_workload *workload) @@ -511,8 +548,10 @@ static int complete_execlist_workload(struct intel_vgpu_workload *workload) gvt_dbg_el("complete workload %p status %d\n", workload, workload->status); - release_shadow_batch_buffer(workload); - release_shadow_wa_ctx(&workload->wa_ctx); + if (!workload->status) { + release_shadow_batch_buffer(workload); + release_shadow_wa_ctx(&workload->wa_ctx); + } if (workload->status || (vgpu->resetting_eng & ENGINE_MASK(ring_id))) { /* if workload->status is not successful means HW GPU @@ -819,10 +858,21 @@ static void clean_workloads(struct intel_vgpu *vgpu, unsigned long engine_mask) void intel_vgpu_clean_execlist(struct intel_vgpu *vgpu) { + enum intel_engine_id i; + struct intel_engine_cs *engine; + clean_workloads(vgpu, ALL_ENGINES); kmem_cache_destroy(vgpu->workloads); + + for_each_engine(engine, vgpu->gvt->dev_priv, i) { + kfree(vgpu->reserve_ring_buffer_va[i]); + vgpu->reserve_ring_buffer_va[i] = NULL; + vgpu->reserve_ring_buffer_size[i] = 0; + } + } +#define RESERVE_RING_BUFFER_SIZE ((1 * PAGE_SIZE)/8) int intel_vgpu_init_execlist(struct intel_vgpu *vgpu) { enum intel_engine_id i; @@ -842,7 +892,26 @@ int intel_vgpu_init_execlist(struct intel_vgpu *vgpu) if (!vgpu->workloads) return -ENOMEM; + /* each ring has a shadow ring buffer until vgpu destroyed */ + for_each_engine(engine, vgpu->gvt->dev_priv, i) { + vgpu->reserve_ring_buffer_va[i] = + kmalloc(RESERVE_RING_BUFFER_SIZE, GFP_KERNEL); + if (!vgpu->reserve_ring_buffer_va[i]) { + gvt_vgpu_err("fail to alloc reserve ring buffer\n"); + goto out; + } + vgpu->reserve_ring_buffer_size[i] = RESERVE_RING_BUFFER_SIZE; + } return 0; +out: + for_each_engine(engine, vgpu->gvt->dev_priv, i) { + if (vgpu->reserve_ring_buffer_size[i]) { + kfree(vgpu->reserve_ring_buffer_va[i]); + vgpu->reserve_ring_buffer_va[i] = NULL; + vgpu->reserve_ring_buffer_size[i] = 0; + } + } + return -ENOMEM; } void intel_vgpu_reset_execlist(struct intel_vgpu 
*vgpu, diff --git a/drivers/gpu/drm/i915/gvt/gtt.c b/drivers/gpu/drm/i915/gvt/gtt.c index e6dfc3331f4b..8e331142badb 100644 --- a/drivers/gpu/drm/i915/gvt/gtt.c +++ b/drivers/gpu/drm/i915/gvt/gtt.c @@ -311,9 +311,9 @@ static inline int gtt_set_entry64(void *pt, #define GTT_HAW 46 -#define ADDR_1G_MASK (((1UL << (GTT_HAW - 30 + 1)) - 1) << 30) -#define ADDR_2M_MASK (((1UL << (GTT_HAW - 21 + 1)) - 1) << 21) -#define ADDR_4K_MASK (((1UL << (GTT_HAW - 12 + 1)) - 1) << 12) +#define ADDR_1G_MASK (((1UL << (GTT_HAW - 30)) - 1) << 30) +#define ADDR_2M_MASK (((1UL << (GTT_HAW - 21)) - 1) << 21) +#define ADDR_4K_MASK (((1UL << (GTT_HAW - 12)) - 1) << 12) static unsigned long gen8_gtt_get_pfn(struct intel_gvt_gtt_entry *e) { @@ -1647,14 +1647,13 @@ int intel_vgpu_pin_mm(struct intel_vgpu_mm *mm) if (WARN_ON(mm->type != INTEL_GVT_MM_PPGTT)) return 0; - atomic_inc(&mm->pincount); - if (!mm->shadowed) { ret = shadow_mm(mm); if (ret) return ret; } + atomic_inc(&mm->pincount); list_del_init(&mm->lru_list); list_add_tail(&mm->lru_list, &mm->vgpu->gvt->gtt.mm_lru_list_head); return 0; @@ -1972,7 +1971,7 @@ static int alloc_scratch_pages(struct intel_vgpu *vgpu, */ se.val64 |= _PAGE_PRESENT | _PAGE_RW; if (type == GTT_TYPE_PPGTT_PDE_PT) - se.val64 |= PPAT_CACHED_INDEX; + se.val64 |= PPAT_CACHED; for (i = 0; i < page_entry_num; i++) ops->set_entry(scratch_pt, &se, i, false, 0, vgpu); diff --git a/drivers/gpu/drm/i915/gvt/gvt.c b/drivers/gpu/drm/i915/gvt/gvt.c index c27c6838eaca..aaa347f8620c 100644 --- a/drivers/gpu/drm/i915/gvt/gvt.c +++ b/drivers/gpu/drm/i915/gvt/gvt.c @@ -111,7 +111,7 @@ static void init_device_info(struct intel_gvt *gvt) if (IS_BROADWELL(gvt->dev_priv) || IS_SKYLAKE(gvt->dev_priv) || IS_KABYLAKE(gvt->dev_priv)) { info->max_support_vgpus = 8; - info->cfg_space_size = 256; + info->cfg_space_size = PCI_CFG_SPACE_EXP_SIZE; info->mmio_size = 2 * 1024 * 1024; info->mmio_bar = 0; info->gtt_start_offset = 8 * 1024 * 1024; diff --git a/drivers/gpu/drm/i915/gvt/gvt.h b/drivers/gpu/drm/i915/gvt/gvt.h index 44b719eda8c4..9c2e7c0aa38f 100644 --- a/drivers/gpu/drm/i915/gvt/gvt.h +++ b/drivers/gpu/drm/i915/gvt/gvt.h @@ -80,6 +80,7 @@ struct intel_gvt_device_info { struct intel_vgpu_gm { u64 aperture_sz; u64 hidden_sz; + void *aperture_va; struct drm_mm_node low_gm_node; struct drm_mm_node high_gm_node; }; @@ -99,7 +100,6 @@ struct intel_vgpu_mmio { bool disable_warn_untrack; }; -#define INTEL_GVT_MAX_CFG_SPACE_SZ 256 #define INTEL_GVT_MAX_BAR_NUM 4 struct intel_vgpu_pci_bar { @@ -108,7 +108,7 @@ struct intel_vgpu_pci_bar { }; struct intel_vgpu_cfg_space { - unsigned char virtual_cfg_space[INTEL_GVT_MAX_CFG_SPACE_SZ]; + unsigned char virtual_cfg_space[PCI_CFG_SPACE_EXP_SIZE]; struct intel_vgpu_pci_bar bar[INTEL_GVT_MAX_BAR_NUM]; }; @@ -165,6 +165,9 @@ struct intel_vgpu { struct list_head workload_q_head[I915_NUM_ENGINES]; struct kmem_cache *workloads; atomic_t running_workload_num; + /* 1/2K for each reserve ring buffer */ + void *reserve_ring_buffer_va[I915_NUM_ENGINES]; + int reserve_ring_buffer_size[I915_NUM_ENGINES]; DECLARE_BITMAP(tlb_handle_pending, I915_NUM_ENGINES); struct i915_gem_context *shadow_ctx; DECLARE_BITMAP(shadow_ctx_desc_updated, I915_NUM_ENGINES); @@ -474,6 +477,13 @@ int intel_vgpu_emulate_cfg_read(struct intel_vgpu *vgpu, unsigned int offset, int intel_vgpu_emulate_cfg_write(struct intel_vgpu *vgpu, unsigned int offset, void *p_data, unsigned int bytes); +static inline u64 intel_vgpu_get_bar_gpa(struct intel_vgpu *vgpu, int bar) +{ + /* We are 64bit bar. 
*/ + return (*(u64 *)(vgpu->cfg_space.virtual_cfg_space + bar)) & + PCI_BASE_ADDRESS_MEM_MASK; +} + void intel_gvt_clean_opregion(struct intel_gvt *gvt); int intel_gvt_init_opregion(struct intel_gvt *gvt); diff --git a/drivers/gpu/drm/i915/gvt/handlers.c b/drivers/gpu/drm/i915/gvt/handlers.c index a5bed2e71b92..1f840f6b81bb 100644 --- a/drivers/gpu/drm/i915/gvt/handlers.c +++ b/drivers/gpu/drm/i915/gvt/handlers.c @@ -137,17 +137,26 @@ static int new_mmio_info(struct intel_gvt *gvt, return 0; } -static int render_mmio_to_ring_id(struct intel_gvt *gvt, unsigned int reg) +/** + * intel_gvt_render_mmio_to_ring_id - convert a mmio offset into ring id + * @gvt: a GVT device + * @offset: register offset + * + * Returns: + * Ring ID on success, negative error code if failed. + */ +int intel_gvt_render_mmio_to_ring_id(struct intel_gvt *gvt, + unsigned int offset) { enum intel_engine_id id; struct intel_engine_cs *engine; - reg &= ~GENMASK(11, 0); + offset &= ~GENMASK(11, 0); for_each_engine(engine, gvt->dev_priv, id) { - if (engine->mmio_base == reg) + if (engine->mmio_base == offset) return id; } - return -1; + return -ENODEV; } #define offset_to_fence_num(offset) \ @@ -1381,40 +1390,6 @@ static int skl_power_well_ctl_write(struct intel_vgpu *vgpu, return intel_vgpu_default_mmio_write(vgpu, offset, &v, bytes); } -static int skl_misc_ctl_write(struct intel_vgpu *vgpu, unsigned int offset, - void *p_data, unsigned int bytes) -{ - struct drm_i915_private *dev_priv = vgpu->gvt->dev_priv; - u32 v = *(u32 *)p_data; - - if (!IS_SKYLAKE(dev_priv) && !IS_KABYLAKE(dev_priv)) - return intel_vgpu_default_mmio_write(vgpu, - offset, p_data, bytes); - - switch (offset) { - case 0x4ddc: - /* bypass WaCompressedResourceSamplerPbeMediaNewHashMode */ - vgpu_vreg(vgpu, offset) = v & ~(1 << 31); - break; - case 0x42080: - /* bypass WaCompressedResourceDisplayNewHashMode */ - vgpu_vreg(vgpu, offset) = v & ~(1 << 15); - break; - case 0xe194: - /* bypass WaCompressedResourceSamplerPbeMediaNewHashMode */ - vgpu_vreg(vgpu, offset) = v & ~(1 << 8); - break; - case 0x7014: - /* bypass WaCompressedResourceSamplerPbeMediaNewHashMode */ - vgpu_vreg(vgpu, offset) = v & ~(1 << 13); - break; - default: - return -EINVAL; - } - - return 0; -} - static int skl_lcpll_write(struct intel_vgpu *vgpu, unsigned int offset, void *p_data, unsigned int bytes) { @@ -1432,18 +1407,36 @@ static int skl_lcpll_write(struct intel_vgpu *vgpu, unsigned int offset, static int mmio_read_from_hw(struct intel_vgpu *vgpu, unsigned int offset, void *p_data, unsigned int bytes) { - struct drm_i915_private *dev_priv = vgpu->gvt->dev_priv; + struct intel_gvt *gvt = vgpu->gvt; + struct drm_i915_private *dev_priv = gvt->dev_priv; + int ring_id; + u32 ring_base; + + ring_id = intel_gvt_render_mmio_to_ring_id(gvt, offset); + /** + * Read HW reg in following case + * a. the offset isn't a ring mmio + * b. the offset's ring is running on hw. + * c. 
the offset is ring time stamp mmio + */ + if (ring_id >= 0) + ring_base = dev_priv->engine[ring_id]->mmio_base; + + if (ring_id < 0 || vgpu == gvt->scheduler.engine_owner[ring_id] || + offset == i915_mmio_reg_offset(RING_TIMESTAMP(ring_base)) || + offset == i915_mmio_reg_offset(RING_TIMESTAMP_UDW(ring_base))) { + mmio_hw_access_pre(dev_priv); + vgpu_vreg(vgpu, offset) = I915_READ(_MMIO(offset)); + mmio_hw_access_post(dev_priv); + } - mmio_hw_access_pre(dev_priv); - vgpu_vreg(vgpu, offset) = I915_READ(_MMIO(offset)); - mmio_hw_access_post(dev_priv); return intel_vgpu_default_mmio_read(vgpu, offset, p_data, bytes); } static int elsp_mmio_write(struct intel_vgpu *vgpu, unsigned int offset, void *p_data, unsigned int bytes) { - int ring_id = render_mmio_to_ring_id(vgpu->gvt, offset); + int ring_id = intel_gvt_render_mmio_to_ring_id(vgpu->gvt, offset); struct intel_vgpu_execlist *execlist; u32 data = *(u32 *)p_data; int ret = 0; @@ -1470,7 +1463,7 @@ static int ring_mode_mmio_write(struct intel_vgpu *vgpu, unsigned int offset, void *p_data, unsigned int bytes) { u32 data = *(u32 *)p_data; - int ring_id = render_mmio_to_ring_id(vgpu->gvt, offset); + int ring_id = intel_gvt_render_mmio_to_ring_id(vgpu->gvt, offset); bool enable_execlist; write_vreg(vgpu, offset, p_data, bytes); @@ -1671,8 +1664,8 @@ static int init_generic_mmio_info(struct intel_gvt *gvt) MMIO_DFH(GAM_ECOCHK, D_ALL, F_CMD_ACCESS, NULL, NULL); MMIO_DFH(GEN7_COMMON_SLICE_CHICKEN1, D_ALL, F_MODE_MASK | F_CMD_ACCESS, NULL, NULL); - MMIO_DFH(COMMON_SLICE_CHICKEN2, D_ALL, F_MODE_MASK | F_CMD_ACCESS, NULL, - skl_misc_ctl_write); + MMIO_DFH(COMMON_SLICE_CHICKEN2, D_ALL, F_MODE_MASK | F_CMD_ACCESS, + NULL, NULL); MMIO_DFH(0x9030, D_ALL, F_CMD_ACCESS, NULL, NULL); MMIO_DFH(0x20a0, D_ALL, F_CMD_ACCESS, NULL, NULL); MMIO_DFH(0x2420, D_ALL, F_CMD_ACCESS, NULL, NULL); @@ -2564,8 +2557,7 @@ static int init_broadwell_mmio_info(struct intel_gvt *gvt) MMIO_D(0x6e570, D_BDW_PLUS); MMIO_D(0x65f10, D_BDW_PLUS); - MMIO_DFH(0xe194, D_BDW_PLUS, F_MODE_MASK | F_CMD_ACCESS, NULL, - skl_misc_ctl_write); + MMIO_DFH(0xe194, D_BDW_PLUS, F_MODE_MASK | F_CMD_ACCESS, NULL, NULL); MMIO_DFH(0xe188, D_BDW_PLUS, F_MODE_MASK | F_CMD_ACCESS, NULL, NULL); MMIO_DFH(HALF_SLICE_CHICKEN2, D_BDW_PLUS, F_MODE_MASK | F_CMD_ACCESS, NULL, NULL); MMIO_DFH(0x2580, D_BDW_PLUS, F_MODE_MASK | F_CMD_ACCESS, NULL, NULL); @@ -2615,8 +2607,8 @@ static int init_skl_mmio_info(struct intel_gvt *gvt) MMIO_D(GEN9_MEDIA_PG_IDLE_HYSTERESIS, D_SKL_PLUS); MMIO_D(GEN9_RENDER_PG_IDLE_HYSTERESIS, D_SKL_PLUS); MMIO_DFH(GEN9_GAMT_ECO_REG_RW_IA, D_SKL_PLUS, F_CMD_ACCESS, NULL, NULL); - MMIO_DH(0x4ddc, D_SKL_PLUS, NULL, skl_misc_ctl_write); - MMIO_DH(0x42080, D_SKL_PLUS, NULL, skl_misc_ctl_write); + MMIO_DH(0x4ddc, D_SKL_PLUS, NULL, NULL); + MMIO_DH(0x42080, D_SKL_PLUS, NULL, NULL); MMIO_D(0x45504, D_SKL_PLUS); MMIO_D(0x45520, D_SKL_PLUS); MMIO_D(0x46000, D_SKL_PLUS); diff --git a/drivers/gpu/drm/i915/gvt/kvmgt.c b/drivers/gpu/drm/i915/gvt/kvmgt.c index 83e88c70272a..96060920a6fe 100644 --- a/drivers/gpu/drm/i915/gvt/kvmgt.c +++ b/drivers/gpu/drm/i915/gvt/kvmgt.c @@ -609,21 +609,20 @@ static void intel_vgpu_release_work(struct work_struct *work) __intel_vgpu_release(vgpu); } -static uint64_t intel_vgpu_get_bar0_addr(struct intel_vgpu *vgpu) +static uint64_t intel_vgpu_get_bar_addr(struct intel_vgpu *vgpu, int bar) { u32 start_lo, start_hi; u32 mem_type; - int pos = PCI_BASE_ADDRESS_0; - start_lo = (*(u32 *)(vgpu->cfg_space.virtual_cfg_space + pos)) & + start_lo = (*(u32 
*)(vgpu->cfg_space.virtual_cfg_space + bar)) & PCI_BASE_ADDRESS_MEM_MASK; - mem_type = (*(u32 *)(vgpu->cfg_space.virtual_cfg_space + pos)) & + mem_type = (*(u32 *)(vgpu->cfg_space.virtual_cfg_space + bar)) & PCI_BASE_ADDRESS_MEM_TYPE_MASK; switch (mem_type) { case PCI_BASE_ADDRESS_MEM_TYPE_64: start_hi = (*(u32 *)(vgpu->cfg_space.virtual_cfg_space - + pos + 4)); + + bar + 4)); break; case PCI_BASE_ADDRESS_MEM_TYPE_32: case PCI_BASE_ADDRESS_MEM_TYPE_1M: @@ -637,6 +636,21 @@ static uint64_t intel_vgpu_get_bar0_addr(struct intel_vgpu *vgpu) return ((u64)start_hi << 32) | start_lo; } +static int intel_vgpu_bar_rw(struct intel_vgpu *vgpu, int bar, uint64_t off, + void *buf, unsigned int count, bool is_write) +{ + uint64_t bar_start = intel_vgpu_get_bar_addr(vgpu, bar); + int ret; + + if (is_write) + ret = intel_gvt_ops->emulate_mmio_write(vgpu, + bar_start + off, buf, count); + else + ret = intel_gvt_ops->emulate_mmio_read(vgpu, + bar_start + off, buf, count); + return ret; +} + static ssize_t intel_vgpu_rw(struct mdev_device *mdev, char *buf, size_t count, loff_t *ppos, bool is_write) { @@ -661,20 +675,14 @@ static ssize_t intel_vgpu_rw(struct mdev_device *mdev, char *buf, buf, count); break; case VFIO_PCI_BAR0_REGION_INDEX: - case VFIO_PCI_BAR1_REGION_INDEX: - if (is_write) { - uint64_t bar0_start = intel_vgpu_get_bar0_addr(vgpu); - - ret = intel_gvt_ops->emulate_mmio_write(vgpu, - bar0_start + pos, buf, count); - } else { - uint64_t bar0_start = intel_vgpu_get_bar0_addr(vgpu); - - ret = intel_gvt_ops->emulate_mmio_read(vgpu, - bar0_start + pos, buf, count); - } + ret = intel_vgpu_bar_rw(vgpu, PCI_BASE_ADDRESS_0, pos, + buf, count, is_write); break; case VFIO_PCI_BAR2_REGION_INDEX: + ret = intel_vgpu_bar_rw(vgpu, PCI_BASE_ADDRESS_2, pos, + buf, count, is_write); + break; + case VFIO_PCI_BAR1_REGION_INDEX: case VFIO_PCI_BAR3_REGION_INDEX: case VFIO_PCI_BAR4_REGION_INDEX: case VFIO_PCI_BAR5_REGION_INDEX: @@ -970,7 +978,7 @@ static long intel_vgpu_ioctl(struct mdev_device *mdev, unsigned int cmd, switch (info.index) { case VFIO_PCI_CONFIG_REGION_INDEX: info.offset = VFIO_PCI_INDEX_TO_OFFSET(info.index); - info.size = INTEL_GVT_MAX_CFG_SPACE_SZ; + info.size = vgpu->gvt->device_info.cfg_space_size; info.flags = VFIO_REGION_INFO_FLAG_READ | VFIO_REGION_INFO_FLAG_WRITE; break; diff --git a/drivers/gpu/drm/i915/gvt/mmio.c b/drivers/gpu/drm/i915/gvt/mmio.c index 980ec8906b1e..1e1310f50289 100644 --- a/drivers/gpu/drm/i915/gvt/mmio.c +++ b/drivers/gpu/drm/i915/gvt/mmio.c @@ -45,8 +45,7 @@ */ int intel_vgpu_gpa_to_mmio_offset(struct intel_vgpu *vgpu, u64 gpa) { - u64 gttmmio_gpa = *(u64 *)(vgpu_cfg_space(vgpu) + PCI_BASE_ADDRESS_0) & - ~GENMASK(3, 0); + u64 gttmmio_gpa = intel_vgpu_get_bar_gpa(vgpu, PCI_BASE_ADDRESS_0); return gpa - gttmmio_gpa; } @@ -57,6 +56,38 @@ int intel_vgpu_gpa_to_mmio_offset(struct intel_vgpu *vgpu, u64 gpa) (reg >= gvt->device_info.gtt_start_offset \ && reg < gvt->device_info.gtt_start_offset + gvt_ggtt_sz(gvt)) +static bool vgpu_gpa_is_aperture(struct intel_vgpu *vgpu, uint64_t gpa) +{ + u64 aperture_gpa = intel_vgpu_get_bar_gpa(vgpu, PCI_BASE_ADDRESS_2); + u64 aperture_sz = vgpu_aperture_sz(vgpu); + + return gpa >= aperture_gpa && gpa < aperture_gpa + aperture_sz; +} + +static int vgpu_aperture_rw(struct intel_vgpu *vgpu, uint64_t gpa, + void *pdata, unsigned int size, bool is_read) +{ + u64 aperture_gpa = intel_vgpu_get_bar_gpa(vgpu, PCI_BASE_ADDRESS_2); + u64 offset = gpa - aperture_gpa; + + if (!vgpu_gpa_is_aperture(vgpu, gpa + size - 1)) { + gvt_vgpu_err("Aperture rw out 
of range, offset %llx, size %d\n", + offset, size); + return -EINVAL; + } + + if (!vgpu->gm.aperture_va) { + gvt_vgpu_err("BAR is not enabled\n"); + return -ENXIO; + } + + if (is_read) + memcpy(pdata, vgpu->gm.aperture_va + offset, size); + else + memcpy(vgpu->gm.aperture_va + offset, pdata, size); + return 0; +} + static void failsafe_emulate_mmio_rw(struct intel_vgpu *vgpu, uint64_t pa, void *p_data, unsigned int bytes, bool read) { @@ -133,6 +164,12 @@ int intel_vgpu_emulate_mmio_read(struct intel_vgpu *vgpu, uint64_t pa, } mutex_lock(&gvt->lock); + if (vgpu_gpa_is_aperture(vgpu, pa)) { + ret = vgpu_aperture_rw(vgpu, pa, p_data, bytes, true); + mutex_unlock(&gvt->lock); + return ret; + } + if (atomic_read(&vgpu->gtt.n_write_protected_guest_page)) { struct intel_vgpu_guest_page *gp; @@ -224,6 +261,12 @@ int intel_vgpu_emulate_mmio_write(struct intel_vgpu *vgpu, uint64_t pa, mutex_lock(&gvt->lock); + if (vgpu_gpa_is_aperture(vgpu, pa)) { + ret = vgpu_aperture_rw(vgpu, pa, p_data, bytes, false); + mutex_unlock(&gvt->lock); + return ret; + } + if (atomic_read(&vgpu->gtt.n_write_protected_guest_page)) { struct intel_vgpu_guest_page *gp; diff --git a/drivers/gpu/drm/i915/gvt/mmio.h b/drivers/gpu/drm/i915/gvt/mmio.h index 32cd64ddad26..dbc04ad2c7a1 100644 --- a/drivers/gpu/drm/i915/gvt/mmio.h +++ b/drivers/gpu/drm/i915/gvt/mmio.h @@ -65,6 +65,8 @@ struct intel_gvt_mmio_info { struct hlist_node node; }; +int intel_gvt_render_mmio_to_ring_id(struct intel_gvt *gvt, + unsigned int reg); unsigned long intel_gvt_get_device_type(struct intel_gvt *gvt); bool intel_gvt_match_device(struct intel_gvt *gvt, unsigned long device); diff --git a/drivers/gpu/drm/i915/gvt/render.c b/drivers/gpu/drm/i915/gvt/render.c index 2ea542257f03..6d066cf35478 100644 --- a/drivers/gpu/drm/i915/gvt/render.c +++ b/drivers/gpu/drm/i915/gvt/render.c @@ -293,7 +293,7 @@ static void switch_mmio_to_vgpu(struct intel_vgpu *vgpu, int ring_id) */ if (mmio->in_context && ((ctx_ctrl & inhibit_mask) != inhibit_mask) && - i915.enable_execlists) + i915_modparams.enable_execlists) continue; if (mmio->mask) diff --git a/drivers/gpu/drm/i915/gvt/scheduler.c b/drivers/gpu/drm/i915/gvt/scheduler.c index 391800d2067b..69f8f0d155b9 100644 --- a/drivers/gpu/drm/i915/gvt/scheduler.c +++ b/drivers/gpu/drm/i915/gvt/scheduler.c @@ -87,7 +87,7 @@ static int populate_shadow_context(struct intel_vgpu_workload *workload) return -EINVAL; } - page = i915_gem_object_get_page(ctx_obj, LRC_PPHWSP_PN + i); + page = i915_gem_object_get_page(ctx_obj, LRC_HEADER_PAGES + i); dst = kmap(page); intel_gvt_hypervisor_read_gpa(vgpu, context_gpa, dst, GTT_PAGE_SIZE); @@ -131,6 +131,20 @@ static inline bool is_gvt_request(struct drm_i915_gem_request *req) return i915_gem_context_force_single_submission(req->ctx); } +static void save_ring_hw_state(struct intel_vgpu *vgpu, int ring_id) +{ + struct drm_i915_private *dev_priv = vgpu->gvt->dev_priv; + u32 ring_base = dev_priv->engine[ring_id]->mmio_base; + i915_reg_t reg; + + reg = RING_INSTDONE(ring_base); + vgpu_vreg(vgpu, i915_mmio_reg_offset(reg)) = I915_READ_FW(reg); + reg = RING_ACTHD(ring_base); + vgpu_vreg(vgpu, i915_mmio_reg_offset(reg)) = I915_READ_FW(reg); + reg = RING_ACTHD_UDW(ring_base); + vgpu_vreg(vgpu, i915_mmio_reg_offset(reg)) = I915_READ_FW(reg); +} + static int shadow_context_status_change(struct notifier_block *nb, unsigned long action, void *data) { @@ -140,9 +154,10 @@ static int shadow_context_status_change(struct notifier_block *nb, struct intel_gvt_workload_scheduler *scheduler = &gvt->scheduler; 
enum intel_engine_id ring_id = req->engine->id; struct intel_vgpu_workload *workload; + unsigned long flags; if (!is_gvt_request(req)) { - spin_lock_bh(&scheduler->mmio_context_lock); + spin_lock_irqsave(&scheduler->mmio_context_lock, flags); if (action == INTEL_CONTEXT_SCHEDULE_IN && scheduler->engine_owner[ring_id]) { /* Switch ring from vGPU to host. */ @@ -150,7 +165,7 @@ static int shadow_context_status_change(struct notifier_block *nb, NULL, ring_id); scheduler->engine_owner[ring_id] = NULL; } - spin_unlock_bh(&scheduler->mmio_context_lock); + spin_unlock_irqrestore(&scheduler->mmio_context_lock, flags); return NOTIFY_OK; } @@ -161,7 +176,7 @@ static int shadow_context_status_change(struct notifier_block *nb, switch (action) { case INTEL_CONTEXT_SCHEDULE_IN: - spin_lock_bh(&scheduler->mmio_context_lock); + spin_lock_irqsave(&scheduler->mmio_context_lock, flags); if (workload->vgpu != scheduler->engine_owner[ring_id]) { /* Switch ring from host to vGPU or vGPU to vGPU. */ intel_gvt_switch_mmio(scheduler->engine_owner[ring_id], @@ -170,12 +185,16 @@ static int shadow_context_status_change(struct notifier_block *nb, } else gvt_dbg_sched("skip ring %d mmio switch for vgpu%d\n", ring_id, workload->vgpu->id); - spin_unlock_bh(&scheduler->mmio_context_lock); + spin_unlock_irqrestore(&scheduler->mmio_context_lock, flags); atomic_set(&workload->shadow_ctx_active, 1); break; case INTEL_CONTEXT_SCHEDULE_OUT: + save_ring_hw_state(workload->vgpu, ring_id); atomic_set(&workload->shadow_ctx_active, 0); break; + case INTEL_CONTEXT_SCHEDULE_PREEMPTED: + save_ring_hw_state(workload->vgpu, ring_id); + break; default: WARN_ON(1); return NOTIFY_OK; @@ -201,6 +220,43 @@ static void shadow_context_descriptor_update(struct i915_gem_context *ctx, ce->lrc_desc = desc; } +static int copy_workload_to_ring_buffer(struct intel_vgpu_workload *workload) +{ + struct intel_vgpu *vgpu = workload->vgpu; + void *shadow_ring_buffer_va; + u32 *cs; + + /* allocate shadow ring buffer */ + cs = intel_ring_begin(workload->req, workload->rb_len / sizeof(u32)); + if (IS_ERR(cs)) { + gvt_vgpu_err("fail to alloc size =%ld shadow ring buffer\n", + workload->rb_len); + return PTR_ERR(cs); + } + + shadow_ring_buffer_va = workload->shadow_ring_buffer_va; + + /* get shadow ring buffer va */ + workload->shadow_ring_buffer_va = cs; + + memcpy(cs, shadow_ring_buffer_va, + workload->rb_len); + + cs += workload->rb_len / sizeof(u32); + intel_ring_advance(workload->req, cs); + + return 0; +} + +void release_shadow_wa_ctx(struct intel_shadow_wa_ctx *wa_ctx) +{ + if (!wa_ctx->indirect_ctx.obj) + return; + + i915_gem_object_unpin_map(wa_ctx->indirect_ctx.obj); + i915_gem_object_put(wa_ctx->indirect_ctx.obj); +} + /** * intel_gvt_scan_and_shadow_workload - audit the workload by scanning and * shadow it as well, include ringbuffer,wa_ctx and ctx. 
@@ -214,8 +270,9 @@ int intel_gvt_scan_and_shadow_workload(struct intel_vgpu_workload *workload) int ring_id = workload->ring_id; struct i915_gem_context *shadow_ctx = workload->vgpu->shadow_ctx; struct drm_i915_private *dev_priv = workload->vgpu->gvt->dev_priv; - struct drm_i915_gem_request *rq; + struct intel_engine_cs *engine = dev_priv->engine[ring_id]; struct intel_vgpu *vgpu = workload->vgpu; + struct intel_ring *ring; int ret; lockdep_assert_held(&dev_priv->drm.struct_mutex); @@ -231,35 +288,73 @@ int intel_gvt_scan_and_shadow_workload(struct intel_vgpu_workload *workload) shadow_context_descriptor_update(shadow_ctx, dev_priv->engine[ring_id]); - rq = i915_gem_request_alloc(dev_priv->engine[ring_id], shadow_ctx); - if (IS_ERR(rq)) { - gvt_vgpu_err("fail to allocate gem request\n"); - ret = PTR_ERR(rq); - goto out; - } - - gvt_dbg_sched("ring id %d get i915 gem request %p\n", ring_id, rq); - - workload->req = i915_gem_request_get(rq); - ret = intel_gvt_scan_and_shadow_ringbuffer(workload); if (ret) - goto out; + goto err_scan; if ((workload->ring_id == RCS) && (workload->wa_ctx.indirect_ctx.size != 0)) { ret = intel_gvt_scan_and_shadow_wa_ctx(&workload->wa_ctx); if (ret) - goto out; + goto err_scan; + } + + /* pin shadow context by gvt even the shadow context will be pinned + * when i915 alloc request. That is because gvt will update the guest + * context from shadow context when workload is completed, and at that + * moment, i915 may already unpined the shadow context to make the + * shadow_ctx pages invalid. So gvt need to pin itself. After update + * the guest context, gvt can unpin the shadow_ctx safely. + */ + ring = engine->context_pin(engine, shadow_ctx); + if (IS_ERR(ring)) { + ret = PTR_ERR(ring); + gvt_vgpu_err("fail to pin shadow context\n"); + goto err_shadow; } ret = populate_shadow_context(workload); if (ret) - goto out; - + goto err_unpin; workload->shadowed = true; + return 0; -out: +err_unpin: + engine->context_unpin(engine, shadow_ctx); +err_shadow: + release_shadow_wa_ctx(&workload->wa_ctx); +err_scan: + return ret; +} + +int intel_gvt_generate_request(struct intel_vgpu_workload *workload) +{ + int ring_id = workload->ring_id; + struct drm_i915_private *dev_priv = workload->vgpu->gvt->dev_priv; + struct intel_engine_cs *engine = dev_priv->engine[ring_id]; + struct drm_i915_gem_request *rq; + struct intel_vgpu *vgpu = workload->vgpu; + struct i915_gem_context *shadow_ctx = vgpu->shadow_ctx; + int ret; + + rq = i915_gem_request_alloc(dev_priv->engine[ring_id], shadow_ctx); + if (IS_ERR(rq)) { + gvt_vgpu_err("fail to allocate gem request\n"); + ret = PTR_ERR(rq); + goto err_unpin; + } + + gvt_dbg_sched("ring id %d get i915 gem request %p\n", ring_id, rq); + + workload->req = i915_gem_request_get(rq); + ret = copy_workload_to_ring_buffer(workload); + if (ret) + goto err_unpin; + return 0; + +err_unpin: + engine->context_unpin(engine, shadow_ctx); + release_shadow_wa_ctx(&workload->wa_ctx); return ret; } @@ -269,8 +364,6 @@ static int dispatch_workload(struct intel_vgpu_workload *workload) struct i915_gem_context *shadow_ctx = workload->vgpu->shadow_ctx; struct drm_i915_private *dev_priv = workload->vgpu->gvt->dev_priv; struct intel_engine_cs *engine = dev_priv->engine[ring_id]; - struct intel_vgpu *vgpu = workload->vgpu; - struct intel_ring *ring; int ret = 0; gvt_dbg_sched("ring id %d prepare to dispatch workload %p\n", @@ -284,22 +377,10 @@ static int dispatch_workload(struct intel_vgpu_workload *workload) if (workload->prepare) { ret = workload->prepare(workload); - 
if (ret) + if (ret) { + engine->context_unpin(engine, shadow_ctx); goto out; - } - - /* pin shadow context by gvt even the shadow context will be pinned - * when i915 alloc request. That is because gvt will update the guest - * context from shadow context when workload is completed, and at that - * moment, i915 may already unpined the shadow context to make the - * shadow_ctx pages invalid. So gvt need to pin itself. After update - * the guest context, gvt can unpin the shadow_ctx safely. - */ - ring = engine->context_pin(engine, shadow_ctx); - if (IS_ERR(ring)) { - ret = PTR_ERR(ring); - gvt_vgpu_err("fail to pin shadow context\n"); - goto out; + } } out: @@ -408,7 +489,7 @@ static void update_guest_context(struct intel_vgpu_workload *workload) return; } - page = i915_gem_object_get_page(ctx_obj, LRC_PPHWSP_PN + i); + page = i915_gem_object_get_page(ctx_obj, LRC_HEADER_PAGES + i); src = kmap(page); intel_gvt_hypervisor_write_gpa(vgpu, context_gpa, src, GTT_PAGE_SIZE); @@ -676,6 +757,9 @@ int intel_vgpu_init_gvt_context(struct intel_vgpu *vgpu) if (IS_ERR(vgpu->shadow_ctx)) return PTR_ERR(vgpu->shadow_ctx); + if (INTEL_INFO(vgpu->gvt->dev_priv)->has_logical_ring_preemption) + vgpu->shadow_ctx->priority = INT_MAX; + vgpu->shadow_ctx->engine[RCS].initialised = true; bitmap_zero(vgpu->shadow_ctx_desc_updated, I915_NUM_ENGINES); diff --git a/drivers/gpu/drm/i915/gvt/scheduler.h b/drivers/gpu/drm/i915/gvt/scheduler.h index 93a49eb0209e..b9f872204d7e 100644 --- a/drivers/gpu/drm/i915/gvt/scheduler.h +++ b/drivers/gpu/drm/i915/gvt/scheduler.h @@ -141,4 +141,8 @@ int intel_vgpu_init_gvt_context(struct intel_vgpu *vgpu); void intel_vgpu_clean_gvt_context(struct intel_vgpu *vgpu); +void release_shadow_wa_ctx(struct intel_shadow_wa_ctx *wa_ctx); + +int intel_gvt_generate_request(struct intel_vgpu_workload *workload); + #endif |
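One smaller change worth calling out: aperture_gm.c stops walking dev_priv->mm.fence_list directly and instead goes through the fence reservation helpers. The following is a condensed sketch of the resulting allocate/unwind pattern, with the struct_mutex locking, runtime-PM bracketing and _clear_vgpu_fence() call of the real alloc_vgpu_fence() left out for brevity; the -ENOSPC return value is illustrative, since the error code comes from unchanged context not shown in the diff.

/*
 * Condensed sketch of the reworked fence allocation loop.  Assumes
 * vgpu->fence.regs[] starts out zeroed, as in the driver.
 */
static int vgpu_fence_alloc_sketch(struct intel_vgpu *vgpu,
				   struct drm_i915_private *dev_priv)
{
	struct drm_i915_fence_reg *reg;
	int i;

	for (i = 0; i < vgpu_fence_sz(vgpu); i++) {
		reg = i915_reserve_fence(dev_priv);
		if (IS_ERR(reg))
			goto out_free_fence;
		vgpu->fence.regs[i] = reg;
	}
	return 0;

out_free_fence:
	gvt_vgpu_err("Failed to alloc fences\n");
	/* hand every fence reserved so far back to the host */
	for (i = 0; i < vgpu_fence_sz(vgpu); i++) {
		reg = vgpu->fence.regs[i];
		if (!reg)
			continue;
		i915_unreserve_fence(reg);
		vgpu->fence.regs[i] = NULL;
	}
	return -ENOSPC;	/* illustrative; real code returns the original error */
}

Symmetrically, free_vgpu_fence() now returns each register with i915_unreserve_fence() instead of splicing it back onto the host fence list.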