diff options
Diffstat (limited to 'drivers')
76 files changed, 3807 insertions, 2141 deletions
diff --git a/drivers/gpu/drm/drm_edid.c b/drivers/gpu/drm/drm_edid.c index 7eec18925b70..6798c3ad9d53 100644 --- a/drivers/gpu/drm/drm_edid.c +++ b/drivers/gpu/drm/drm_edid.c @@ -3611,32 +3611,6 @@ int drm_av_sync_delay(struct drm_connector *connector, EXPORT_SYMBOL(drm_av_sync_delay); /** - * drm_select_eld - select one ELD from multiple HDMI/DP sinks - * @encoder: the encoder just changed display mode - * - * It's possible for one encoder to be associated with multiple HDMI/DP sinks. - * The policy is now hard coded to simply use the first HDMI/DP sink's ELD. - * - * Return: The connector associated with the first HDMI/DP sink that has ELD - * attached to it. - */ -struct drm_connector *drm_select_eld(struct drm_encoder *encoder) -{ - struct drm_connector *connector; - struct drm_device *dev = encoder->dev; - - WARN_ON(!mutex_is_locked(&dev->mode_config.mutex)); - WARN_ON(!drm_modeset_is_locked(&dev->mode_config.connection_mutex)); - - drm_for_each_connector(connector, dev) - if (connector->encoder == encoder && connector->eld[0]) - return connector; - - return NULL; -} -EXPORT_SYMBOL(drm_select_eld); - -/** * drm_detect_hdmi_monitor - detect whether monitor is HDMI * @edid: monitor EDID information * diff --git a/drivers/gpu/drm/i915/Kconfig b/drivers/gpu/drm/i915/Kconfig index df96aed6975a..5ddde7349fbd 100644 --- a/drivers/gpu/drm/i915/Kconfig +++ b/drivers/gpu/drm/i915/Kconfig @@ -36,15 +36,20 @@ config DRM_I915 If "M" is selected, the module will be called i915. -config DRM_I915_PRELIMINARY_HW_SUPPORT - bool "Enable preliminary support for prerelease Intel hardware by default" +config DRM_I915_ALPHA_SUPPORT + bool "Enable alpha quality support for new Intel hardware by default" depends on DRM_I915 default n help - Choose this option if you have prerelease Intel hardware and want the - i915 driver to support it by default. You can enable such support at - runtime with the module option i915.preliminary_hw_support=1; this - option changes the default for that module option. + Choose this option if you have new Intel hardware and want to enable + the alpha quality i915 driver support for the hardware in this kernel + version. You can also enable the support at runtime using the module + parameter i915.alpha_support=1; this option changes the default for + that module parameter. + + It is recommended to upgrade to a kernel version with proper support + as soon as it is available. Generally fixes for platforms with alpha + support are not backported to older kernels. If in doubt, say "N". @@ -107,6 +112,15 @@ config DRM_I915_GVT If in doubt, say "N". +config DRM_I915_GVT_KVMGT + tristate "Enable KVM/VFIO support for Intel GVT-g" + depends on DRM_I915_GVT + depends on KVM + default n + help + Choose this option if you want to enable KVMGT support for + Intel GVT-g. + menu "drm/i915 Debugging" depends on DRM_I915 depends on EXPERT diff --git a/drivers/gpu/drm/i915/Makefile b/drivers/gpu/drm/i915/Makefile index 0857e5035f4d..3dea46af9fe6 100644 --- a/drivers/gpu/drm/i915/Makefile +++ b/drivers/gpu/drm/i915/Makefile @@ -33,7 +33,7 @@ i915-y += i915_cmd_parser.o \ i915_gem_dmabuf.o \ i915_gem_evict.o \ i915_gem_execbuffer.o \ - i915_gem_fence.o \ + i915_gem_fence_reg.o \ i915_gem_gtt.o \ i915_gem_internal.o \ i915_gem.o \ @@ -45,6 +45,7 @@ i915-y += i915_cmd_parser.o \ i915_gem_timeline.o \ i915_gem_userptr.o \ i915_trace_points.o \ + i915_vma.o \ intel_breadcrumbs.o \ intel_engine_cs.o \ intel_hangcheck.o \ diff --git a/drivers/gpu/drm/i915/gvt/Makefile b/drivers/gpu/drm/i915/gvt/Makefile index 34ea4776af70..8a46a7f31d53 100644 --- a/drivers/gpu/drm/i915/gvt/Makefile +++ b/drivers/gpu/drm/i915/gvt/Makefile @@ -3,5 +3,8 @@ GVT_SOURCE := gvt.o aperture_gm.o handlers.o vgpu.o trace_points.o firmware.o \ interrupt.o gtt.o cfg_space.o opregion.o mmio.o display.o edid.o \ execlist.o scheduler.o sched_policy.o render.o cmd_parser.o -ccflags-y += -I$(src) -I$(src)/$(GVT_DIR) -Wall -i915-y += $(addprefix $(GVT_DIR)/, $(GVT_SOURCE)) +ccflags-y += -I$(src) -I$(src)/$(GVT_DIR) -Wall +i915-y += $(addprefix $(GVT_DIR)/, $(GVT_SOURCE)) + +CFLAGS_kvmgt.o := -Wno-unused-function +obj-$(CONFIG_DRM_I915_GVT_KVMGT) += $(GVT_DIR)/kvmgt.o diff --git a/drivers/gpu/drm/i915/gvt/cfg_space.c b/drivers/gpu/drm/i915/gvt/cfg_space.c index 4c687740f5f1..db516382a4d4 100644 --- a/drivers/gpu/drm/i915/gvt/cfg_space.c +++ b/drivers/gpu/drm/i915/gvt/cfg_space.c @@ -47,11 +47,9 @@ enum { * Returns: * Zero on success, negative error code if failed. */ -int intel_vgpu_emulate_cfg_read(void *__vgpu, unsigned int offset, +int intel_vgpu_emulate_cfg_read(struct intel_vgpu *vgpu, unsigned int offset, void *p_data, unsigned int bytes) { - struct intel_vgpu *vgpu = __vgpu; - if (WARN_ON(bytes > 4)) return -EINVAL; @@ -82,9 +80,8 @@ static int map_aperture(struct intel_vgpu *vgpu, bool map) ret = intel_gvt_hypervisor_map_gfn_to_mfn(vgpu, first_gfn, first_mfn, - vgpu_aperture_sz(vgpu) - >> PAGE_SHIFT, map, - GVT_MAP_APERTURE); + vgpu_aperture_sz(vgpu) >> + PAGE_SHIFT, map); if (ret) return ret; @@ -235,10 +232,9 @@ static int emulate_pci_bar_write(struct intel_vgpu *vgpu, unsigned int offset, * Returns: * Zero on success, negative error code if failed. */ -int intel_vgpu_emulate_cfg_write(void *__vgpu, unsigned int offset, +int intel_vgpu_emulate_cfg_write(struct intel_vgpu *vgpu, unsigned int offset, void *p_data, unsigned int bytes) { - struct intel_vgpu *vgpu = __vgpu; int ret; if (WARN_ON(bytes > 4)) diff --git a/drivers/gpu/drm/i915/gvt/cmd_parser.c b/drivers/gpu/drm/i915/gvt/cmd_parser.c index 0084ece8d8ff..1238b75fe3f8 100644 --- a/drivers/gpu/drm/i915/gvt/cmd_parser.c +++ b/drivers/gpu/drm/i915/gvt/cmd_parser.c @@ -1418,8 +1418,8 @@ static int cmd_handler_mi_op_2e(struct parser_exec_state *s) static int cmd_handler_mi_op_2f(struct parser_exec_state *s) { int gmadr_bytes = s->vgpu->gvt->device_info.gmadr_bytes_in_cmd; - int op_size = ((1 << (cmd_val(s, 0) & GENMASK(20, 19) >> 19)) * - sizeof(u32)); + int op_size = (1 << ((cmd_val(s, 0) & GENMASK(20, 19)) >> 19)) * + sizeof(u32); unsigned long gma, gma_high; int ret = 0; diff --git a/drivers/gpu/drm/i915/gvt/gtt.c b/drivers/gpu/drm/i915/gvt/gtt.c index 6554da9f9f5b..7eaaf1c9ed2b 100644 --- a/drivers/gpu/drm/i915/gvt/gtt.c +++ b/drivers/gpu/drm/i915/gvt/gtt.c @@ -138,36 +138,6 @@ int intel_gvt_ggtt_h2g_index(struct intel_vgpu *vgpu, unsigned long h_index, memcpy(&(e)->val64, &v, sizeof(v)); \ } while (0) -enum { - GTT_TYPE_INVALID = -1, - - GTT_TYPE_GGTT_PTE, - - GTT_TYPE_PPGTT_PTE_4K_ENTRY, - GTT_TYPE_PPGTT_PTE_2M_ENTRY, - GTT_TYPE_PPGTT_PTE_1G_ENTRY, - - GTT_TYPE_PPGTT_PTE_ENTRY, - - GTT_TYPE_PPGTT_PDE_ENTRY, - GTT_TYPE_PPGTT_PDP_ENTRY, - GTT_TYPE_PPGTT_PML4_ENTRY, - - GTT_TYPE_PPGTT_ROOT_ENTRY, - - GTT_TYPE_PPGTT_ROOT_L3_ENTRY, - GTT_TYPE_PPGTT_ROOT_L4_ENTRY, - - GTT_TYPE_PPGTT_ENTRY, - - GTT_TYPE_PPGTT_PTE_PT, - GTT_TYPE_PPGTT_PDE_PT, - GTT_TYPE_PPGTT_PDP_PT, - GTT_TYPE_PPGTT_PML4_PT, - - GTT_TYPE_MAX, -}; - /* * Mappings between GTT_TYPE* enumerations. * Following information can be found according to the given type: @@ -842,13 +812,18 @@ static int ppgtt_invalidate_shadow_page_by_shadow_entry(struct intel_vgpu *vgpu, { struct intel_gvt_gtt_pte_ops *ops = vgpu->gvt->gtt.pte_ops; struct intel_vgpu_ppgtt_spt *s; + intel_gvt_gtt_type_t cur_pt_type; if (WARN_ON(!gtt_type_is_pt(get_next_pt_type(e->type)))) return -EINVAL; - if (ops->get_pfn(e) == vgpu->gtt.scratch_page_mfn) - return 0; - + if (e->type != GTT_TYPE_PPGTT_ROOT_L3_ENTRY + && e->type != GTT_TYPE_PPGTT_ROOT_L4_ENTRY) { + cur_pt_type = get_next_pt_type(e->type) + 1; + if (ops->get_pfn(e) == + vgpu->gtt.scratch_pt[cur_pt_type].page_mfn) + return 0; + } s = ppgtt_find_shadow_page(vgpu, ops->get_pfn(e)); if (!s) { gvt_err("vgpu%d: fail to find shadow page: mfn: 0x%lx\n", @@ -999,7 +974,7 @@ fail: } static int ppgtt_handle_guest_entry_removal(struct intel_vgpu_guest_page *gpt, - struct intel_gvt_gtt_entry *we, unsigned long index) + unsigned long index) { struct intel_vgpu_ppgtt_spt *spt = guest_page_to_ppgtt_spt(gpt); struct intel_vgpu_shadow_page *sp = &spt->shadow_page; @@ -1008,34 +983,35 @@ static int ppgtt_handle_guest_entry_removal(struct intel_vgpu_guest_page *gpt, struct intel_gvt_gtt_entry e; int ret; - trace_gpt_change(spt->vgpu->id, "remove", spt, sp->type, - we->val64, index); - ppgtt_get_shadow_entry(spt, &e, index); + + trace_gpt_change(spt->vgpu->id, "remove", spt, sp->type, e.val64, + index); + if (!ops->test_present(&e)) return 0; - if (ops->get_pfn(&e) == vgpu->gtt.scratch_page_mfn) + if (ops->get_pfn(&e) == vgpu->gtt.scratch_pt[sp->type].page_mfn) return 0; - if (gtt_type_is_pt(get_next_pt_type(we->type))) { - struct intel_vgpu_guest_page *g = - intel_vgpu_find_guest_page(vgpu, ops->get_pfn(we)); - if (!g) { + if (gtt_type_is_pt(get_next_pt_type(e.type))) { + struct intel_vgpu_ppgtt_spt *s = + ppgtt_find_shadow_page(vgpu, ops->get_pfn(&e)); + if (!s) { gvt_err("fail to find guest page\n"); ret = -ENXIO; goto fail; } - ret = ppgtt_invalidate_shadow_page(guest_page_to_ppgtt_spt(g)); + ret = ppgtt_invalidate_shadow_page(s); if (ret) goto fail; } - ops->set_pfn(&e, vgpu->gtt.scratch_page_mfn); + ops->set_pfn(&e, vgpu->gtt.scratch_pt[sp->type].page_mfn); ppgtt_set_shadow_entry(spt, &e, index); return 0; fail: gvt_err("vgpu%d: fail: shadow page %p guest entry 0x%llx type %d\n", - vgpu->id, spt, we->val64, we->type); + vgpu->id, spt, e.val64, e.type); return ret; } @@ -1256,23 +1232,16 @@ static int ppgtt_handle_guest_write_page_table( struct intel_vgpu_ppgtt_spt *spt = guest_page_to_ppgtt_spt(gpt); struct intel_vgpu *vgpu = spt->vgpu; struct intel_gvt_gtt_pte_ops *ops = vgpu->gvt->gtt.pte_ops; - struct intel_gvt_gtt_entry ge; - int old_present, new_present; int ret; + int new_present; - ppgtt_get_guest_entry(spt, &ge, index); - - old_present = ops->test_present(&ge); new_present = ops->test_present(we); - ppgtt_set_guest_entry(spt, we, index); + ret = ppgtt_handle_guest_entry_removal(gpt, index); + if (ret) + goto fail; - if (old_present) { - ret = ppgtt_handle_guest_entry_removal(gpt, &ge, index); - if (ret) - goto fail; - } if (new_present) { ret = ppgtt_handle_guest_entry_add(gpt, we, index); if (ret) @@ -1318,7 +1287,7 @@ int intel_vgpu_flush_post_shadow(struct intel_vgpu *vgpu) { struct list_head *pos, *n; struct intel_vgpu_ppgtt_spt *spt; - struct intel_gvt_gtt_entry ge, e; + struct intel_gvt_gtt_entry ge; unsigned long index; int ret; @@ -1329,9 +1298,6 @@ int intel_vgpu_flush_post_shadow(struct intel_vgpu *vgpu) for_each_set_bit(index, spt->post_shadow_bitmap, GTT_ENTRY_NUM_IN_ONE_PAGE) { ppgtt_get_guest_entry(spt, &ge, index); - e = ge; - e.val64 = 0; - ppgtt_set_guest_entry(spt, &e, index); ret = ppgtt_handle_guest_write_page_table( &spt->guest_page, &ge, index); @@ -1359,8 +1325,6 @@ static int ppgtt_handle_guest_write_page_table_bytes(void *gp, index = (pa & (PAGE_SIZE - 1)) >> info->gtt_entry_size_shift; ppgtt_get_guest_entry(spt, &we, index); - memcpy((void *)&we.val64 + (pa & (info->gtt_entry_size - 1)), - p_data, bytes); ops->test_pse(&we); @@ -1369,19 +1333,13 @@ static int ppgtt_handle_guest_write_page_table_bytes(void *gp, if (ret) return ret; } else { - struct intel_gvt_gtt_entry ge; - - ppgtt_get_guest_entry(spt, &ge, index); - if (!test_bit(index, spt->post_shadow_bitmap)) { - ret = ppgtt_handle_guest_entry_removal(gpt, - &ge, index); + ret = ppgtt_handle_guest_entry_removal(gpt, index); if (ret) return ret; } ppgtt_set_post_shadow(spt, index); - ppgtt_set_guest_entry(spt, &we, index); } if (!enable_out_of_sync) @@ -1921,47 +1879,101 @@ int intel_vgpu_emulate_gtt_mmio_write(struct intel_vgpu *vgpu, unsigned int off, return ret; } -static int create_scratch_page(struct intel_vgpu *vgpu) +static int alloc_scratch_pages(struct intel_vgpu *vgpu, + intel_gvt_gtt_type_t type) { struct intel_vgpu_gtt *gtt = &vgpu->gtt; - void *p; - void *vaddr; + struct intel_gvt_gtt_pte_ops *ops = vgpu->gvt->gtt.pte_ops; + int page_entry_num = GTT_PAGE_SIZE >> + vgpu->gvt->device_info.gtt_entry_size_shift; + struct page *scratch_pt; unsigned long mfn; + int i; + void *p; + + if (WARN_ON(type < GTT_TYPE_PPGTT_PTE_PT || type >= GTT_TYPE_MAX)) + return -EINVAL; - gtt->scratch_page = alloc_page(GFP_KERNEL); - if (!gtt->scratch_page) { - gvt_err("Failed to allocate scratch page.\n"); + scratch_pt = alloc_page(GFP_KERNEL | GFP_ATOMIC | __GFP_ZERO); + if (!scratch_pt) { + gvt_err("fail to allocate scratch page\n"); return -ENOMEM; } - /* set to zero */ - p = kmap_atomic(gtt->scratch_page); - memset(p, 0, PAGE_SIZE); + p = kmap_atomic(scratch_pt); + mfn = intel_gvt_hypervisor_virt_to_mfn(p); + if (mfn == INTEL_GVT_INVALID_ADDR) { + gvt_err("fail to translate vaddr:0x%llx\n", (u64)p); + kunmap_atomic(p); + __free_page(scratch_pt); + return -EFAULT; + } + gtt->scratch_pt[type].page_mfn = mfn; + gtt->scratch_pt[type].page = scratch_pt; + gvt_dbg_mm("vgpu%d create scratch_pt: type %d mfn=0x%lx\n", + vgpu->id, type, mfn); + + /* Build the tree by full filled the scratch pt with the entries which + * point to the next level scratch pt or scratch page. The + * scratch_pt[type] indicate the scratch pt/scratch page used by the + * 'type' pt. + * e.g. scratch_pt[GTT_TYPE_PPGTT_PDE_PT] is used by + * GTT_TYPE_PPGTT_PDE_PT level pt, that means this scatch_pt it self + * is GTT_TYPE_PPGTT_PTE_PT, and full filled by scratch page mfn. + */ + if (type > GTT_TYPE_PPGTT_PTE_PT && type < GTT_TYPE_MAX) { + struct intel_gvt_gtt_entry se; + + memset(&se, 0, sizeof(struct intel_gvt_gtt_entry)); + se.type = get_entry_type(type - 1); + ops->set_pfn(&se, gtt->scratch_pt[type - 1].page_mfn); + + /* The entry parameters like present/writeable/cache type + * set to the same as i915's scratch page tree. + */ + se.val64 |= _PAGE_PRESENT | _PAGE_RW; + if (type == GTT_TYPE_PPGTT_PDE_PT) + se.val64 |= PPAT_CACHED_INDEX; + + for (i = 0; i < page_entry_num; i++) + ops->set_entry(p, &se, i, false, 0, vgpu); + } + kunmap_atomic(p); - /* translate page to mfn */ - vaddr = page_address(gtt->scratch_page); - mfn = intel_gvt_hypervisor_virt_to_mfn(vaddr); + return 0; +} + +static int release_scratch_page_tree(struct intel_vgpu *vgpu) +{ + int i; - if (mfn == INTEL_GVT_INVALID_ADDR) { - gvt_err("fail to translate vaddr: 0x%p\n", vaddr); - __free_page(gtt->scratch_page); - gtt->scratch_page = NULL; - return -ENXIO; + for (i = GTT_TYPE_PPGTT_PTE_PT; i < GTT_TYPE_MAX; i++) { + if (vgpu->gtt.scratch_pt[i].page != NULL) { + __free_page(vgpu->gtt.scratch_pt[i].page); + vgpu->gtt.scratch_pt[i].page = NULL; + vgpu->gtt.scratch_pt[i].page_mfn = 0; + } } - gtt->scratch_page_mfn = mfn; - gvt_dbg_core("vgpu%d create scratch page: mfn=0x%lx\n", vgpu->id, mfn); return 0; } -static void release_scratch_page(struct intel_vgpu *vgpu) +static int create_scratch_page_tree(struct intel_vgpu *vgpu) { - if (vgpu->gtt.scratch_page != NULL) { - __free_page(vgpu->gtt.scratch_page); - vgpu->gtt.scratch_page = NULL; - vgpu->gtt.scratch_page_mfn = 0; + int i, ret; + + for (i = GTT_TYPE_PPGTT_PTE_PT; i < GTT_TYPE_MAX; i++) { + ret = alloc_scratch_pages(vgpu, i); + if (ret) + goto err; } + + return 0; + +err: + release_scratch_page_tree(vgpu); + return ret; } /** @@ -1995,7 +2007,7 @@ int intel_vgpu_init_gtt(struct intel_vgpu *vgpu) gtt->ggtt_mm = ggtt_mm; - return create_scratch_page(vgpu); + return create_scratch_page_tree(vgpu); } /** @@ -2014,7 +2026,7 @@ void intel_vgpu_clean_gtt(struct intel_vgpu *vgpu) struct intel_vgpu_mm *mm; ppgtt_free_all_shadow_page(vgpu); - release_scratch_page(vgpu); + release_scratch_page_tree(vgpu); list_for_each_safe(pos, n, &vgpu->gtt.mm_list_head) { mm = container_of(pos, struct intel_vgpu_mm, list); diff --git a/drivers/gpu/drm/i915/gvt/gtt.h b/drivers/gpu/drm/i915/gvt/gtt.h index e4dcde78f3f9..d250013bc37b 100644 --- a/drivers/gpu/drm/i915/gvt/gtt.h +++ b/drivers/gpu/drm/i915/gvt/gtt.h @@ -88,6 +88,36 @@ enum { INTEL_GVT_MM_PPGTT, }; +typedef enum { + GTT_TYPE_INVALID = -1, + + GTT_TYPE_GGTT_PTE, + + GTT_TYPE_PPGTT_PTE_4K_ENTRY, + GTT_TYPE_PPGTT_PTE_2M_ENTRY, + GTT_TYPE_PPGTT_PTE_1G_ENTRY, + + GTT_TYPE_PPGTT_PTE_ENTRY, + + GTT_TYPE_PPGTT_PDE_ENTRY, + GTT_TYPE_PPGTT_PDP_ENTRY, + GTT_TYPE_PPGTT_PML4_ENTRY, + + GTT_TYPE_PPGTT_ROOT_ENTRY, + + GTT_TYPE_PPGTT_ROOT_L3_ENTRY, + GTT_TYPE_PPGTT_ROOT_L4_ENTRY, + + GTT_TYPE_PPGTT_ENTRY, + + GTT_TYPE_PPGTT_PTE_PT, + GTT_TYPE_PPGTT_PDE_PT, + GTT_TYPE_PPGTT_PDP_PT, + GTT_TYPE_PPGTT_PML4_PT, + + GTT_TYPE_MAX, +} intel_gvt_gtt_type_t; + struct intel_vgpu_mm { int type; bool initialized; @@ -151,6 +181,12 @@ extern void intel_vgpu_destroy_mm(struct kref *mm_ref); struct intel_vgpu_guest_page; +struct intel_vgpu_scratch_pt { + struct page *page; + unsigned long page_mfn; +}; + + struct intel_vgpu_gtt { struct intel_vgpu_mm *ggtt_mm; unsigned long active_ppgtt_mm_bitmap; @@ -160,8 +196,8 @@ struct intel_vgpu_gtt { atomic_t n_write_protected_guest_page; struct list_head oos_page_list_head; struct list_head post_shadow_list_head; - struct page *scratch_page; - unsigned long scratch_page_mfn; + struct intel_vgpu_scratch_pt scratch_pt[GTT_TYPE_MAX]; + }; extern int intel_vgpu_init_gtt(struct intel_vgpu *vgpu); diff --git a/drivers/gpu/drm/i915/gvt/gvt.c b/drivers/gpu/drm/i915/gvt/gvt.c index 385969a89216..398877c3d2fd 100644 --- a/drivers/gpu/drm/i915/gvt/gvt.c +++ b/drivers/gpu/drm/i915/gvt/gvt.c @@ -44,11 +44,14 @@ static const char * const supported_hypervisors[] = { [INTEL_GVT_HYPERVISOR_KVM] = "KVM", }; -struct intel_gvt_io_emulation_ops intel_gvt_io_emulation_ops = { +static const struct intel_gvt_ops intel_gvt_ops = { .emulate_cfg_read = intel_vgpu_emulate_cfg_read, .emulate_cfg_write = intel_vgpu_emulate_cfg_write, .emulate_mmio_read = intel_vgpu_emulate_mmio_read, .emulate_mmio_write = intel_vgpu_emulate_mmio_write, + .vgpu_create = intel_gvt_create_vgpu, + .vgpu_destroy = intel_gvt_destroy_vgpu, + .vgpu_reset = intel_gvt_reset_vgpu, }; /** @@ -81,10 +84,12 @@ int intel_gvt_init_host(void) symbol_get(xengt_mpt), "xengt"); intel_gvt_host.hypervisor_type = INTEL_GVT_HYPERVISOR_XEN; } else { +#if IS_ENABLED(CONFIG_DRM_I915_GVT_KVMGT) /* not in Xen. Try KVMGT */ intel_gvt_host.mpt = try_then_request_module( - symbol_get(kvmgt_mpt), "kvm"); + symbol_get(kvmgt_mpt), "kvmgt"); intel_gvt_host.hypervisor_type = INTEL_GVT_HYPERVISOR_KVM; +#endif } /* Fail to load MPT modules - bail out */ @@ -193,6 +198,9 @@ void intel_gvt_clean_device(struct drm_i915_private *dev_priv) intel_gvt_clean_mmio_info(gvt); intel_gvt_free_firmware(gvt); + intel_gvt_hypervisor_host_exit(&dev_priv->drm.pdev->dev, gvt); + intel_gvt_clean_vgpu_types(gvt); + kfree(dev_priv->gvt); dev_priv->gvt = NULL; } @@ -270,10 +278,25 @@ int intel_gvt_init_device(struct drm_i915_private *dev_priv) if (ret) goto out_clean_cmd_parser; - gvt_dbg_core("gvt device creation is done\n"); + ret = intel_gvt_init_vgpu_types(gvt); + if (ret) + goto out_clean_thread; + + ret = intel_gvt_hypervisor_host_init(&dev_priv->drm.pdev->dev, gvt, + &intel_gvt_ops); + if (ret) { + gvt_err("failed to register gvt-g host device: %d\n", ret); + goto out_clean_types; + } + + gvt_dbg_core("gvt device initialization is done\n"); dev_priv->gvt = gvt; return 0; +out_clean_types: + intel_gvt_clean_vgpu_types(gvt); +out_clean_thread: + clean_service_thread(gvt); out_clean_cmd_parser: intel_gvt_clean_cmd_parser(gvt); out_clean_sched_policy: diff --git a/drivers/gpu/drm/i915/gvt/gvt.h b/drivers/gpu/drm/i915/gvt/gvt.h index 62fc9e3ac5c6..3d4223e8ebe3 100644 --- a/drivers/gpu/drm/i915/gvt/gvt.h +++ b/drivers/gpu/drm/i915/gvt/gvt.h @@ -161,6 +161,20 @@ struct intel_vgpu { DECLARE_BITMAP(tlb_handle_pending, I915_NUM_ENGINES); struct i915_gem_context *shadow_ctx; struct notifier_block shadow_ctx_notifier_block; + +#if IS_ENABLED(CONFIG_DRM_I915_GVT_KVMGT) + struct { + struct device *mdev; + struct vfio_region *region; + int num_regions; + struct eventfd_ctx *intx_trigger; + struct eventfd_ctx *msi_trigger; + struct rb_root cache; + struct mutex cache_lock; + void *vfio_group; + struct notifier_block iommu_notifier; + } vdev; +#endif }; struct intel_gvt_gm { @@ -190,6 +204,16 @@ struct intel_gvt_opregion { u32 opregion_pa; }; +#define NR_MAX_INTEL_VGPU_TYPES 20 +struct intel_vgpu_type { + char name[16]; + unsigned int max_instance; + unsigned int avail_instance; + unsigned int low_gm_size; + unsigned int high_gm_size; + unsigned int fence; +}; + struct intel_gvt { struct mutex lock; struct drm_i915_private *dev_priv; @@ -205,6 +229,8 @@ struct intel_gvt { struct intel_gvt_opregion opregion; struct intel_gvt_workload_scheduler scheduler; DECLARE_HASHTABLE(cmd_table, GVT_CMD_HASH_BITS); + struct intel_vgpu_type *types; + unsigned int num_types; struct task_struct *service_thread; wait_queue_head_t service_thread_wq; @@ -231,6 +257,14 @@ void intel_gvt_free_firmware(struct intel_gvt *gvt); int intel_gvt_load_firmware(struct intel_gvt *gvt); /* Aperture/GM space definitions for GVT device */ +#define MB_TO_BYTES(mb) ((mb) << 20ULL) +#define BYTES_TO_MB(b) ((b) >> 20ULL) + +#define HOST_LOW_GM_SIZE MB_TO_BYTES(128) +#define HOST_HIGH_GM_SIZE MB_TO_BYTES(384) +#define HOST_FENCE 4 + +/* Aperture/GM space definitions for GVT device */ #define gvt_aperture_sz(gvt) (gvt->dev_priv->ggtt.mappable_end) #define gvt_aperture_pa_base(gvt) (gvt->dev_priv->ggtt.mappable_base) @@ -330,11 +364,14 @@ static inline void intel_vgpu_write_pci_bar(struct intel_vgpu *vgpu, } } -struct intel_vgpu *intel_gvt_create_vgpu(struct intel_gvt *gvt, - struct intel_vgpu_creation_params * - param); +int intel_gvt_init_vgpu_types(struct intel_gvt *gvt); +void intel_gvt_clean_vgpu_types(struct intel_gvt *gvt); +struct intel_vgpu *intel_gvt_create_vgpu(struct intel_gvt *gvt, + struct intel_vgpu_type *type); void intel_gvt_destroy_vgpu(struct intel_vgpu *vgpu); +void intel_gvt_reset_vgpu(struct intel_vgpu *vgpu); + /* validating GM functions */ #define vgpu_gmadr_is_aperture(vgpu, gmadr) \ @@ -369,10 +406,10 @@ int intel_gvt_ggtt_index_g2h(struct intel_vgpu *vgpu, unsigned long g_index, int intel_gvt_ggtt_h2g_index(struct intel_vgpu *vgpu, unsigned long h_index, unsigned long *g_index); -int intel_vgpu_emulate_cfg_read(void *__vgpu, unsigned int offset, +int intel_vgpu_emulate_cfg_read(struct intel_vgpu *vgpu, unsigned int offset, void *p_data, unsigned int bytes); -int intel_vgpu_emulate_cfg_write(void *__vgpu, unsigned int offset, +int intel_vgpu_emulate_cfg_write(struct intel_vgpu *vgpu, unsigned int offset, void *p_data, unsigned int bytes); void intel_gvt_clean_opregion(struct intel_gvt *gvt); @@ -385,6 +422,22 @@ int intel_vgpu_emulate_opregion_request(struct intel_vgpu *vgpu, u32 swsci); int setup_vgpu_mmio(struct intel_vgpu *vgpu); void populate_pvinfo_page(struct intel_vgpu *vgpu); +struct intel_gvt_ops { + int (*emulate_cfg_read)(struct intel_vgpu *, unsigned int, void *, + unsigned int); + int (*emulate_cfg_write)(struct intel_vgpu *, unsigned int, void *, + unsigned int); + int (*emulate_mmio_read)(struct intel_vgpu *, u64, void *, + unsigned int); + int (*emulate_mmio_write)(struct intel_vgpu *, u64, void *, + unsigned int); + struct intel_vgpu *(*vgpu_create)(struct intel_gvt *, + struct intel_vgpu_type *); + void (*vgpu_destroy)(struct intel_vgpu *); + void (*vgpu_reset)(struct intel_vgpu *); +}; + + #include "mpt.h" #endif diff --git a/drivers/gpu/drm/i915/gvt/handlers.c b/drivers/gpu/drm/i915/gvt/handlers.c index 9ab1f95dddc5..1b3db0c7a6db 100644 --- a/drivers/gpu/drm/i915/gvt/handlers.c +++ b/drivers/gpu/drm/i915/gvt/handlers.c @@ -1158,7 +1158,10 @@ static int fpga_dbg_mmio_write(struct intel_vgpu *vgpu, static int dma_ctrl_write(struct intel_vgpu *vgpu, unsigned int offset, void *p_data, unsigned int bytes) { - u32 mode = *(u32 *)p_data; + u32 mode; + + write_vreg(vgpu, offset, p_data, bytes); + mode = vgpu_vreg(vgpu, offset); if (GFX_MODE_BIT_SET_IN_MASK(mode, START_DMA)) { WARN_ONCE(1, "VM(%d): iGVT-g doesn't supporte GuC\n", @@ -1275,19 +1278,20 @@ static int skl_misc_ctl_write(struct intel_vgpu *vgpu, unsigned int offset, switch (offset) { case 0x4ddc: vgpu_vreg(vgpu, offset) = 0x8000003c; + /* WaCompressedResourceSamplerPbeMediaNewHashMode:skl */ + if (IS_SKL_REVID(dev_priv, SKL_REVID_C0, REVID_FOREVER)) + I915_WRITE(reg, vgpu_vreg(vgpu, offset)); break; case 0x42080: vgpu_vreg(vgpu, offset) = 0x8000; + /* WaCompressedResourceDisplayNewHashMode:skl */ + if (IS_SKL_REVID(dev_priv, SKL_REVID_E0, REVID_FOREVER)) + I915_WRITE(reg, vgpu_vreg(vgpu, offset)); break; default: return -EINVAL; } - /** - * TODO: need detect stepping info after gvt contain such information - * 0x4ddc enabled after C0, 0x42080 enabled after E0. - */ - I915_WRITE(reg, vgpu_vreg(vgpu, offset)); return 0; } @@ -1367,6 +1371,8 @@ static int gvt_reg_tlb_control_handler(struct intel_vgpu *vgpu, int rc = 0; unsigned int id = 0; + write_vreg(vgpu, offset, p_data, bytes); + switch (offset) { case 0x4260: id = RCS; @@ -1392,6 +1398,23 @@ static int gvt_reg_tlb_control_handler(struct intel_vgpu *vgpu, return rc; } +static int ring_reset_ctl_write(struct intel_vgpu *vgpu, + unsigned int offset, void *p_data, unsigned int bytes) +{ + u32 data; + + write_vreg(vgpu, offset, p_data, bytes); + data = vgpu_vreg(vgpu, offset); + + if (data & _MASKED_BIT_ENABLE(RESET_CTL_REQUEST_RESET)) + data |= RESET_CTL_READY_TO_RESET; + else if (data & _MASKED_BIT_DISABLE(RESET_CTL_REQUEST_RESET)) + data &= ~RESET_CTL_READY_TO_RESET; + + vgpu_vreg(vgpu, offset) = data; + return 0; +} + #define MMIO_F(reg, s, f, am, rm, d, r, w) do { \ ret = new_mmio_info(gvt, INTEL_GVT_MMIO_OFFSET(reg), \ f, s, am, rm, d, r, w); \ @@ -2298,6 +2321,15 @@ static int init_broadwell_mmio_info(struct intel_gvt *gvt) MMIO_RING_D(RING_ACTHD_UDW, D_BDW_PLUS); +#define RING_REG(base) (base + 0xd0) + MMIO_RING_F(RING_REG, 4, F_RO, 0, + ~_MASKED_BIT_ENABLE(RESET_CTL_REQUEST_RESET), D_BDW_PLUS, NULL, + ring_reset_ctl_write); + MMIO_F(RING_REG(GEN8_BSD2_RING_BASE), 4, F_RO, 0, + ~_MASKED_BIT_ENABLE(RESET_CTL_REQUEST_RESET), D_BDW_PLUS, NULL, + ring_reset_ctl_write); +#undef RING_REG + #define RING_REG(base) (base + 0x230) MMIO_RING_DFH(RING_REG, D_BDW_PLUS, 0, NULL, elsp_mmio_write); MMIO_DH(RING_REG(GEN8_BSD2_RING_BASE), D_BDW_PLUS, NULL, elsp_mmio_write); diff --git a/drivers/gpu/drm/i915/gvt/hypercall.h b/drivers/gpu/drm/i915/gvt/hypercall.h index 027ef558d91c..30e543f5a703 100644 --- a/drivers/gpu/drm/i915/gvt/hypercall.h +++ b/drivers/gpu/drm/i915/gvt/hypercall.h @@ -33,21 +33,14 @@ #ifndef _GVT_HYPERCALL_H_ #define _GVT_HYPERCALL_H_ -struct intel_gvt_io_emulation_ops { - int (*emulate_cfg_read)(void *, unsigned int, void *, unsigned int); - int (*emulate_cfg_write)(void *, unsigned int, void *, unsigned int); - int (*emulate_mmio_read)(void *, u64, void *, unsigned int); - int (*emulate_mmio_write)(void *, u64, void *, unsigned int); -}; - -extern struct intel_gvt_io_emulation_ops intel_gvt_io_emulation_ops; - /* * Specific GVT-g MPT modules function collections. Currently GVT-g supports * both Xen and KVM by providing dedicated hypervisor-related MPT modules. */ struct intel_gvt_mpt { int (*detect_host)(void); + int (*host_init)(struct device *dev, void *gvt, const void *ops); + void (*host_exit)(struct device *dev, void *gvt); int (*attach_vgpu)(void *vgpu, unsigned long *handle); void (*detach_vgpu)(unsigned long handle); int (*inject_msi)(unsigned long handle, u32 addr, u16 data); @@ -60,8 +53,7 @@ struct intel_gvt_mpt { unsigned long len); unsigned long (*gfn_to_mfn)(unsigned long handle, unsigned long gfn); int (*map_gfn_to_mfn)(unsigned long handle, unsigned long gfn, - unsigned long mfn, unsigned int nr, bool map, - int type); + unsigned long mfn, unsigned int nr, bool map); int (*set_trap_area)(unsigned long handle, u64 start, u64 end, bool map); }; diff --git a/drivers/gpu/drm/i915/gvt/kvmgt.c b/drivers/gpu/drm/i915/gvt/kvmgt.c new file mode 100644 index 000000000000..5bf4d73d57d9 --- /dev/null +++ b/drivers/gpu/drm/i915/gvt/kvmgt.c @@ -0,0 +1,601 @@ +/* + * KVMGT - the implementation of Intel mediated pass-through framework for KVM + * + * Copyright(c) 2014-2016 Intel Corporation. All rights reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + * + * Authors: + * Kevin Tian <kevin.tian@intel.com> + * Jike Song <jike.song@intel.com> + * Xiaoguang Chen <xiaoguang.chen@intel.com> + */ + +#include <linux/init.h> +#include <linux/device.h> +#include <linux/mm.h> +#include <linux/types.h> +#include <linux/list.h> +#include <linux/rbtree.h> +#include <linux/spinlock.h> +#include <linux/eventfd.h> +#include <linux/uuid.h> +#include <linux/kvm_host.h> +#include <linux/vfio.h> + +#include "i915_drv.h" +#include "gvt.h" + +#if IS_ENABLED(CONFIG_VFIO_MDEV) +#include <linux/mdev.h> +#else +static inline long vfio_pin_pages(struct device *dev, unsigned long *user_pfn, + long npage, int prot, unsigned long *phys_pfn) +{ + return 0; +} +static inline long vfio_unpin_pages(struct device *dev, unsigned long *pfn, + long npage) +{ + return 0; +} +#endif + +static const struct intel_gvt_ops *intel_gvt_ops; + + +/* helper macros copied from vfio-pci */ +#define VFIO_PCI_OFFSET_SHIFT 40 +#define VFIO_PCI_OFFSET_TO_INDEX(off) (off >> VFIO_PCI_OFFSET_SHIFT) +#define VFIO_PCI_INDEX_TO_OFFSET(index) ((u64)(index) << VFIO_PCI_OFFSET_SHIFT) +#define VFIO_PCI_OFFSET_MASK (((u64)(1) << VFIO_PCI_OFFSET_SHIFT) - 1) + +struct vfio_region { + u32 type; + u32 subtype; + size_t size; + u32 flags; +}; + +struct kvmgt_pgfn { + gfn_t gfn; + struct hlist_node hnode; +}; + +struct kvmgt_guest_info { + struct kvm *kvm; + struct intel_vgpu *vgpu; + struct kvm_page_track_notifier_node track_node; +#define NR_BKT (1 << 18) + struct hlist_head ptable[NR_BKT]; +#undef NR_BKT +}; + +struct gvt_dma { + struct rb_node node; + gfn_t gfn; + kvm_pfn_t pfn; +}; + +static struct gvt_dma *__gvt_cache_find(struct intel_vgpu *vgpu, gfn_t gfn) +{ + struct rb_node *node = vgpu->vdev.cache.rb_node; + struct gvt_dma *ret = NULL; + + while (node) { + struct gvt_dma *itr = rb_entry(node, struct gvt_dma, node); + + if (gfn < itr->gfn) + node = node->rb_left; + else if (gfn > itr->gfn) + node = node->rb_right; + else { + ret = itr; + goto out; + } + } + +out: + return ret; +} + +static kvm_pfn_t gvt_cache_find(struct intel_vgpu *vgpu, gfn_t gfn) +{ + struct gvt_dma *entry; + + mutex_lock(&vgpu->vdev.cache_lock); + entry = __gvt_cache_find(vgpu, gfn); + mutex_unlock(&vgpu->vdev.cache_lock); + + return entry == NULL ? 0 : entry->pfn; +} + +static void gvt_cache_add(struct intel_vgpu *vgpu, gfn_t gfn, kvm_pfn_t pfn) +{ + struct gvt_dma *new, *itr; + struct rb_node **link = &vgpu->vdev.cache.rb_node, *parent = NULL; + + new = kzalloc(sizeof(struct gvt_dma), GFP_KERNEL); + if (!new) + return; + + new->gfn = gfn; + new->pfn = pfn; + + mutex_lock(&vgpu->vdev.cache_lock); + while (*link) { + parent = *link; + itr = rb_entry(parent, struct gvt_dma, node); + + if (gfn == itr->gfn) + goto out; + else if (gfn < itr->gfn) + link = &parent->rb_left; + else + link = &parent->rb_right; + } + + rb_link_node(&new->node, parent, link); + rb_insert_color(&new->node, &vgpu->vdev.cache); + mutex_unlock(&vgpu->vdev.cache_lock); + return; + +out: + mutex_unlock(&vgpu->vdev.cache_lock); + kfree(new); +} + +static void __gvt_cache_remove_entry(struct intel_vgpu *vgpu, + struct gvt_dma *entry) +{ + rb_erase(&entry->node, &vgpu->vdev.cache); + kfree(entry); +} + +static void gvt_cache_remove(struct intel_vgpu *vgpu, gfn_t gfn) +{ + struct device *dev = vgpu->vdev.mdev; + struct gvt_dma *this; + unsigned long pfn; + + mutex_lock(&vgpu->vdev.cache_lock); + this = __gvt_cache_find(vgpu, gfn); + if (!this) { + mutex_unlock(&vgpu->vdev.cache_lock); + return; + } + + pfn = this->pfn; + WARN_ON((vfio_unpin_pages(dev, &pfn, 1) != 1)); + __gvt_cache_remove_entry(vgpu, this); + mutex_unlock(&vgpu->vdev.cache_lock); +} + +static void gvt_cache_init(struct intel_vgpu *vgpu) +{ + vgpu->vdev.cache = RB_ROOT; + mutex_init(&vgpu->vdev.cache_lock); +} + +static void gvt_cache_destroy(struct intel_vgpu *vgpu) +{ + struct gvt_dma *dma; + struct rb_node *node = NULL; + struct device *dev = vgpu->vdev.mdev; + unsigned long pfn; + + mutex_lock(&vgpu->vdev.cache_lock); + while ((node = rb_first(&vgpu->vdev.cache))) { + dma = rb_entry(node, struct gvt_dma, node); + pfn = dma->pfn; + + vfio_unpin_pages(dev, &pfn, 1); + __gvt_cache_remove_entry(vgpu, dma); + } + mutex_unlock(&vgpu->vdev.cache_lock); +} + +static struct intel_vgpu_type *intel_gvt_find_vgpu_type(struct intel_gvt *gvt, + const char *name) +{ + int i; + struct intel_vgpu_type *t; + const char *driver_name = dev_driver_string( + &gvt->dev_priv->drm.pdev->dev); + + for (i = 0; i < gvt->num_types; i++) { + t = &gvt->types[i]; + if (!strncmp(t->name, name + strlen(driver_name) + 1, + sizeof(t->name))) + return t; + } + + return NULL; +} + +static struct attribute *type_attrs[] = { + NULL, +}; + +static struct attribute_group *intel_vgpu_type_groups[] = { + [0 ... NR_MAX_INTEL_VGPU_TYPES - 1] = NULL, +}; + +static bool intel_gvt_init_vgpu_type_groups(struct intel_gvt *gvt) +{ + int i, j; + struct intel_vgpu_type *type; + struct attribute_group *group; + + for (i = 0; i < gvt->num_types; i++) { + type = &gvt->types[i]; + + group = kzalloc(sizeof(struct attribute_group), GFP_KERNEL); + if (WARN_ON(!group)) + goto unwind; + + group->name = type->name; + group->attrs = type_attrs; + intel_vgpu_type_groups[i] = group; + } + + return true; + +unwind: + for (j = 0; j < i; j++) { + group = intel_vgpu_type_groups[j]; + kfree(group); + } + + return false; +} + +static void intel_gvt_cleanup_vgpu_type_groups(struct intel_gvt *gvt) +{ + int i; + struct attribute_group *group; + + for (i = 0; i < gvt->num_types; i++) { + group = intel_vgpu_type_groups[i]; + kfree(group); + } +} + +static void kvmgt_protect_table_init(struct kvmgt_guest_info *info) +{ + hash_init(info->ptable); +} + +static void kvmgt_protect_table_destroy(struct kvmgt_guest_info *info) +{ + struct kvmgt_pgfn *p; + struct hlist_node *tmp; + int i; + + hash_for_each_safe(info->ptable, i, tmp, p, hnode) { + hash_del(&p->hnode); + kfree(p); + } +} + +static struct kvmgt_pgfn * +__kvmgt_protect_table_find(struct kvmgt_guest_info *info, gfn_t gfn) +{ + struct kvmgt_pgfn *p, *res = NULL; + + hash_for_each_possible(info->ptable, p, hnode, gfn) { + if (gfn == p->gfn) { + res = p; + break; + } + } + + return res; +} + +static bool kvmgt_gfn_is_write_protected(struct kvmgt_guest_info *info, + gfn_t gfn) +{ + struct kvmgt_pgfn *p; + + p = __kvmgt_protect_table_find(info, gfn); + return !!p; +} + +static void kvmgt_protect_table_add(struct kvmgt_guest_info *info, gfn_t gfn) +{ + struct kvmgt_pgfn *p; + + if (kvmgt_gfn_is_write_protected(info, gfn)) + return; + + p = kmalloc(sizeof(struct kvmgt_pgfn), GFP_ATOMIC); + if (WARN(!p, "gfn: 0x%llx\n", gfn)) + return; + + p->gfn = gfn; + hash_add(info->ptable, &p->hnode, gfn); +} + +static void kvmgt_protect_table_del(struct kvmgt_guest_info *info, + gfn_t gfn) +{ + struct kvmgt_pgfn *p; + + p = __kvmgt_protect_table_find(info, gfn); + if (p) { + hash_del(&p->hnode); + kfree(p); + } +} + +static int kvmgt_host_init(struct device *dev, void *gvt, const void *ops) +{ + if (!intel_gvt_init_vgpu_type_groups(gvt)) + return -EFAULT; + + intel_gvt_ops = ops; + + /* MDEV is not yet available */ + return -ENODEV; +} + +static void kvmgt_host_exit(struct device *dev, void *gvt) +{ + intel_gvt_cleanup_vgpu_type_groups(gvt); +} + +static int kvmgt_write_protect_add(unsigned long handle, u64 gfn) +{ + struct kvmgt_guest_info *info = (struct kvmgt_guest_info *)handle; + struct kvm *kvm = info->kvm; + struct kvm_memory_slot *slot; + int idx; + + idx = srcu_read_lock(&kvm->srcu); + slot = gfn_to_memslot(kvm, gfn); + + spin_lock(&kvm->mmu_lock); + + if (kvmgt_gfn_is_write_protected(info, gfn)) + goto out; + + kvm_slot_page_track_add_page(kvm, slot, gfn, KVM_PAGE_TRACK_WRITE); + kvmgt_protect_table_add(info, gfn); + +out: + spin_unlock(&kvm->mmu_lock); + srcu_read_unlock(&kvm->srcu, idx); + return 0; +} + +static int kvmgt_write_protect_remove(unsigned long handle, u64 gfn) +{ + struct kvmgt_guest_info *info = (struct kvmgt_guest_info *)handle; + struct kvm *kvm = info->kvm; + struct kvm_memory_slot *slot; + int idx; + + idx = srcu_read_lock(&kvm->srcu); + slot = gfn_to_memslot(kvm, gfn); + + spin_lock(&kvm->mmu_lock); + + if (!kvmgt_gfn_is_write_protected(info, gfn)) + goto out; + + kvm_slot_page_track_remove_page(kvm, slot, gfn, KVM_PAGE_TRACK_WRITE); + kvmgt_protect_table_del(info, gfn); + +out: + spin_unlock(&kvm->mmu_lock); + srcu_read_unlock(&kvm->srcu, idx); + return 0; +} + +static void kvmgt_page_track_write(struct kvm_vcpu *vcpu, gpa_t gpa, + const u8 *val, int len, + struct kvm_page_track_notifier_node *node) +{ + struct kvmgt_guest_info *info = container_of(node, + struct kvmgt_guest_info, track_node); + + if (kvmgt_gfn_is_write_protected(info, gpa_to_gfn(gpa))) + intel_gvt_ops->emulate_mmio_write(info->vgpu, gpa, + (void *)val, len); +} + +static void kvmgt_page_track_flush_slot(struct kvm *kvm, + struct kvm_memory_slot *slot, + struct kvm_page_track_notifier_node *node) +{ + int i; + gfn_t gfn; + struct kvmgt_guest_info *info = container_of(node, + struct kvmgt_guest_info, track_node); + + spin_lock(&kvm->mmu_lock); + for (i = 0; i < slot->npages; i++) { + gfn = slot->base_gfn + i; + if (kvmgt_gfn_is_write_protected(info, gfn)) { + kvm_slot_page_track_remove_page(kvm, slot, gfn, + KVM_PAGE_TRACK_WRITE); + kvmgt_protect_table_del(info, gfn); + } + } + spin_unlock(&kvm->mmu_lock); +} + +static bool kvmgt_check_guest(void) +{ + unsigned int eax, ebx, ecx, edx; + char s[12]; + unsigned int *i; + + eax = KVM_CPUID_SIGNATURE; + ebx = ecx = edx = 0; + + asm volatile ("cpuid" + : "+a"(eax), "=b"(ebx), "=c"(ecx), "=d"(edx) + : + : "cc", "memory"); + i = (unsigned int *)s; + i[0] = ebx; + i[1] = ecx; + i[2] = edx; + + return !strncmp(s, "KVMKVMKVM", strlen("KVMKVMKVM")); +} + +/** + * NOTE: + * It's actually impossible to check if we are running in KVM host, + * since the "KVM host" is simply native. So we only dectect guest here. + */ +static int kvmgt_detect_host(void) +{ +#ifdef CONFIG_INTEL_IOMMU + if (intel_iommu_gfx_mapped) { + gvt_err("Hardware IOMMU compatibility not yet supported, try to boot with intel_iommu=igfx_off\n"); + return -ENODEV; + } +#endif + return kvmgt_check_guest() ? -ENODEV : 0; +} + +static int kvmgt_attach_vgpu(void *vgpu, unsigned long *handle) +{ + /* nothing to do here */ + return 0; +} + +static void kvmgt_detach_vgpu(unsigned long handle) +{ + /* nothing to do here */ +} + +static int kvmgt_inject_msi(unsigned long handle, u32 addr, u16 data) +{ + struct kvmgt_guest_info *info = (struct kvmgt_guest_info *)handle; + struct intel_vgpu *vgpu = info->vgpu; + + if (vgpu->vdev.msi_trigger) + return eventfd_signal(vgpu->vdev.msi_trigger, 1) == 1; + + return false; +} + +static unsigned long kvmgt_gfn_to_pfn(unsigned long handle, unsigned long gfn) +{ + unsigned long pfn; + struct kvmgt_guest_info *info = (struct kvmgt_guest_info *)handle; + int rc; + + pfn = gvt_cache_find(info->vgpu, gfn); + if (pfn != 0) + return pfn; + + rc = vfio_pin_pages(info->vgpu->vdev.mdev, &gfn, 1, + IOMMU_READ | IOMMU_WRITE, &pfn); + if (rc != 1) { + gvt_err("vfio_pin_pages failed for gfn: 0x%lx\n", gfn); + return 0; + } + + gvt_cache_add(info->vgpu, gfn, pfn); + return pfn; +} + +static void *kvmgt_gpa_to_hva(unsigned long handle, unsigned long gpa) +{ + unsigned long pfn; + gfn_t gfn = gpa_to_gfn(gpa); + + pfn = kvmgt_gfn_to_pfn(handle, gfn); + if (!pfn) + return NULL; + + return (char *)pfn_to_kaddr(pfn) + offset_in_page(gpa); +} + +static int kvmgt_rw_gpa(unsigned long handle, unsigned long gpa, + void *buf, unsigned long len, bool write) +{ + void *hva = NULL; + + hva = kvmgt_gpa_to_hva(handle, gpa); + if (!hva) + return -EFAULT; + + if (write) + memcpy(hva, buf, len); + else + memcpy(buf, hva, len); + + return 0; +} + +static int kvmgt_read_gpa(unsigned long handle, unsigned long gpa, + void *buf, unsigned long len) +{ + return kvmgt_rw_gpa(handle, gpa, buf, len, false); +} + +static int kvmgt_write_gpa(unsigned long handle, unsigned long gpa, + void *buf, unsigned long len) +{ + return kvmgt_rw_gpa(handle, gpa, buf, len, true); +} + +static unsigned long kvmgt_virt_to_pfn(void *addr) +{ + return PFN_DOWN(__pa(addr)); +} + +struct intel_gvt_mpt kvmgt_mpt = { + .detect_host = kvmgt_detect_host, + .host_init = kvmgt_host_init, + .host_exit = kvmgt_host_exit, + .attach_vgpu = kvmgt_attach_vgpu, + .detach_vgpu = kvmgt_detach_vgpu, + .inject_msi = kvmgt_inject_msi, + .from_virt_to_mfn = kvmgt_virt_to_pfn, + .set_wp_page = kvmgt_write_protect_add, + .unset_wp_page = kvmgt_write_protect_remove, + .read_gpa = kvmgt_read_gpa, + .write_gpa = kvmgt_write_gpa, + .gfn_to_mfn = kvmgt_gfn_to_pfn, +}; +EXPORT_SYMBOL_GPL(kvmgt_mpt); + +static int __init kvmgt_init(void) +{ + return 0; +} + +static void __exit kvmgt_exit(void) +{ +} + +module_init(kvmgt_init); +module_exit(kvmgt_exit); + +MODULE_LICENSE("GPL and additional rights"); +MODULE_AUTHOR("Intel Corporation"); diff --git a/drivers/gpu/drm/i915/gvt/mmio.c b/drivers/gpu/drm/i915/gvt/mmio.c index 585b01f63254..09c9450a1946 100644 --- a/drivers/gpu/drm/i915/gvt/mmio.c +++ b/drivers/gpu/drm/i915/gvt/mmio.c @@ -67,10 +67,9 @@ int intel_vgpu_gpa_to_mmio_offset(struct intel_vgpu *vgpu, u64 gpa) * Returns: * Zero on success, negative error code if failed */ -int intel_vgpu_emulate_mmio_read(void *__vgpu, uint64_t pa, +int intel_vgpu_emulate_mmio_read(struct intel_vgpu *vgpu, uint64_t pa, void *p_data, unsigned int bytes) { - struct intel_vgpu *vgpu = __vgpu; struct intel_gvt *gvt = vgpu->gvt; struct intel_gvt_mmio_info *mmio; unsigned int offset = 0; @@ -179,10 +178,9 @@ err: * Returns: * Zero on success, negative error code if failed */ -int intel_vgpu_emulate_mmio_write(void *__vgpu, uint64_t pa, +int intel_vgpu_emulate_mmio_write(struct intel_vgpu *vgpu, uint64_t pa, void *p_data, unsigned int bytes) { - struct intel_vgpu *vgpu = __vgpu; struct intel_gvt *gvt = vgpu->gvt; struct intel_gvt_mmio_info *mmio; unsigned int offset = 0; diff --git a/drivers/gpu/drm/i915/gvt/mmio.h b/drivers/gpu/drm/i915/gvt/mmio.h index 9dc739a01892..87d5b5e366a3 100644 --- a/drivers/gpu/drm/i915/gvt/mmio.h +++ b/drivers/gpu/drm/i915/gvt/mmio.h @@ -87,10 +87,11 @@ struct intel_gvt_mmio_info *intel_gvt_find_mmio_info(struct intel_gvt *gvt, }) int intel_vgpu_gpa_to_mmio_offset(struct intel_vgpu *vgpu, u64 gpa); -int intel_vgpu_emulate_mmio_read(void *__vgpu, u64 pa, void *p_data, - unsigned int bytes); -int intel_vgpu_emulate_mmio_write(void *__vgpu, u64 pa, void *p_data, - unsigned int bytes); + +int intel_vgpu_emulate_mmio_read(struct intel_vgpu *vgpu, u64 pa, + void *p_data, unsigned int bytes); +int intel_vgpu_emulate_mmio_write(struct intel_vgpu *vgpu, u64 pa, + void *p_data, unsigned int bytes); bool intel_gvt_mmio_is_cmd_access(struct intel_gvt *gvt, unsigned int offset); bool intel_gvt_mmio_is_unalign(struct intel_gvt *gvt, unsigned int offset); diff --git a/drivers/gpu/drm/i915/gvt/mpt.h b/drivers/gpu/drm/i915/gvt/mpt.h index 67858782d327..1af5830c0a56 100644 --- a/drivers/gpu/drm/i915/gvt/mpt.h +++ b/drivers/gpu/drm/i915/gvt/mpt.h @@ -56,6 +56,35 @@ static inline int intel_gvt_hypervisor_detect_host(void) } /** + * intel_gvt_hypervisor_host_init - init GVT-g host side + * + * Returns: + * Zero on success, negative error code if failed + */ +static inline int intel_gvt_hypervisor_host_init(struct device *dev, + void *gvt, const void *ops) +{ + /* optional to provide */ + if (!intel_gvt_host.mpt->host_init) + return 0; + + return intel_gvt_host.mpt->host_init(dev, gvt, ops); +} + +/** + * intel_gvt_hypervisor_host_exit - exit GVT-g host side + */ +static inline void intel_gvt_hypervisor_host_exit(struct device *dev, + void *gvt) +{ + /* optional to provide */ + if (!intel_gvt_host.mpt->host_exit) + return; + + intel_gvt_host.mpt->host_exit(dev, gvt); +} + +/** * intel_gvt_hypervisor_attach_vgpu - call hypervisor to initialize vGPU * related stuffs inside hypervisor. * @@ -64,6 +93,10 @@ static inline int intel_gvt_hypervisor_detect_host(void) */ static inline int intel_gvt_hypervisor_attach_vgpu(struct intel_vgpu *vgpu) { + /* optional to provide */ + if (!intel_gvt_host.mpt->attach_vgpu) + return 0; + return intel_gvt_host.mpt->attach_vgpu(vgpu, &vgpu->handle); } @@ -76,6 +109,10 @@ static inline int intel_gvt_hypervisor_attach_vgpu(struct intel_vgpu *vgpu) */ static inline void intel_gvt_hypervisor_detach_vgpu(struct intel_vgpu *vgpu) { + /* optional to provide */ + if (!intel_gvt_host.mpt->detach_vgpu) + return; + intel_gvt_host.mpt->detach_vgpu(vgpu->handle); } @@ -224,11 +261,6 @@ static inline unsigned long intel_gvt_hypervisor_gfn_to_mfn( return intel_gvt_host.mpt->gfn_to_mfn(vgpu->handle, gfn); } -enum { - GVT_MAP_APERTURE = 0, - GVT_MAP_OPREGION, -}; - /** * intel_gvt_hypervisor_map_gfn_to_mfn - map a GFN region to MFN * @vgpu: a vGPU @@ -236,7 +268,6 @@ enum { * @mfn: host PFN * @nr: amount of PFNs * @map: map or unmap - * @type: map type * * Returns: * Zero on success, negative error code if failed. @@ -244,10 +275,14 @@ enum { static inline int intel_gvt_hypervisor_map_gfn_to_mfn( struct intel_vgpu *vgpu, unsigned long gfn, unsigned long mfn, unsigned int nr, - bool map, int type) + bool map) { + /* a MPT implementation could have MMIO mapped elsewhere */ + if (!intel_gvt_host.mpt->map_gfn_to_mfn) + return 0; + return intel_gvt_host.mpt->map_gfn_to_mfn(vgpu->handle, gfn, mfn, nr, - map, type); + map); } /** @@ -263,6 +298,10 @@ static inline int intel_gvt_hypervisor_map_gfn_to_mfn( static inline int intel_gvt_hypervisor_set_trap_area( struct intel_vgpu *vgpu, u64 start, u64 end, bool map) { + /* a MPT implementation could have MMIO trapped elsewhere */ + if (!intel_gvt_host.mpt->set_trap_area) + return 0; + return intel_gvt_host.mpt->set_trap_area(vgpu->handle, start, end, map); } diff --git a/drivers/gpu/drm/i915/gvt/opregion.c b/drivers/gpu/drm/i915/gvt/opregion.c index 95218913b0bc..d2a0fbc896c3 100644 --- a/drivers/gpu/drm/i915/gvt/opregion.c +++ b/drivers/gpu/drm/i915/gvt/opregion.c @@ -73,7 +73,7 @@ static int map_vgpu_opregion(struct intel_vgpu *vgpu, bool map) } ret = intel_gvt_hypervisor_map_gfn_to_mfn(vgpu, vgpu_opregion(vgpu)->gfn[i], - mfn, 1, map, GVT_MAP_OPREGION); + mfn, 1, map); if (ret) { gvt_err("fail to map GFN to MFN, errno: %d\n", ret); return ret; @@ -89,28 +89,18 @@ static int map_vgpu_opregion(struct intel_vgpu *vgpu, bool map) */ void intel_vgpu_clean_opregion(struct intel_vgpu *vgpu) { - int i; - gvt_dbg_core("vgpu%d: clean vgpu opregion\n", vgpu->id); if (!vgpu_opregion(vgpu)->va) return; - if (intel_gvt_host.hypervisor_type == INTEL_GVT_HYPERVISOR_KVM) { - vunmap(vgpu_opregion(vgpu)->va); - for (i = 0; i < INTEL_GVT_OPREGION_PAGES; i++) { - if (vgpu_opregion(vgpu)->pages[i]) { - put_page(vgpu_opregion(vgpu)->pages[i]); - vgpu_opregion(vgpu)->pages[i] = NULL; - } - } - } else { + if (intel_gvt_host.hypervisor_type == INTEL_GVT_HYPERVISOR_XEN) { map_vgpu_opregion(vgpu, false); free_pages((unsigned long)vgpu_opregion(vgpu)->va, INTEL_GVT_OPREGION_PORDER); - } - vgpu_opregion(vgpu)->va = NULL; + vgpu_opregion(vgpu)->va = NULL; + } } /** @@ -137,22 +127,8 @@ int intel_vgpu_init_opregion(struct intel_vgpu *vgpu, u32 gpa) ret = map_vgpu_opregion(vgpu, true); if (ret) return ret; - } else { - gvt_dbg_core("emulate opregion from userspace\n"); - - /* - * If opregion pages are not allocated from host kenrel, - * most of the params are meaningless - */ - ret = intel_gvt_hypervisor_map_gfn_to_mfn(vgpu, - 0, /* not used */ - 0, /* not used */ - 2, /* not used */ - 1, - GVT_MAP_OPREGION); - if (ret) - return ret; } + return 0; } diff --git a/drivers/gpu/drm/i915/gvt/render.c b/drivers/gpu/drm/i915/gvt/render.c index 3af894b3d257..44136b1f3aab 100644 --- a/drivers/gpu/drm/i915/gvt/render.c +++ b/drivers/gpu/drm/i915/gvt/render.c @@ -152,6 +152,8 @@ static void handle_tlb_pending_event(struct intel_vgpu *vgpu, int ring_id) if (wait_for_atomic((I915_READ_FW(reg) == 0), 50)) gvt_err("timeout in invalidate ring (%d) tlb\n", ring_id); + else + vgpu_vreg(vgpu, regs[ring_id]) = 0; intel_uncore_forcewake_put(dev_priv, fw); diff --git a/drivers/gpu/drm/i915/gvt/sched_policy.c b/drivers/gpu/drm/i915/gvt/sched_policy.c index 1df6a5460f3e..678b0be85376 100644 --- a/drivers/gpu/drm/i915/gvt/sched_policy.c +++ b/drivers/gpu/drm/i915/gvt/sched_policy.c @@ -36,12 +36,10 @@ static bool vgpu_has_pending_workload(struct intel_vgpu *vgpu) { - struct intel_vgpu_execlist *execlist; enum intel_engine_id i; struct intel_engine_cs *engine; for_each_engine(engine, vgpu->gvt->dev_priv, i) { - execlist = &vgpu->execlist[i]; if (!list_empty(workload_q_head(vgpu, i))) return true; } diff --git a/drivers/gpu/drm/i915/gvt/scheduler.c b/drivers/gpu/drm/i915/gvt/scheduler.c index 18acb45dd14d..7d87c43661c5 100644 --- a/drivers/gpu/drm/i915/gvt/scheduler.c +++ b/drivers/gpu/drm/i915/gvt/scheduler.c @@ -89,15 +89,15 @@ static int populate_shadow_context(struct intel_vgpu_workload *workload) } page = i915_gem_object_get_page(ctx_obj, LRC_PPHWSP_PN + i); - dst = kmap_atomic(page); + dst = kmap(page); intel_gvt_hypervisor_read_gpa(vgpu, context_gpa, dst, GTT_PAGE_SIZE); - kunmap_atomic(dst); + kunmap(page); i++; } page = i915_gem_object_get_page(ctx_obj, LRC_STATE_PN); - shadow_ring_context = kmap_atomic(page); + shadow_ring_context = kmap(page); #define COPY_REG(name) \ intel_gvt_hypervisor_read_gpa(vgpu, workload->ring_context_gpa \ @@ -123,7 +123,7 @@ static int populate_shadow_context(struct intel_vgpu_workload *workload) sizeof(*shadow_ring_context), GTT_PAGE_SIZE - sizeof(*shadow_ring_context)); - kunmap_atomic(shadow_ring_context); + kunmap(page); return 0; } @@ -318,10 +318,10 @@ static void update_guest_context(struct intel_vgpu_workload *workload) } page = i915_gem_object_get_page(ctx_obj, LRC_PPHWSP_PN + i); - src = kmap_atomic(page); + src = kmap(page); intel_gvt_hypervisor_write_gpa(vgpu, context_gpa, src, GTT_PAGE_SIZE); - kunmap_atomic(src); + kunmap(page); i++; } @@ -329,7 +329,7 @@ static void update_guest_context(struct intel_vgpu_workload *workload) RING_CTX_OFF(ring_header.val), &workload->rb_tail, 4); page = i915_gem_object_get_page(ctx_obj, LRC_STATE_PN); - shadow_ring_context = kmap_atomic(page); + shadow_ring_context = kmap(page); #define COPY_REG(name) \ intel_gvt_hypervisor_write_gpa(vgpu, workload->ring_context_gpa + \ @@ -347,7 +347,7 @@ static void update_guest_context(struct intel_vgpu_workload *workload) sizeof(*shadow_ring_context), GTT_PAGE_SIZE - sizeof(*shadow_ring_context)); - kunmap_atomic(shadow_ring_context); + kunmap(page); } static void complete_current_workload(struct intel_gvt *gvt, int ring_id) @@ -455,6 +455,8 @@ static int workload_thread(void *priv) if (lret < 0) { workload->status = lret; gvt_err("fail to wait workload, skip\n"); + } else { + workload->status = 0; } complete: diff --git a/drivers/gpu/drm/i915/gvt/vgpu.c b/drivers/gpu/drm/i915/gvt/vgpu.c index 4f54005b976d..3a15feadc1df 100644 --- a/drivers/gpu/drm/i915/gvt/vgpu.c +++ b/drivers/gpu/drm/i915/gvt/vgpu.c @@ -133,6 +133,106 @@ void populate_pvinfo_page(struct intel_vgpu *vgpu) } /** + * intel_gvt_init_vgpu_types - initialize vGPU type list + * @gvt : GVT device + * + * Initialize vGPU type list based on available resource. + * + */ +int intel_gvt_init_vgpu_types(struct intel_gvt *gvt) +{ + unsigned int num_types; + unsigned int i, low_avail; + unsigned int min_low; + + /* vGPU type name is defined as GVTg_Vx_y which contains + * physical GPU generation type and 'y' means maximum vGPU + * instances user can create on one physical GPU for this + * type. + * + * Depend on physical SKU resource, might see vGPU types like + * GVTg_V4_8, GVTg_V4_4, GVTg_V4_2, etc. We can create + * different types of vGPU on same physical GPU depending on + * available resource. Each vGPU type will have "avail_instance" + * to indicate how many vGPU instance can be created for this + * type. + * + * Currently use static size here as we init type earlier.. + */ + low_avail = MB_TO_BYTES(256) - HOST_LOW_GM_SIZE; + num_types = 4; + + gvt->types = kzalloc(num_types * sizeof(struct intel_vgpu_type), + GFP_KERNEL); + if (!gvt->types) + return -ENOMEM; + + min_low = MB_TO_BYTES(32); + for (i = 0; i < num_types; ++i) { + if (low_avail / min_low == 0) + break; + gvt->types[i].low_gm_size = min_low; + gvt->types[i].high_gm_size = 3 * gvt->types[i].low_gm_size; + gvt->types[i].fence = 4; + gvt->types[i].max_instance = low_avail / min_low; + gvt->types[i].avail_instance = gvt->types[i].max_instance; + + if (IS_GEN8(gvt->dev_priv)) + sprintf(gvt->types[i].name, "GVTg_V4_%u", + gvt->types[i].max_instance); + else if (IS_GEN9(gvt->dev_priv)) + sprintf(gvt->types[i].name, "GVTg_V5_%u", + gvt->types[i].max_instance); + + min_low <<= 1; + gvt_dbg_core("type[%d]: %s max %u avail %u low %u high %u fence %u\n", + i, gvt->types[i].name, gvt->types[i].max_instance, + gvt->types[i].avail_instance, + gvt->types[i].low_gm_size, + gvt->types[i].high_gm_size, gvt->types[i].fence); + } + + gvt->num_types = i; + return 0; +} + +void intel_gvt_clean_vgpu_types(struct intel_gvt *gvt) +{ + kfree(gvt->types); +} + +static void intel_gvt_update_vgpu_types(struct intel_gvt *gvt) +{ + int i; + unsigned int low_gm_avail, high_gm_avail, fence_avail; + unsigned int low_gm_min, high_gm_min, fence_min, total_min; + + /* Need to depend on maxium hw resource size but keep on + * static config for now. + */ + low_gm_avail = MB_TO_BYTES(256) - HOST_LOW_GM_SIZE - + gvt->gm.vgpu_allocated_low_gm_size; + high_gm_avail = MB_TO_BYTES(256) * 3 - HOST_HIGH_GM_SIZE - + gvt->gm.vgpu_allocated_high_gm_size; + fence_avail = gvt_fence_sz(gvt) - HOST_FENCE - + gvt->fence.vgpu_allocated_fence_num; + + for (i = 0; i < gvt->num_types; i++) { + low_gm_min = low_gm_avail / gvt->types[i].low_gm_size; + high_gm_min = high_gm_avail / gvt->types[i].high_gm_size; + fence_min = fence_avail / gvt->types[i].fence; + total_min = min(min(low_gm_min, high_gm_min), fence_min); + gvt->types[i].avail_instance = min(gvt->types[i].max_instance, + total_min); + + gvt_dbg_core("update type[%d]: %s max %u avail %u low %u high %u fence %u\n", + i, gvt->types[i].name, gvt->types[i].max_instance, + gvt->types[i].avail_instance, gvt->types[i].low_gm_size, + gvt->types[i].high_gm_size, gvt->types[i].fence); + } +} + +/** * intel_gvt_destroy_vgpu - destroy a virtual GPU * @vgpu: virtual GPU * @@ -166,20 +266,11 @@ void intel_gvt_destroy_vgpu(struct intel_vgpu *vgpu) clean_vgpu_mmio(vgpu); vfree(vgpu); + intel_gvt_update_vgpu_types(gvt); mutex_unlock(&gvt->lock); } -/** - * intel_gvt_create_vgpu - create a virtual GPU - * @gvt: GVT device - * @param: vGPU creation parameters - * - * This function is called when user wants to create a virtual GPU. - * - * Returns: - * pointer to intel_vgpu, error pointer if failed. - */ -struct intel_vgpu *intel_gvt_create_vgpu(struct intel_gvt *gvt, +static struct intel_vgpu *__intel_gvt_create_vgpu(struct intel_gvt *gvt, struct intel_vgpu_creation_params *param) { struct intel_vgpu *vgpu; @@ -224,15 +315,9 @@ struct intel_vgpu *intel_gvt_create_vgpu(struct intel_gvt *gvt, if (ret) goto out_detach_hypervisor_vgpu; - if (intel_gvt_host.hypervisor_type == INTEL_GVT_HYPERVISOR_KVM) { - ret = intel_vgpu_init_opregion(vgpu, 0); - if (ret) - goto out_clean_gtt; - } - ret = intel_vgpu_init_display(vgpu); if (ret) - goto out_clean_opregion; + goto out_clean_gtt; ret = intel_vgpu_init_execlist(vgpu); if (ret) @@ -257,8 +342,6 @@ out_clean_execlist: intel_vgpu_clean_execlist(vgpu); out_clean_display: intel_vgpu_clean_display(vgpu); -out_clean_opregion: - intel_vgpu_clean_opregion(vgpu); out_clean_gtt: intel_vgpu_clean_gtt(vgpu); out_detach_hypervisor_vgpu: @@ -272,3 +355,49 @@ out_free_vgpu: mutex_unlock(&gvt->lock); return ERR_PTR(ret); } + +/** + * intel_gvt_create_vgpu - create a virtual GPU + * @gvt: GVT device + * @type: type of the vGPU to create + * + * This function is called when user wants to create a virtual GPU. + * + * Returns: + * pointer to intel_vgpu, error pointer if failed. + */ +struct intel_vgpu *intel_gvt_create_vgpu(struct intel_gvt *gvt, + struct intel_vgpu_type *type) +{ + struct intel_vgpu_creation_params param; + struct intel_vgpu *vgpu; + + param.handle = 0; + param.low_gm_sz = type->low_gm_size; + param.high_gm_sz = type->high_gm_size; + param.fence_sz = type->fence; + + /* XXX current param based on MB */ + param.low_gm_sz = BYTES_TO_MB(param.low_gm_sz); + param.high_gm_sz = BYTES_TO_MB(param.high_gm_sz); + + vgpu = __intel_gvt_create_vgpu(gvt, ¶m); + if (IS_ERR(vgpu)) + return vgpu; + + /* calculate left instance change for types */ + intel_gvt_update_vgpu_types(gvt); + + return vgpu; +} + +/** + * intel_gvt_reset_vgpu - reset a virtual GPU + * @vgpu: virtual GPU + * + * This function is called when user wants to reset a virtual GPU. + * + */ +void intel_gvt_reset_vgpu(struct intel_vgpu *vgpu) +{ +} diff --git a/drivers/gpu/drm/i915/i915_debugfs.c b/drivers/gpu/drm/i915/i915_debugfs.c index 7166a1a6265d..1cc971cb6cb1 100644 --- a/drivers/gpu/drm/i915/i915_debugfs.c +++ b/drivers/gpu/drm/i915/i915_debugfs.c @@ -547,11 +547,11 @@ static int i915_gem_pageflip_info(struct seq_file *m, void *data) pipe, plane); } if (work->flip_queued_req) { - struct intel_engine_cs *engine = i915_gem_request_get_engine(work->flip_queued_req); + struct intel_engine_cs *engine = work->flip_queued_req->engine; seq_printf(m, "Flip queued on %s at seqno %x, next seqno %x [current breadcrumb %x], completed? %d\n", engine->name, - i915_gem_request_get_seqno(work->flip_queued_req), + work->flip_queued_req->global_seqno, atomic_read(&dev_priv->gt.global_timeline.next_seqno), intel_engine_get_seqno(engine), i915_gem_request_completed(work->flip_queued_req)); @@ -631,8 +631,9 @@ static void print_request(struct seq_file *m, struct drm_i915_gem_request *rq, const char *prefix) { - seq_printf(m, "%s%x [%x:%x] @ %d: %s\n", prefix, + seq_printf(m, "%s%x [%x:%x] prio=%d @ %dms: %s\n", prefix, rq->global_seqno, rq->ctx->hw_id, rq->fence.seqno, + rq->priotree.priority, jiffies_to_msecs(jiffies - rq->emitted_jiffies), rq->timeline->common->name); } @@ -3216,6 +3217,7 @@ static int i915_engine_info(struct seq_file *m, void *unused) if (i915.enable_execlists) { u32 ptr, read, write; + struct rb_node *rb; seq_printf(m, "\tExeclist status: 0x%08x %08x\n", I915_READ(RING_EXECLIST_STATUS_LO(engine)), @@ -3254,11 +3256,12 @@ static int i915_engine_info(struct seq_file *m, void *unused) seq_printf(m, "\t\tELSP[1] idle\n"); rcu_read_unlock(); - spin_lock_irq(&engine->execlist_lock); - list_for_each_entry(rq, &engine->execlist_queue, execlist_link) { + spin_lock_irq(&engine->timeline->lock); + for (rb = engine->execlist_first; rb; rb = rb_next(rb)) { + rq = rb_entry(rb, typeof(*rq), priotree.node); print_request(m, rq, "\t\tQ "); } - spin_unlock_irq(&engine->execlist_lock); + spin_unlock_irq(&engine->timeline->lock); } else if (INTEL_GEN(dev_priv) > 6) { seq_printf(m, "\tPP_DIR_BASE: 0x%08x\n", I915_READ(RING_PP_DIR_BASE(engine))); diff --git a/drivers/gpu/drm/i915/i915_drv.c b/drivers/gpu/drm/i915/i915_drv.c index 0213a3090ab3..4f0e56d3b441 100644 --- a/drivers/gpu/drm/i915/i915_drv.c +++ b/drivers/gpu/drm/i915/i915_drv.c @@ -150,7 +150,7 @@ static void intel_detect_pch(struct drm_device *dev) /* In all current cases, num_pipes is equivalent to the PCH_NOP setting * (which really amounts to a PCH but no South Display). */ - if (INTEL_INFO(dev)->num_pipes == 0) { + if (INTEL_INFO(dev_priv)->num_pipes == 0) { dev_priv->pch_type = PCH_NOP; return; } @@ -323,6 +323,10 @@ static int i915_getparam(struct drm_device *dev, void *data, */ value = i915_gem_mmap_gtt_version(); break; + case I915_PARAM_HAS_SCHEDULER: + value = dev_priv->engine[RCS] && + dev_priv->engine[RCS]->schedule; + break; case I915_PARAM_MMAP_VERSION: /* Remember to bump this if the version changes! */ case I915_PARAM_HAS_GEM: @@ -374,12 +378,12 @@ static int intel_alloc_mchbar_resource(struct drm_device *dev) { struct drm_i915_private *dev_priv = to_i915(dev); - int reg = INTEL_INFO(dev)->gen >= 4 ? MCHBAR_I965 : MCHBAR_I915; + int reg = INTEL_GEN(dev_priv) >= 4 ? MCHBAR_I965 : MCHBAR_I915; u32 temp_lo, temp_hi = 0; u64 mchbar_addr; int ret; - if (INTEL_INFO(dev)->gen >= 4) + if (INTEL_GEN(dev_priv) >= 4) pci_read_config_dword(dev_priv->bridge_dev, reg + 4, &temp_hi); pci_read_config_dword(dev_priv->bridge_dev, reg, &temp_lo); mchbar_addr = ((u64)temp_hi << 32) | temp_lo; @@ -406,7 +410,7 @@ intel_alloc_mchbar_resource(struct drm_device *dev) return ret; } - if (INTEL_INFO(dev)->gen >= 4) + if (INTEL_GEN(dev_priv) >= 4) pci_write_config_dword(dev_priv->bridge_dev, reg + 4, upper_32_bits(dev_priv->mch_res.start)); @@ -420,7 +424,7 @@ static void intel_setup_mchbar(struct drm_device *dev) { struct drm_i915_private *dev_priv = to_i915(dev); - int mchbar_reg = INTEL_INFO(dev)->gen >= 4 ? MCHBAR_I965 : MCHBAR_I915; + int mchbar_reg = INTEL_GEN(dev_priv) >= 4 ? MCHBAR_I965 : MCHBAR_I915; u32 temp; bool enabled; @@ -460,7 +464,7 @@ static void intel_teardown_mchbar(struct drm_device *dev) { struct drm_i915_private *dev_priv = to_i915(dev); - int mchbar_reg = INTEL_INFO(dev)->gen >= 4 ? MCHBAR_I965 : MCHBAR_I915; + int mchbar_reg = INTEL_GEN(dev_priv) >= 4 ? MCHBAR_I965 : MCHBAR_I915; if (dev_priv->mchbar_need_disable) { if (IS_I915G(dev_priv) || IS_I915GM(dev_priv)) { @@ -607,7 +611,7 @@ static int i915_load_modeset_init(struct drm_device *dev) intel_modeset_gem_init(dev); - if (INTEL_INFO(dev)->num_pipes == 0) + if (INTEL_INFO(dev_priv)->num_pipes == 0) return 0; ret = intel_fbdev_init(dev); @@ -879,7 +883,7 @@ static int i915_mmio_setup(struct drm_device *dev) * the register BAR remains the same size for all the earlier * generations up to Ironlake. */ - if (INTEL_INFO(dev)->gen < 5) + if (INTEL_GEN(dev_priv) < 5) mmio_size = 512 * 1024; else mmio_size = 2 * 1024 * 1024; @@ -1168,8 +1172,8 @@ static void i915_driver_unregister(struct drm_i915_private *dev_priv) /** * i915_driver_load - setup chip and create an initial config - * @dev: DRM device - * @flags: startup flags + * @pdev: PCI device + * @ent: matching PCI ID entry * * The driver load routine has to do several things: * - drive output discovery via intel_modeset_init() @@ -1512,7 +1516,7 @@ static int i915_drm_suspend_late(struct drm_device *dev, bool hibernation) * Fujitsu FSC S7110 * Acer Aspire 1830T */ - if (!(hibernation && INTEL_INFO(dev_priv)->gen < 6)) + if (!(hibernation && INTEL_GEN(dev_priv) < 6)) pci_set_power_state(pdev, PCI_D3hot); dev_priv->suspended_to_idle = suspend_to_idle(dev_priv); diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h index 6f4a6bcf6ed4..5192206c62e2 100644 --- a/drivers/gpu/drm/i915/i915_drv.h +++ b/drivers/gpu/drm/i915/i915_drv.h @@ -60,11 +60,15 @@ #include "intel_ringbuffer.h" #include "i915_gem.h" +#include "i915_gem_fence_reg.h" +#include "i915_gem_object.h" #include "i915_gem_gtt.h" #include "i915_gem_render_state.h" #include "i915_gem_request.h" #include "i915_gem_timeline.h" +#include "i915_vma.h" + #include "intel_gvt.h" /* General customization: @@ -459,23 +463,6 @@ struct intel_opregion { struct intel_overlay; struct intel_overlay_error_state; -struct drm_i915_fence_reg { - struct list_head link; - struct drm_i915_private *i915; - struct i915_vma *vma; - int pin_count; - int id; - /** - * Whether the tiling parameters for the currently - * associated fence register have changed. Note that - * for the purposes of tracking tiling changes we also - * treat the unfenced register, the register slot that - * the object occupies whilst it executes a fenced - * command (such as BLT on gen2/3), as a "fence". - */ - bool dirty; -}; - struct sdvo_device_mapping { u8 initialized; u8 dvo_port; @@ -487,6 +474,7 @@ struct sdvo_device_mapping { struct intel_connector; struct intel_encoder; +struct intel_atomic_state; struct intel_crtc_state; struct intel_initial_plane_config; struct intel_crtc; @@ -500,8 +488,12 @@ struct drm_i915_display_funcs { int (*compute_intermediate_wm)(struct drm_device *dev, struct intel_crtc *intel_crtc, struct intel_crtc_state *newstate); - void (*initial_watermarks)(struct intel_crtc_state *cstate); - void (*optimize_watermarks)(struct intel_crtc_state *cstate); + void (*initial_watermarks)(struct intel_atomic_state *state, + struct intel_crtc_state *cstate); + void (*atomic_update_watermarks)(struct intel_atomic_state *state, + struct intel_crtc_state *cstate); + void (*optimize_watermarks)(struct intel_atomic_state *state, + struct intel_crtc_state *cstate); int (*compute_global_watermarks)(struct drm_atomic_state *state); void (*update_wm)(struct intel_crtc *crtc); int (*modeset_calc_cdclk)(struct drm_atomic_state *state); @@ -562,6 +554,18 @@ enum forcewake_domains { #define FW_REG_READ (1) #define FW_REG_WRITE (2) +enum decoupled_power_domain { + GEN9_DECOUPLED_PD_BLITTER = 0, + GEN9_DECOUPLED_PD_RENDER, + GEN9_DECOUPLED_PD_MEDIA, + GEN9_DECOUPLED_PD_ALL +}; + +enum decoupled_ops { + GEN9_DECOUPLED_OP_WRITE = 0, + GEN9_DECOUPLED_OP_READ +}; + enum forcewake_domains intel_uncore_forcewake_for_reg(struct drm_i915_private *dev_priv, i915_reg_t reg, unsigned int op); @@ -668,7 +672,7 @@ struct intel_csr { func(is_skylake); \ func(is_broxton); \ func(is_kabylake); \ - func(is_preliminary); \ + func(is_alpha_support); \ /* Keep has_* in alphabetical order */ \ func(has_64bit_reloc); \ func(has_csr); \ @@ -696,7 +700,8 @@ struct intel_csr { func(cursor_needs_physical); \ func(hws_needs_physical); \ func(overlay_needs_physical); \ - func(supports_tv) + func(supports_tv); \ + func(has_decoupled_mmio) struct sseu_dev_info { u8 slice_mask; @@ -949,6 +954,7 @@ struct i915_gem_context { /* Unique identifier for this context, used by the hw for tracking */ unsigned int hw_id; u32 user_handle; + int priority; /* greater priorities are serviced first */ u32 ggtt_alignment; @@ -1791,6 +1797,7 @@ struct drm_i915_private { struct kmem_cache *objects; struct kmem_cache *vmas; struct kmem_cache *requests; + struct kmem_cache *dependencies; const struct intel_device_info info; @@ -2053,13 +2060,6 @@ struct drm_i915_private { */ uint16_t skl_latency[8]; - /* - * The skl_wm_values structure is a bit too big for stack - * allocation, so we keep the staging struct where we store - * intermediate results here instead. - */ - struct skl_wm_values skl_results; - /* current hardware state */ union { struct ilk_wm_values hw; @@ -2179,31 +2179,6 @@ enum hdmi_force_audio { #define I915_GTT_OFFSET_NONE ((u32)-1) -struct drm_i915_gem_object_ops { - unsigned int flags; -#define I915_GEM_OBJECT_HAS_STRUCT_PAGE 0x1 -#define I915_GEM_OBJECT_IS_SHRINKABLE 0x2 - - /* Interface between the GEM object and its backing storage. - * get_pages() is called once prior to the use of the associated set - * of pages before to binding them into the GTT, and put_pages() is - * called after we no longer need them. As we expect there to be - * associated cost with migrating pages between the backing storage - * and making them available for the GPU (e.g. clflush), we may hold - * onto the pages after they are no longer referenced by the GPU - * in case they may be used again shortly (for example migrating the - * pages to a different memory domain within the GTT). put_pages() - * will therefore most likely be called when the object itself is - * being released or under memory pressure (where we attempt to - * reap pages for the shrinker). - */ - struct sg_table *(*get_pages)(struct drm_i915_gem_object *); - void (*put_pages)(struct drm_i915_gem_object *, struct sg_table *); - - int (*dmabuf_export)(struct drm_i915_gem_object *); - void (*release)(struct drm_i915_gem_object *); -}; - /* * Frontbuffer tracking bits. Set in obj->frontbuffer_bits while a gem bo is * considered to be the frontbuffer for the given plane interface-wise. This @@ -2225,292 +2200,6 @@ struct drm_i915_gem_object_ops { #define INTEL_FRONTBUFFER_ALL_MASK(pipe) \ (0xff << (INTEL_FRONTBUFFER_BITS_PER_PIPE * (pipe))) -struct drm_i915_gem_object { - struct drm_gem_object base; - - const struct drm_i915_gem_object_ops *ops; - - /** List of VMAs backed by this object */ - struct list_head vma_list; - struct rb_root vma_tree; - - /** Stolen memory for this object, instead of being backed by shmem. */ - struct drm_mm_node *stolen; - struct list_head global_link; - union { - struct rcu_head rcu; - struct llist_node freed; - }; - - /** - * Whether the object is currently in the GGTT mmap. - */ - struct list_head userfault_link; - - /** Used in execbuf to temporarily hold a ref */ - struct list_head obj_exec_link; - - struct list_head batch_pool_link; - - unsigned long flags; - - /** - * Have we taken a reference for the object for incomplete GPU - * activity? - */ -#define I915_BO_ACTIVE_REF 0 - - /* - * Is the object to be mapped as read-only to the GPU - * Only honoured if hardware has relevant pte bit - */ - unsigned long gt_ro:1; - unsigned int cache_level:3; - unsigned int cache_dirty:1; - - atomic_t frontbuffer_bits; - unsigned int frontbuffer_ggtt_origin; /* write once */ - - /** Current tiling stride for the object, if it's tiled. */ - unsigned int tiling_and_stride; -#define FENCE_MINIMUM_STRIDE 128 /* See i915_tiling_ok() */ -#define TILING_MASK (FENCE_MINIMUM_STRIDE-1) -#define STRIDE_MASK (~TILING_MASK) - - /** Count of VMA actually bound by this object */ - unsigned int bind_count; - unsigned int active_count; - unsigned int pin_display; - - struct { - struct mutex lock; /* protects the pages and their use */ - atomic_t pages_pin_count; - - struct sg_table *pages; - void *mapping; - - struct i915_gem_object_page_iter { - struct scatterlist *sg_pos; - unsigned int sg_idx; /* in pages, but 32bit eek! */ - - struct radix_tree_root radix; - struct mutex lock; /* protects this cache */ - } get_page; - - /** - * Advice: are the backing pages purgeable? - */ - unsigned int madv:2; - - /** - * This is set if the object has been written to since the - * pages were last acquired. - */ - bool dirty:1; - - /** - * This is set if the object has been pinned due to unknown - * swizzling. - */ - bool quirked:1; - } mm; - - /** Breadcrumb of last rendering to the buffer. - * There can only be one writer, but we allow for multiple readers. - * If there is a writer that necessarily implies that all other - * read requests are complete - but we may only be lazily clearing - * the read requests. A read request is naturally the most recent - * request on a ring, so we may have two different write and read - * requests on one ring where the write request is older than the - * read request. This allows for the CPU to read from an active - * buffer by only waiting for the write to complete. - */ - struct reservation_object *resv; - - /** References from framebuffers, locks out tiling changes. */ - unsigned long framebuffer_references; - - /** Record of address bit 17 of each page at last unbind. */ - unsigned long *bit_17; - - struct i915_gem_userptr { - uintptr_t ptr; - unsigned read_only :1; - - struct i915_mm_struct *mm; - struct i915_mmu_object *mmu_object; - struct work_struct *work; - } userptr; - - /** for phys allocated objects */ - struct drm_dma_handle *phys_handle; - - struct reservation_object __builtin_resv; -}; - -static inline struct drm_i915_gem_object * -to_intel_bo(struct drm_gem_object *gem) -{ - /* Assert that to_intel_bo(NULL) == NULL */ - BUILD_BUG_ON(offsetof(struct drm_i915_gem_object, base)); - - return container_of(gem, struct drm_i915_gem_object, base); -} - -/** - * i915_gem_object_lookup_rcu - look up a temporary GEM object from its handle - * @filp: DRM file private date - * @handle: userspace handle - * - * Returns: - * - * A pointer to the object named by the handle if such exists on @filp, NULL - * otherwise. This object is only valid whilst under the RCU read lock, and - * note carefully the object may be in the process of being destroyed. - */ -static inline struct drm_i915_gem_object * -i915_gem_object_lookup_rcu(struct drm_file *file, u32 handle) -{ -#ifdef CONFIG_LOCKDEP - WARN_ON(debug_locks && !lock_is_held(&rcu_lock_map)); -#endif - return idr_find(&file->object_idr, handle); -} - -static inline struct drm_i915_gem_object * -i915_gem_object_lookup(struct drm_file *file, u32 handle) -{ - struct drm_i915_gem_object *obj; - - rcu_read_lock(); - obj = i915_gem_object_lookup_rcu(file, handle); - if (obj && !kref_get_unless_zero(&obj->base.refcount)) - obj = NULL; - rcu_read_unlock(); - - return obj; -} - -__deprecated -extern struct drm_gem_object * -drm_gem_object_lookup(struct drm_file *file, u32 handle); - -__attribute__((nonnull)) -static inline struct drm_i915_gem_object * -i915_gem_object_get(struct drm_i915_gem_object *obj) -{ - drm_gem_object_reference(&obj->base); - return obj; -} - -__deprecated -extern void drm_gem_object_reference(struct drm_gem_object *); - -__attribute__((nonnull)) -static inline void -i915_gem_object_put(struct drm_i915_gem_object *obj) -{ - __drm_gem_object_unreference(&obj->base); -} - -__deprecated -extern void drm_gem_object_unreference(struct drm_gem_object *); - -__deprecated -extern void drm_gem_object_unreference_unlocked(struct drm_gem_object *); - -static inline bool -i915_gem_object_is_dead(const struct drm_i915_gem_object *obj) -{ - return atomic_read(&obj->base.refcount.refcount) == 0; -} - -static inline bool -i915_gem_object_has_struct_page(const struct drm_i915_gem_object *obj) -{ - return obj->ops->flags & I915_GEM_OBJECT_HAS_STRUCT_PAGE; -} - -static inline bool -i915_gem_object_is_shrinkable(const struct drm_i915_gem_object *obj) -{ - return obj->ops->flags & I915_GEM_OBJECT_IS_SHRINKABLE; -} - -static inline bool -i915_gem_object_is_active(const struct drm_i915_gem_object *obj) -{ - return obj->active_count; -} - -static inline bool -i915_gem_object_has_active_reference(const struct drm_i915_gem_object *obj) -{ - return test_bit(I915_BO_ACTIVE_REF, &obj->flags); -} - -static inline void -i915_gem_object_set_active_reference(struct drm_i915_gem_object *obj) -{ - lockdep_assert_held(&obj->base.dev->struct_mutex); - __set_bit(I915_BO_ACTIVE_REF, &obj->flags); -} - -static inline void -i915_gem_object_clear_active_reference(struct drm_i915_gem_object *obj) -{ - lockdep_assert_held(&obj->base.dev->struct_mutex); - __clear_bit(I915_BO_ACTIVE_REF, &obj->flags); -} - -void __i915_gem_object_release_unless_active(struct drm_i915_gem_object *obj); - -static inline unsigned int -i915_gem_object_get_tiling(struct drm_i915_gem_object *obj) -{ - return obj->tiling_and_stride & TILING_MASK; -} - -static inline bool -i915_gem_object_is_tiled(struct drm_i915_gem_object *obj) -{ - return i915_gem_object_get_tiling(obj) != I915_TILING_NONE; -} - -static inline unsigned int -i915_gem_object_get_stride(struct drm_i915_gem_object *obj) -{ - return obj->tiling_and_stride & STRIDE_MASK; -} - -static inline struct intel_engine_cs * -i915_gem_object_last_write_engine(struct drm_i915_gem_object *obj) -{ - struct intel_engine_cs *engine = NULL; - struct dma_fence *fence; - - rcu_read_lock(); - fence = reservation_object_get_excl_rcu(obj->resv); - rcu_read_unlock(); - - if (fence && dma_fence_is_i915(fence) && !dma_fence_is_signaled(fence)) - engine = to_request(fence)->engine; - dma_fence_put(fence); - - return engine; -} - -static inline struct i915_vma *i915_vma_get(struct i915_vma *vma) -{ - i915_gem_object_get(vma->obj); - return vma; -} - -static inline void i915_vma_put(struct i915_vma *vma) -{ - i915_gem_object_put(vma->obj); -} - /* * Optimised SGL iterator for GEM objects */ @@ -2700,7 +2389,7 @@ struct drm_i915_cmd_table { #define INTEL_DEVID(dev_priv) ((dev_priv)->info.device_id) #define REVID_FOREVER 0xff -#define INTEL_REVID(p) (__I915__(p)->drm.pdev->revision) +#define INTEL_REVID(dev_priv) ((dev_priv)->drm.pdev->revision) #define GEN_FOREVER (0) /* @@ -2797,7 +2486,7 @@ struct drm_i915_cmd_table { #define IS_SKL_GT4(dev_priv) (IS_SKYLAKE(dev_priv) && \ (INTEL_DEVID(dev_priv) & 0x00F0) == 0x0030) -#define IS_PRELIMINARY_HW(intel_info) ((intel_info)->is_preliminary) +#define IS_ALPHA_SUPPORT(intel_info) ((intel_info)->is_alpha_support) #define SKL_REVID_A0 0x0 #define SKL_REVID_B0 0x1 @@ -2851,28 +2540,31 @@ struct drm_i915_cmd_table { #define ALL_ENGINES (~0) #define HAS_ENGINE(dev_priv, id) \ - (!!(INTEL_INFO(dev_priv)->ring_mask & ENGINE_MASK(id))) + (!!((dev_priv)->info.ring_mask & ENGINE_MASK(id))) #define HAS_BSD(dev_priv) HAS_ENGINE(dev_priv, VCS) #define HAS_BSD2(dev_priv) HAS_ENGINE(dev_priv, VCS2) #define HAS_BLT(dev_priv) HAS_ENGINE(dev_priv, BCS) #define HAS_VEBOX(dev_priv) HAS_ENGINE(dev_priv, VECS) -#define HAS_LLC(dev) (INTEL_INFO(dev)->has_llc) -#define HAS_SNOOP(dev) (INTEL_INFO(dev)->has_snoop) -#define HAS_EDRAM(dev) (!!(__I915__(dev)->edram_cap & EDRAM_ENABLED)) +#define HAS_LLC(dev_priv) ((dev_priv)->info.has_llc) +#define HAS_SNOOP(dev_priv) ((dev_priv)->info.has_snoop) +#define HAS_EDRAM(dev_priv) (!!((dev_priv)->edram_cap & EDRAM_ENABLED)) #define HAS_WT(dev_priv) ((IS_HASWELL(dev_priv) || \ IS_BROADWELL(dev_priv)) && HAS_EDRAM(dev_priv)) -#define HWS_NEEDS_PHYSICAL(dev) (INTEL_INFO(dev)->hws_needs_physical) -#define HAS_HW_CONTEXTS(dev) (INTEL_INFO(dev)->has_hw_contexts) -#define HAS_LOGICAL_RING_CONTEXTS(dev) (INTEL_INFO(dev)->has_logical_ring_contexts) -#define USES_PPGTT(dev) (i915.enable_ppgtt) -#define USES_FULL_PPGTT(dev) (i915.enable_ppgtt >= 2) -#define USES_FULL_48BIT_PPGTT(dev) (i915.enable_ppgtt == 3) +#define HWS_NEEDS_PHYSICAL(dev_priv) ((dev_priv)->info.hws_needs_physical) + +#define HAS_HW_CONTEXTS(dev_priv) ((dev_priv)->info.has_hw_contexts) +#define HAS_LOGICAL_RING_CONTEXTS(dev_priv) \ + ((dev_priv)->info.has_logical_ring_contexts) +#define USES_PPGTT(dev_priv) (i915.enable_ppgtt) +#define USES_FULL_PPGTT(dev_priv) (i915.enable_ppgtt >= 2) +#define USES_FULL_48BIT_PPGTT(dev_priv) (i915.enable_ppgtt == 3) -#define HAS_OVERLAY(dev) (INTEL_INFO(dev)->has_overlay) -#define OVERLAY_NEEDS_PHYSICAL(dev) (INTEL_INFO(dev)->overlay_needs_physical) +#define HAS_OVERLAY(dev_priv) ((dev_priv)->info.has_overlay) +#define OVERLAY_NEEDS_PHYSICAL(dev_priv) \ + ((dev_priv)->info.overlay_needs_physical) /* Early gen2 have a totally busted CS tlb and require pinned batches. */ #define HAS_BROKEN_CS_TLB(dev_priv) (IS_I830(dev_priv) || IS_845G(dev_priv)) @@ -2889,8 +2581,8 @@ struct drm_i915_cmd_table { * legacy irq no. is shared with another device. The kernel then disables that * interrupt source and so prevents the other device from working properly. */ -#define HAS_AUX_IRQ(dev) (INTEL_INFO(dev)->gen >= 5) -#define HAS_GMBUS_IRQ(dev) (INTEL_INFO(dev)->has_gmbus_irq) +#define HAS_AUX_IRQ(dev_priv) ((dev_priv)->info.gen >= 5) +#define HAS_GMBUS_IRQ(dev_priv) ((dev_priv)->info.has_gmbus_irq) /* With the 945 and later, Y tiling got adjusted so that it was 32 128-byte * rows, which changed the alignment requirements and fence programming. @@ -2898,24 +2590,24 @@ struct drm_i915_cmd_table { #define HAS_128_BYTE_Y_TILING(dev_priv) (!IS_GEN2(dev_priv) && \ !(IS_I915G(dev_priv) || \ IS_I915GM(dev_priv))) -#define SUPPORTS_TV(dev) (INTEL_INFO(dev)->supports_tv) -#define I915_HAS_HOTPLUG(dev) (INTEL_INFO(dev)->has_hotplug) +#define SUPPORTS_TV(dev_priv) ((dev_priv)->info.supports_tv) +#define I915_HAS_HOTPLUG(dev_priv) ((dev_priv)->info.has_hotplug) -#define HAS_FW_BLC(dev_priv) (INTEL_GEN(dev_priv) > 2) -#define HAS_PIPE_CXSR(dev) (INTEL_INFO(dev)->has_pipe_cxsr) -#define HAS_FBC(dev) (INTEL_INFO(dev)->has_fbc) +#define HAS_FW_BLC(dev_priv) (INTEL_GEN(dev_priv) > 2) +#define HAS_PIPE_CXSR(dev_priv) ((dev_priv)->info.has_pipe_cxsr) +#define HAS_FBC(dev_priv) ((dev_priv)->info.has_fbc) #define HAS_IPS(dev_priv) (IS_HSW_ULT(dev_priv) || IS_BROADWELL(dev_priv)) -#define HAS_DP_MST(dev) (INTEL_INFO(dev)->has_dp_mst) +#define HAS_DP_MST(dev_priv) ((dev_priv)->info.has_dp_mst) -#define HAS_DDI(dev_priv) ((dev_priv)->info.has_ddi) -#define HAS_FPGA_DBG_UNCLAIMED(dev) (INTEL_INFO(dev)->has_fpga_dbg) -#define HAS_PSR(dev) (INTEL_INFO(dev)->has_psr) -#define HAS_RC6(dev) (INTEL_INFO(dev)->has_rc6) -#define HAS_RC6p(dev) (INTEL_INFO(dev)->has_rc6p) +#define HAS_DDI(dev_priv) ((dev_priv)->info.has_ddi) +#define HAS_FPGA_DBG_UNCLAIMED(dev_priv) ((dev_priv)->info.has_fpga_dbg) +#define HAS_PSR(dev_priv) ((dev_priv)->info.has_psr) +#define HAS_RC6(dev_priv) ((dev_priv)->info.has_rc6) +#define HAS_RC6p(dev_priv) ((dev_priv)->info.has_rc6p) -#define HAS_CSR(dev) (INTEL_INFO(dev)->has_csr) +#define HAS_CSR(dev_priv) ((dev_priv)->info.has_csr) #define HAS_RUNTIME_PM(dev_priv) ((dev_priv)->info.has_runtime_pm) #define HAS_64BIT_RELOC(dev_priv) ((dev_priv)->info.has_64bit_reloc) @@ -2925,13 +2617,13 @@ struct drm_i915_cmd_table { * command submission once loaded. But these are logically independent * properties, so we have separate macros to test them. */ -#define HAS_GUC(dev) (INTEL_INFO(dev)->has_guc) -#define HAS_GUC_UCODE(dev) (HAS_GUC(dev)) -#define HAS_GUC_SCHED(dev) (HAS_GUC(dev)) +#define HAS_GUC(dev_priv) ((dev_priv)->info.has_guc) +#define HAS_GUC_UCODE(dev_priv) (HAS_GUC(dev_priv)) +#define HAS_GUC_SCHED(dev_priv) (HAS_GUC(dev_priv)) -#define HAS_RESOURCE_STREAMER(dev) (INTEL_INFO(dev)->has_resource_streamer) +#define HAS_RESOURCE_STREAMER(dev_priv) ((dev_priv)->info.has_resource_streamer) -#define HAS_POOLED_EU(dev) (INTEL_INFO(dev)->has_pooled_eu) +#define HAS_POOLED_EU(dev_priv) ((dev_priv)->info.has_pooled_eu) #define INTEL_PCH_DEVICE_ID_MASK 0xff00 #define INTEL_PCH_IBX_DEVICE_ID_TYPE 0x3b00 @@ -2971,6 +2663,8 @@ struct drm_i915_cmd_table { #define GT_FREQUENCY_MULTIPLIER 50 #define GEN9_FREQ_SCALER 3 +#define HAS_DECOUPLED_MMIO(dev_priv) (INTEL_INFO(dev_priv)->has_decoupled_mmio) + #include "i915_trace.h" static inline bool intel_scanout_needs_vtd_wa(struct drm_i915_private *dev_priv) @@ -3222,13 +2916,6 @@ i915_gem_object_ggtt_pin(struct drm_i915_gem_object *obj, u64 alignment, u64 flags); -int i915_vma_bind(struct i915_vma *vma, enum i915_cache_level cache_level, - u32 flags); -void __i915_vma_set_map_and_fenceable(struct i915_vma *vma); -int __must_check i915_vma_unbind(struct i915_vma *vma); -void i915_vma_close(struct i915_vma *vma); -void i915_vma_destroy(struct i915_vma *vma); - int i915_gem_object_unbind(struct drm_i915_gem_object *obj); void i915_gem_release_mmap(struct drm_i915_gem_object *obj); @@ -3405,7 +3092,7 @@ static inline u32 i915_reset_count(struct i915_gpu_error *error) void i915_gem_reset(struct drm_i915_private *dev_priv); void i915_gem_set_wedged(struct drm_i915_private *dev_priv); -bool i915_gem_clflush_object(struct drm_i915_gem_object *obj, bool force); +void i915_gem_clflush_object(struct drm_i915_gem_object *obj, bool force); int __must_check i915_gem_init(struct drm_device *dev); int __must_check i915_gem_init_hw(struct drm_device *dev); void i915_gem_init_swizzling(struct drm_device *dev); @@ -3419,6 +3106,11 @@ int i915_gem_object_wait(struct drm_i915_gem_object *obj, unsigned int flags, long timeout, struct intel_rps_client *rps); +int i915_gem_object_wait_priority(struct drm_i915_gem_object *obj, + unsigned int flags, + int priority); +#define I915_PRIORITY_DISPLAY I915_PRIORITY_MAX + int __must_check i915_gem_object_set_to_gtt_domain(struct drm_i915_gem_object *obj, bool write); @@ -3478,54 +3170,10 @@ i915_gem_object_ggtt_offset(struct drm_i915_gem_object *o, return i915_ggtt_offset(i915_gem_object_to_ggtt(o, view)); } -/* i915_gem_fence.c */ +/* i915_gem_fence_reg.c */ int __must_check i915_vma_get_fence(struct i915_vma *vma); int __must_check i915_vma_put_fence(struct i915_vma *vma); -/** - * i915_vma_pin_fence - pin fencing state - * @vma: vma to pin fencing for - * - * This pins the fencing state (whether tiled or untiled) to make sure the - * vma (and its object) is ready to be used as a scanout target. Fencing - * status must be synchronize first by calling i915_vma_get_fence(): - * - * The resulting fence pin reference must be released again with - * i915_vma_unpin_fence(). - * - * Returns: - * - * True if the vma has a fence, false otherwise. - */ -static inline bool -i915_vma_pin_fence(struct i915_vma *vma) -{ - lockdep_assert_held(&vma->vm->dev->struct_mutex); - if (vma->fence) { - vma->fence->pin_count++; - return true; - } else - return false; -} - -/** - * i915_vma_unpin_fence - unpin fencing state - * @vma: vma to unpin fencing for - * - * This releases the fence pin reference acquired through - * i915_vma_pin_fence. It will handle both objects with and without an - * attached fence correctly, callers do not need to distinguish this. - */ -static inline void -i915_vma_unpin_fence(struct i915_vma *vma) -{ - lockdep_assert_held(&vma->vm->dev->struct_mutex); - if (vma->fence) { - GEM_BUG_ON(vma->fence->pin_count <= 0); - vma->fence->pin_count--; - } -} - void i915_gem_restore_fences(struct drm_device *dev); void i915_gem_detect_bit_6_swizzle(struct drm_device *dev); diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c index 41e697e5dbcd..3fb5e66e4d65 100644 --- a/drivers/gpu/drm/i915/i915_gem.c +++ b/drivers/gpu/drm/i915/i915_gem.c @@ -34,6 +34,7 @@ #include "intel_drv.h" #include "intel_frontbuffer.h" #include "intel_mocs.h" +#include <linux/dma-fence-array.h> #include <linux/reservation.h> #include <linux/shmem_fs.h> #include <linux/slab.h> @@ -48,7 +49,7 @@ static void i915_gem_object_flush_cpu_write_domain(struct drm_i915_gem_object *o static bool cpu_cache_is_coherent(struct drm_device *dev, enum i915_cache_level level) { - return HAS_LLC(dev) || level != I915_CACHE_NONE; + return HAS_LLC(to_i915(dev)) || level != I915_CACHE_NONE; } static bool cpu_write_needs_clflush(struct drm_i915_gem_object *obj) @@ -220,7 +221,8 @@ i915_gem_object_get_pages_phys(struct drm_i915_gem_object *obj) } static void -__i915_gem_object_release_shmem(struct drm_i915_gem_object *obj) +__i915_gem_object_release_shmem(struct drm_i915_gem_object *obj, + struct sg_table *pages) { GEM_BUG_ON(obj->mm.madv == __I915_MADV_PURGED); @@ -228,7 +230,7 @@ __i915_gem_object_release_shmem(struct drm_i915_gem_object *obj) obj->mm.dirty = false; if ((obj->base.read_domains & I915_GEM_DOMAIN_CPU) == 0) - i915_gem_clflush_object(obj, false); + drm_clflush_sg(pages); obj->base.read_domains = I915_GEM_DOMAIN_CPU; obj->base.write_domain = I915_GEM_DOMAIN_CPU; @@ -238,7 +240,7 @@ static void i915_gem_object_put_pages_phys(struct drm_i915_gem_object *obj, struct sg_table *pages) { - __i915_gem_object_release_shmem(obj); + __i915_gem_object_release_shmem(obj, pages); if (obj->mm.dirty) { struct address_space *mapping = obj->base.filp->f_mapping; @@ -433,6 +435,70 @@ i915_gem_object_wait_reservation(struct reservation_object *resv, return timeout; } +static void __fence_set_priority(struct dma_fence *fence, int prio) +{ + struct drm_i915_gem_request *rq; + struct intel_engine_cs *engine; + + if (!dma_fence_is_i915(fence)) + return; + + rq = to_request(fence); + engine = rq->engine; + if (!engine->schedule) + return; + + engine->schedule(rq, prio); +} + +static void fence_set_priority(struct dma_fence *fence, int prio) +{ + /* Recurse once into a fence-array */ + if (dma_fence_is_array(fence)) { + struct dma_fence_array *array = to_dma_fence_array(fence); + int i; + + for (i = 0; i < array->num_fences; i++) + __fence_set_priority(array->fences[i], prio); + } else { + __fence_set_priority(fence, prio); + } +} + +int +i915_gem_object_wait_priority(struct drm_i915_gem_object *obj, + unsigned int flags, + int prio) +{ + struct dma_fence *excl; + + if (flags & I915_WAIT_ALL) { + struct dma_fence **shared; + unsigned int count, i; + int ret; + + ret = reservation_object_get_fences_rcu(obj->resv, + &excl, &count, &shared); + if (ret) + return ret; + + for (i = 0; i < count; i++) { + fence_set_priority(shared[i], prio); + dma_fence_put(shared[i]); + } + + kfree(shared); + } else { + excl = reservation_object_get_excl_rcu(obj->resv); + } + + if (excl) { + fence_set_priority(excl, prio); + dma_fence_put(excl); + } + return 0; +} + /** * Waits for rendering to the object to be completed * @obj: i915 gem object @@ -1757,7 +1823,7 @@ int i915_gem_fault(struct vm_area_struct *area, struct vm_fault *vmf) goto err_rpm; /* Access to snoopable pages through the GTT is incoherent. */ - if (obj->cache_level != I915_CACHE_NONE && !HAS_LLC(dev)) { + if (obj->cache_level != I915_CACHE_NONE && !HAS_LLC(dev_priv)) { ret = -EFAULT; goto err_unlock; } @@ -2150,7 +2216,7 @@ i915_gem_object_put_pages_gtt(struct drm_i915_gem_object *obj, struct sgt_iter sgt_iter; struct page *page; - __i915_gem_object_release_shmem(obj); + __i915_gem_object_release_shmem(obj, pages); i915_gem_gtt_finish_pages(obj, pages); @@ -2232,6 +2298,30 @@ static unsigned int swiotlb_max_size(void) #endif } +static void i915_sg_trim(struct sg_table *orig_st) +{ + struct sg_table new_st; + struct scatterlist *sg, *new_sg; + unsigned int i; + + if (orig_st->nents == orig_st->orig_nents) + return; + + if (sg_alloc_table(&new_st, orig_st->nents, GFP_KERNEL)) + return; + + new_sg = new_st.sgl; + for_each_sg(orig_st->sgl, sg, orig_st->nents, i) { + sg_set_page(new_sg, sg_page(sg), sg->length, 0); + /* called before being DMA mapped, no need to copy sg->dma_* */ + new_sg = sg_next(new_sg); + } + + sg_free_table(orig_st); + + *orig_st = new_st; +} + static struct sg_table * i915_gem_object_get_pages_gtt(struct drm_i915_gem_object *obj) { @@ -2317,6 +2407,9 @@ i915_gem_object_get_pages_gtt(struct drm_i915_gem_object *obj) if (sg) /* loop terminated early; short sg table */ sg_mark_end(sg); + /* Trim unused sg entries to avoid wasting memory. */ + i915_sg_trim(st); + ret = i915_gem_gtt_prepare_pages(obj, st); if (ret) goto err_pages; @@ -2689,12 +2782,17 @@ static void i915_gem_cleanup_engine(struct intel_engine_cs *engine) */ if (i915.enable_execlists) { - spin_lock(&engine->execlist_lock); - INIT_LIST_HEAD(&engine->execlist_queue); + unsigned long flags; + + spin_lock_irqsave(&engine->timeline->lock, flags); + i915_gem_request_put(engine->execlist_port[0].request); i915_gem_request_put(engine->execlist_port[1].request); memset(engine->execlist_port, 0, sizeof(engine->execlist_port)); - spin_unlock(&engine->execlist_lock); + engine->execlist_queue = RB_ROOT; + engine->execlist_first = NULL; + + spin_unlock_irqrestore(&engine->timeline->lock, flags); } } @@ -2892,117 +2990,6 @@ i915_gem_wait_ioctl(struct drm_device *dev, void *data, struct drm_file *file) return ret; } -static void __i915_vma_iounmap(struct i915_vma *vma) -{ - GEM_BUG_ON(i915_vma_is_pinned(vma)); - - if (vma->iomap == NULL) - return; - - io_mapping_unmap(vma->iomap); - vma->iomap = NULL; -} - -int i915_vma_unbind(struct i915_vma *vma) -{ - struct drm_i915_gem_object *obj = vma->obj; - unsigned long active; - int ret; - - lockdep_assert_held(&obj->base.dev->struct_mutex); - - /* First wait upon any activity as retiring the request may - * have side-effects such as unpinning or even unbinding this vma. - */ - active = i915_vma_get_active(vma); - if (active) { - int idx; - - /* When a closed VMA is retired, it is unbound - eek. - * In order to prevent it from being recursively closed, - * take a pin on the vma so that the second unbind is - * aborted. - * - * Even more scary is that the retire callback may free - * the object (last active vma). To prevent the explosion - * we defer the actual object free to a worker that can - * only proceed once it acquires the struct_mutex (which - * we currently hold, therefore it cannot free this object - * before we are finished). - */ - __i915_vma_pin(vma); - - for_each_active(active, idx) { - ret = i915_gem_active_retire(&vma->last_read[idx], - &vma->vm->dev->struct_mutex); - if (ret) - break; - } - - __i915_vma_unpin(vma); - if (ret) - return ret; - - GEM_BUG_ON(i915_vma_is_active(vma)); - } - - if (i915_vma_is_pinned(vma)) - return -EBUSY; - - if (!drm_mm_node_allocated(&vma->node)) - goto destroy; - - GEM_BUG_ON(obj->bind_count == 0); - GEM_BUG_ON(!i915_gem_object_has_pinned_pages(obj)); - - if (i915_vma_is_map_and_fenceable(vma)) { - /* release the fence reg _after_ flushing */ - ret = i915_vma_put_fence(vma); - if (ret) - return ret; - - /* Force a pagefault for domain tracking on next user access */ - i915_gem_release_mmap(obj); - - __i915_vma_iounmap(vma); - vma->flags &= ~I915_VMA_CAN_FENCE; - } - - if (likely(!vma->vm->closed)) { - trace_i915_vma_unbind(vma); - vma->vm->unbind_vma(vma); - } - vma->flags &= ~(I915_VMA_GLOBAL_BIND | I915_VMA_LOCAL_BIND); - - drm_mm_remove_node(&vma->node); - list_move_tail(&vma->vm_link, &vma->vm->unbound_list); - - if (vma->pages != obj->mm.pages) { - GEM_BUG_ON(!vma->pages); - sg_free_table(vma->pages); - kfree(vma->pages); - } - vma->pages = NULL; - - /* Since the unbound list is global, only move to that list if - * no more VMAs exist. */ - if (--obj->bind_count == 0) - list_move_tail(&obj->global_link, - &to_i915(obj->base.dev)->mm.unbound_list); - - /* And finally now the object is completely decoupled from this vma, - * we can drop its hold on the backing storage and allow it to be - * reaped by the shrinker. - */ - i915_gem_object_unpin_pages(obj); - -destroy: - if (unlikely(i915_vma_is_closed(vma))) - i915_vma_destroy(vma); - - return 0; -} - static int wait_for_timeline(struct i915_gem_timeline *tl, unsigned int flags) { int ret, i; @@ -3018,201 +3005,43 @@ static int wait_for_timeline(struct i915_gem_timeline *tl, unsigned int flags) int i915_gem_wait_for_idle(struct drm_i915_private *i915, unsigned int flags) { - struct i915_gem_timeline *tl; int ret; - list_for_each_entry(tl, &i915->gt.timelines, link) { - ret = wait_for_timeline(tl, flags); - if (ret) - return ret; - } - - return 0; -} + if (flags & I915_WAIT_LOCKED) { + struct i915_gem_timeline *tl; -static bool i915_gem_valid_gtt_space(struct i915_vma *vma, - unsigned long cache_level) -{ - struct drm_mm_node *gtt_space = &vma->node; - struct drm_mm_node *other; + lockdep_assert_held(&i915->drm.struct_mutex); - /* - * On some machines we have to be careful when putting differing types - * of snoopable memory together to avoid the prefetcher crossing memory - * domains and dying. During vm initialisation, we decide whether or not - * these constraints apply and set the drm_mm.color_adjust - * appropriately. - */ - if (vma->vm->mm.color_adjust == NULL) - return true; - - if (!drm_mm_node_allocated(gtt_space)) - return true; - - if (list_empty(>t_space->node_list)) - return true; - - other = list_entry(gtt_space->node_list.prev, struct drm_mm_node, node_list); - if (other->allocated && !other->hole_follows && other->color != cache_level) - return false; - - other = list_entry(gtt_space->node_list.next, struct drm_mm_node, node_list); - if (other->allocated && !gtt_space->hole_follows && other->color != cache_level) - return false; - - return true; -} - -/** - * i915_vma_insert - finds a slot for the vma in its address space - * @vma: the vma - * @size: requested size in bytes (can be larger than the VMA) - * @alignment: required alignment - * @flags: mask of PIN_* flags to use - * - * First we try to allocate some free space that meets the requirements for - * the VMA. Failiing that, if the flags permit, it will evict an old VMA, - * preferrably the oldest idle entry to make room for the new VMA. - * - * Returns: - * 0 on success, negative error code otherwise. - */ -static int -i915_vma_insert(struct i915_vma *vma, u64 size, u64 alignment, u64 flags) -{ - struct drm_i915_private *dev_priv = to_i915(vma->vm->dev); - struct drm_i915_gem_object *obj = vma->obj; - u64 start, end; - int ret; - - GEM_BUG_ON(vma->flags & (I915_VMA_GLOBAL_BIND | I915_VMA_LOCAL_BIND)); - GEM_BUG_ON(drm_mm_node_allocated(&vma->node)); - - size = max(size, vma->size); - if (flags & PIN_MAPPABLE) - size = i915_gem_get_ggtt_size(dev_priv, size, - i915_gem_object_get_tiling(obj)); - - alignment = max(max(alignment, vma->display_alignment), - i915_gem_get_ggtt_alignment(dev_priv, size, - i915_gem_object_get_tiling(obj), - flags & PIN_MAPPABLE)); - - start = flags & PIN_OFFSET_BIAS ? flags & PIN_OFFSET_MASK : 0; - - end = vma->vm->total; - if (flags & PIN_MAPPABLE) - end = min_t(u64, end, dev_priv->ggtt.mappable_end); - if (flags & PIN_ZONE_4G) - end = min_t(u64, end, (1ULL << 32) - PAGE_SIZE); - - /* If binding the object/GGTT view requires more space than the entire - * aperture has, reject it early before evicting everything in a vain - * attempt to find space. - */ - if (size > end) { - DRM_DEBUG("Attempting to bind an object larger than the aperture: request=%llu [object=%zd] > %s aperture=%llu\n", - size, obj->base.size, - flags & PIN_MAPPABLE ? "mappable" : "total", - end); - return -E2BIG; - } - - ret = i915_gem_object_pin_pages(obj); - if (ret) - return ret; - - if (flags & PIN_OFFSET_FIXED) { - u64 offset = flags & PIN_OFFSET_MASK; - if (offset & (alignment - 1) || offset > end - size) { - ret = -EINVAL; - goto err_unpin; - } - - vma->node.start = offset; - vma->node.size = size; - vma->node.color = obj->cache_level; - ret = drm_mm_reserve_node(&vma->vm->mm, &vma->node); - if (ret) { - ret = i915_gem_evict_for_vma(vma); - if (ret == 0) - ret = drm_mm_reserve_node(&vma->vm->mm, &vma->node); + list_for_each_entry(tl, &i915->gt.timelines, link) { + ret = wait_for_timeline(tl, flags); if (ret) - goto err_unpin; + return ret; } } else { - u32 search_flag, alloc_flag; - - if (flags & PIN_HIGH) { - search_flag = DRM_MM_SEARCH_BELOW; - alloc_flag = DRM_MM_CREATE_TOP; - } else { - search_flag = DRM_MM_SEARCH_DEFAULT; - alloc_flag = DRM_MM_CREATE_DEFAULT; - } - - /* We only allocate in PAGE_SIZE/GTT_PAGE_SIZE (4096) chunks, - * so we know that we always have a minimum alignment of 4096. - * The drm_mm range manager is optimised to return results - * with zero alignment, so where possible use the optimal - * path. - */ - if (alignment <= 4096) - alignment = 0; - -search_free: - ret = drm_mm_insert_node_in_range_generic(&vma->vm->mm, - &vma->node, - size, alignment, - obj->cache_level, - start, end, - search_flag, - alloc_flag); - if (ret) { - ret = i915_gem_evict_something(vma->vm, size, alignment, - obj->cache_level, - start, end, - flags); - if (ret == 0) - goto search_free; - - goto err_unpin; - } - - GEM_BUG_ON(vma->node.start < start); - GEM_BUG_ON(vma->node.start + vma->node.size > end); + ret = wait_for_timeline(&i915->gt.global_timeline, flags); + if (ret) + return ret; } - GEM_BUG_ON(!i915_gem_valid_gtt_space(vma, obj->cache_level)); - - list_move_tail(&obj->global_link, &dev_priv->mm.bound_list); - list_move_tail(&vma->vm_link, &vma->vm->inactive_list); - obj->bind_count++; - GEM_BUG_ON(atomic_read(&obj->mm.pages_pin_count) < obj->bind_count); return 0; - -err_unpin: - i915_gem_object_unpin_pages(obj); - return ret; } -bool -i915_gem_clflush_object(struct drm_i915_gem_object *obj, - bool force) +void i915_gem_clflush_object(struct drm_i915_gem_object *obj, + bool force) { /* If we don't have a page list set up, then we're not pinned * to GPU, and we can ignore the cache flush because it'll happen * again at bind time. */ if (!obj->mm.pages) - return false; + return; /* * Stolen memory is always coherent with the GPU as it is explicitly * marked as wc by the system, or the system is cache-coherent. */ if (obj->stolen || obj->phys_handle) - return false; + return; /* If the GPU is snooping the contents of the CPU cache, * we do not need to manually clear the CPU cache lines. However, @@ -3224,14 +3053,12 @@ i915_gem_clflush_object(struct drm_i915_gem_object *obj, */ if (!force && cpu_cache_is_coherent(obj->base.dev, obj->cache_level)) { obj->cache_dirty = true; - return false; + return; } trace_i915_gem_object_clflush(obj); drm_clflush_sg(obj->mm.pages); obj->cache_dirty = false; - - return true; } /** Flushes the GTT write domain for the object if it's dirty. */ @@ -3277,9 +3104,7 @@ i915_gem_object_flush_cpu_write_domain(struct drm_i915_gem_object *obj) if (obj->base.write_domain != I915_GEM_DOMAIN_CPU) return; - if (i915_gem_clflush_object(obj, obj->pin_display)) - i915_gem_chipset_flush(to_i915(obj->base.dev)); - + i915_gem_clflush_object(obj, obj->pin_display); intel_fb_obj_flush(obj, false, ORIGIN_CPU); obj->base.write_domain = 0; @@ -3435,7 +3260,8 @@ restart: if (ret) return ret; - if (!HAS_LLC(obj->base.dev) && cache_level != I915_CACHE_NONE) { + if (!HAS_LLC(to_i915(obj->base.dev)) && + cache_level != I915_CACHE_NONE) { /* Access to snoopable pages through the GTT is * incoherent and on some machines causes a hard * lockup. Relinquish the CPU mmaping to force @@ -3486,10 +3312,8 @@ out: * object is now coherent at its new cache level (with respect * to the access domain). */ - if (obj->cache_dirty && cpu_write_needs_clflush(obj)) { - if (i915_gem_clflush_object(obj, true)) - i915_gem_chipset_flush(to_i915(obj->base.dev)); - } + if (obj->cache_dirty && cpu_write_needs_clflush(obj)) + i915_gem_clflush_object(obj, true); return 0; } @@ -3798,100 +3622,6 @@ i915_gem_ring_throttle(struct drm_device *dev, struct drm_file *file) return ret < 0 ? ret : 0; } -static bool -i915_vma_misplaced(struct i915_vma *vma, u64 size, u64 alignment, u64 flags) -{ - if (!drm_mm_node_allocated(&vma->node)) - return false; - - if (vma->node.size < size) - return true; - - if (alignment && vma->node.start & (alignment - 1)) - return true; - - if (flags & PIN_MAPPABLE && !i915_vma_is_map_and_fenceable(vma)) - return true; - - if (flags & PIN_OFFSET_BIAS && - vma->node.start < (flags & PIN_OFFSET_MASK)) - return true; - - if (flags & PIN_OFFSET_FIXED && - vma->node.start != (flags & PIN_OFFSET_MASK)) - return true; - - return false; -} - -void __i915_vma_set_map_and_fenceable(struct i915_vma *vma) -{ - struct drm_i915_gem_object *obj = vma->obj; - struct drm_i915_private *dev_priv = to_i915(obj->base.dev); - bool mappable, fenceable; - u32 fence_size, fence_alignment; - - fence_size = i915_gem_get_ggtt_size(dev_priv, - vma->size, - i915_gem_object_get_tiling(obj)); - fence_alignment = i915_gem_get_ggtt_alignment(dev_priv, - vma->size, - i915_gem_object_get_tiling(obj), - true); - - fenceable = (vma->node.size == fence_size && - (vma->node.start & (fence_alignment - 1)) == 0); - - mappable = (vma->node.start + fence_size <= - dev_priv->ggtt.mappable_end); - - /* - * Explicitly disable for rotated VMA since the display does not - * need the fence and the VMA is not accessible to other users. - */ - if (mappable && fenceable && - vma->ggtt_view.type != I915_GGTT_VIEW_ROTATED) - vma->flags |= I915_VMA_CAN_FENCE; - else - vma->flags &= ~I915_VMA_CAN_FENCE; -} - -int __i915_vma_do_pin(struct i915_vma *vma, - u64 size, u64 alignment, u64 flags) -{ - unsigned int bound = vma->flags; - int ret; - - lockdep_assert_held(&vma->vm->dev->struct_mutex); - GEM_BUG_ON((flags & (PIN_GLOBAL | PIN_USER)) == 0); - GEM_BUG_ON((flags & PIN_GLOBAL) && !i915_vma_is_ggtt(vma)); - - if (WARN_ON(bound & I915_VMA_PIN_OVERFLOW)) { - ret = -EBUSY; - goto err; - } - - if ((bound & I915_VMA_BIND_MASK) == 0) { - ret = i915_vma_insert(vma, size, alignment, flags); - if (ret) - goto err; - } - - ret = i915_vma_bind(vma, vma->obj->cache_level, flags); - if (ret) - goto err; - - if ((bound ^ vma->flags) & I915_VMA_GLOBAL_BIND) - __i915_vma_set_map_and_fenceable(vma); - - GEM_BUG_ON(i915_vma_misplaced(vma, size, alignment, flags)); - return 0; - -err: - __i915_vma_unpin(vma); - return ret; -} - struct i915_vma * i915_gem_object_ggtt_pin(struct drm_i915_gem_object *obj, const struct i915_ggtt_view *view, @@ -4235,7 +3965,7 @@ i915_gem_object_create(struct drm_device *dev, u64 size) obj->base.write_domain = I915_GEM_DOMAIN_CPU; obj->base.read_domains = I915_GEM_DOMAIN_CPU; - if (HAS_LLC(dev)) { + if (HAS_LLC(dev_priv)) { /* On some devices, we can have the GPU use the LLC (the CPU * cache) for about a 10% performance improvement * compared to uncached. Graphics requests other than @@ -4481,7 +4211,7 @@ int i915_gem_suspend(struct drm_device *dev) * machines is a good idea, we don't - just in case it leaves the * machine in an unusable condition. */ - if (HAS_HW_CONTEXTS(dev)) { + if (HAS_HW_CONTEXTS(dev_priv)) { int reset = intel_gpu_reset(dev_priv, ALL_ENGINES); WARN_ON(reset && reset != -ENODEV); } @@ -4574,7 +4304,7 @@ i915_gem_init_hw(struct drm_device *dev) /* Double layer security blanket, see i915_gem_init() */ intel_uncore_forcewake_get(dev_priv, FORCEWAKE_ALL); - if (HAS_EDRAM(dev) && INTEL_GEN(dev_priv) < 9) + if (HAS_EDRAM(dev_priv) && INTEL_GEN(dev_priv) < 9) I915_WRITE(HSW_IDICR, I915_READ(HSW_IDICR) | IDIHASHMSK(0xf)); if (IS_HASWELL(dev_priv)) @@ -4770,14 +4500,18 @@ i915_gem_load_init(struct drm_device *dev) if (!dev_priv->requests) goto err_vmas; + dev_priv->dependencies = KMEM_CACHE(i915_dependency, + SLAB_HWCACHE_ALIGN | + SLAB_RECLAIM_ACCOUNT); + if (!dev_priv->dependencies) + goto err_requests; + mutex_lock(&dev_priv->drm.struct_mutex); INIT_LIST_HEAD(&dev_priv->gt.timelines); - err = i915_gem_timeline_init(dev_priv, - &dev_priv->gt.global_timeline, - "[execution]"); + err = i915_gem_timeline_init__global(dev_priv); mutex_unlock(&dev_priv->drm.struct_mutex); if (err) - goto err_requests; + goto err_dependencies; INIT_LIST_HEAD(&dev_priv->context_list); INIT_WORK(&dev_priv->mm.free_work, __i915_gem_free_work); @@ -4805,6 +4539,8 @@ i915_gem_load_init(struct drm_device *dev) return 0; +err_dependencies: + kmem_cache_destroy(dev_priv->dependencies); err_requests: kmem_cache_destroy(dev_priv->requests); err_vmas: @@ -4821,6 +4557,7 @@ void i915_gem_load_cleanup(struct drm_device *dev) WARN_ON(!llist_empty(&dev_priv->mm.free_list)); + kmem_cache_destroy(dev_priv->dependencies); kmem_cache_destroy(dev_priv->requests); kmem_cache_destroy(dev_priv->vmas); kmem_cache_destroy(dev_priv->objects); diff --git a/drivers/gpu/drm/i915/i915_gem.h b/drivers/gpu/drm/i915/i915_gem.h index 735580d72eb1..51ec793f2e20 100644 --- a/drivers/gpu/drm/i915/i915_gem.h +++ b/drivers/gpu/drm/i915/i915_gem.h @@ -28,7 +28,7 @@ #ifdef CONFIG_DRM_I915_DEBUG_GEM #define GEM_BUG_ON(expr) BUG_ON(expr) #else -#define GEM_BUG_ON(expr) +#define GEM_BUG_ON(expr) do { } while (0) #endif #define I915_NUM_ENGINES 5 diff --git a/drivers/gpu/drm/i915/i915_gem_context.c b/drivers/gpu/drm/i915/i915_gem_context.c index 6dd475735f0a..1f94b8d6d83d 100644 --- a/drivers/gpu/drm/i915/i915_gem_context.c +++ b/drivers/gpu/drm/i915/i915_gem_context.c @@ -476,6 +476,7 @@ int i915_gem_context_init(struct drm_device *dev) return PTR_ERR(ctx); } + ctx->priority = I915_PRIORITY_MIN; /* lowest priority; idle task */ dev_priv->kernel_context = ctx; DRM_DEBUG_DRIVER("%s context support initialized\n", diff --git a/drivers/gpu/drm/i915/i915_gem_execbuffer.c b/drivers/gpu/drm/i915/i915_gem_execbuffer.c index fb5b44339f71..e804cb2fa57e 100644 --- a/drivers/gpu/drm/i915/i915_gem_execbuffer.c +++ b/drivers/gpu/drm/i915/i915_gem_execbuffer.c @@ -287,7 +287,7 @@ static inline int use_cpu_reloc(struct drm_i915_gem_object *obj) if (DBG_USE_CPU_RELOC) return DBG_USE_CPU_RELOC > 0; - return (HAS_LLC(obj->base.dev) || + return (HAS_LLC(to_i915(obj->base.dev)) || obj->base.write_domain == I915_GEM_DOMAIN_CPU || obj->cache_level != I915_CACHE_NONE); } @@ -833,7 +833,7 @@ need_reloc_mappable(struct i915_vma *vma) return false; /* See also use_cpu_reloc() */ - if (HAS_LLC(vma->obj->base.dev)) + if (HAS_LLC(to_i915(vma->obj->base.dev))) return false; if (vma->obj->base.write_domain == I915_GEM_DOMAIN_CPU) @@ -1624,7 +1624,7 @@ i915_gem_do_execbuffer(struct drm_device *dev, void *data, } if (args->flags & I915_EXEC_RESOURCE_STREAMER) { - if (!HAS_RESOURCE_STREAMER(dev)) { + if (!HAS_RESOURCE_STREAMER(dev_priv)) { DRM_DEBUG("RS is only allowed for Haswell, Gen8 and above\n"); return -EINVAL; } diff --git a/drivers/gpu/drm/i915/i915_gem_fence.c b/drivers/gpu/drm/i915/i915_gem_fence_reg.c index cd59dbc6588c..cd59dbc6588c 100644 --- a/drivers/gpu/drm/i915/i915_gem_fence.c +++ b/drivers/gpu/drm/i915/i915_gem_fence_reg.c diff --git a/drivers/gpu/drm/i915/i915_gem_fence_reg.h b/drivers/gpu/drm/i915/i915_gem_fence_reg.h new file mode 100644 index 000000000000..22c4a2d01adf --- /dev/null +++ b/drivers/gpu/drm/i915/i915_gem_fence_reg.h @@ -0,0 +1,51 @@ +/* + * Copyright © 2016 Intel Corporation + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + * IN THE SOFTWARE. + * + */ + +#ifndef __I915_FENCE_REG_H__ +#define __I915_FENCE_REG_H__ + +#include <linux/list.h> + +struct drm_i915_private; +struct i915_vma; + +struct drm_i915_fence_reg { + struct list_head link; + struct drm_i915_private *i915; + struct i915_vma *vma; + int pin_count; + int id; + /** + * Whether the tiling parameters for the currently + * associated fence register have changed. Note that + * for the purposes of tracking tiling changes we also + * treat the unfenced register, the register slot that + * the object occupies whilst it executes a fenced + * command (such as BLT on gen2/3), as a "fence". + */ + bool dirty; +}; + +#endif + diff --git a/drivers/gpu/drm/i915/i915_gem_gtt.c b/drivers/gpu/drm/i915/i915_gem_gtt.c index a5fafa3d4fc8..01f238adfb67 100644 --- a/drivers/gpu/drm/i915/i915_gem_gtt.c +++ b/drivers/gpu/drm/i915/i915_gem_gtt.c @@ -96,13 +96,6 @@ * */ -static inline struct i915_ggtt * -i915_vm_to_ggtt(struct i915_address_space *vm) -{ - GEM_BUG_ON(!i915_is_ggtt(vm)); - return container_of(vm, struct i915_ggtt, base); -} - static int i915_get_ggtt_vma_pages(struct i915_vma *vma); @@ -714,7 +707,7 @@ static int gen8_48b_mm_switch(struct i915_hw_ppgtt *ppgtt, */ static void mark_tlbs_dirty(struct i915_hw_ppgtt *ppgtt) { - ppgtt->pd_dirty_rings = INTEL_INFO(ppgtt->base.dev)->ring_mask; + ppgtt->pd_dirty_rings = INTEL_INFO(to_i915(ppgtt->base.dev))->ring_mask; } /* Removes entries from a single page table, releasing it if it's empty. @@ -3348,176 +3341,6 @@ void i915_gem_restore_gtt_mappings(struct drm_device *dev) i915_ggtt_flush(dev_priv); } -static void -i915_vma_retire(struct i915_gem_active *active, - struct drm_i915_gem_request *rq) -{ - const unsigned int idx = rq->engine->id; - struct i915_vma *vma = - container_of(active, struct i915_vma, last_read[idx]); - struct drm_i915_gem_object *obj = vma->obj; - - GEM_BUG_ON(!i915_vma_has_active_engine(vma, idx)); - - i915_vma_clear_active(vma, idx); - if (i915_vma_is_active(vma)) - return; - - list_move_tail(&vma->vm_link, &vma->vm->inactive_list); - if (unlikely(i915_vma_is_closed(vma) && !i915_vma_is_pinned(vma))) - WARN_ON(i915_vma_unbind(vma)); - - GEM_BUG_ON(!i915_gem_object_is_active(obj)); - if (--obj->active_count) - return; - - /* Bump our place on the bound list to keep it roughly in LRU order - * so that we don't steal from recently used but inactive objects - * (unless we are forced to ofc!) - */ - if (obj->bind_count) - list_move_tail(&obj->global_link, &rq->i915->mm.bound_list); - - obj->mm.dirty = true; /* be paranoid */ - - if (i915_gem_object_has_active_reference(obj)) { - i915_gem_object_clear_active_reference(obj); - i915_gem_object_put(obj); - } -} - -static void -i915_ggtt_retire__write(struct i915_gem_active *active, - struct drm_i915_gem_request *request) -{ - struct i915_vma *vma = - container_of(active, struct i915_vma, last_write); - - intel_fb_obj_flush(vma->obj, true, ORIGIN_CS); -} - -void i915_vma_destroy(struct i915_vma *vma) -{ - GEM_BUG_ON(vma->node.allocated); - GEM_BUG_ON(i915_vma_is_active(vma)); - GEM_BUG_ON(!i915_vma_is_closed(vma)); - GEM_BUG_ON(vma->fence); - - list_del(&vma->vm_link); - if (!i915_vma_is_ggtt(vma)) - i915_ppgtt_put(i915_vm_to_ppgtt(vma->vm)); - - kmem_cache_free(to_i915(vma->obj->base.dev)->vmas, vma); -} - -void i915_vma_close(struct i915_vma *vma) -{ - GEM_BUG_ON(i915_vma_is_closed(vma)); - vma->flags |= I915_VMA_CLOSED; - - list_del(&vma->obj_link); - rb_erase(&vma->obj_node, &vma->obj->vma_tree); - - if (!i915_vma_is_active(vma) && !i915_vma_is_pinned(vma)) - WARN_ON(i915_vma_unbind(vma)); -} - -static inline long vma_compare(struct i915_vma *vma, - struct i915_address_space *vm, - const struct i915_ggtt_view *view) -{ - GEM_BUG_ON(view && !i915_is_ggtt(vm)); - - if (vma->vm != vm) - return vma->vm - vm; - - if (!view) - return vma->ggtt_view.type; - - if (vma->ggtt_view.type != view->type) - return vma->ggtt_view.type - view->type; - - return memcmp(&vma->ggtt_view.params, - &view->params, - sizeof(view->params)); -} - -static struct i915_vma * -__i915_vma_create(struct drm_i915_gem_object *obj, - struct i915_address_space *vm, - const struct i915_ggtt_view *view) -{ - struct i915_vma *vma; - struct rb_node *rb, **p; - int i; - - GEM_BUG_ON(vm->closed); - - vma = kmem_cache_zalloc(to_i915(obj->base.dev)->vmas, GFP_KERNEL); - if (vma == NULL) - return ERR_PTR(-ENOMEM); - - INIT_LIST_HEAD(&vma->exec_list); - for (i = 0; i < ARRAY_SIZE(vma->last_read); i++) - init_request_active(&vma->last_read[i], i915_vma_retire); - init_request_active(&vma->last_write, - i915_is_ggtt(vm) ? i915_ggtt_retire__write : NULL); - init_request_active(&vma->last_fence, NULL); - list_add(&vma->vm_link, &vm->unbound_list); - vma->vm = vm; - vma->obj = obj; - vma->size = obj->base.size; - - if (view) { - vma->ggtt_view = *view; - if (view->type == I915_GGTT_VIEW_PARTIAL) { - vma->size = view->params.partial.size; - vma->size <<= PAGE_SHIFT; - } else if (view->type == I915_GGTT_VIEW_ROTATED) { - vma->size = - intel_rotation_info_size(&view->params.rotated); - vma->size <<= PAGE_SHIFT; - } - } - - if (i915_is_ggtt(vm)) { - vma->flags |= I915_VMA_GGTT; - list_add(&vma->obj_link, &obj->vma_list); - } else { - i915_ppgtt_get(i915_vm_to_ppgtt(vm)); - list_add_tail(&vma->obj_link, &obj->vma_list); - } - - rb = NULL; - p = &obj->vma_tree.rb_node; - while (*p) { - struct i915_vma *pos; - - rb = *p; - pos = rb_entry(rb, struct i915_vma, obj_node); - if (vma_compare(pos, vm, view) < 0) - p = &rb->rb_right; - else - p = &rb->rb_left; - } - rb_link_node(&vma->obj_node, rb, p); - rb_insert_color(&vma->obj_node, &obj->vma_tree); - - return vma; -} - -struct i915_vma * -i915_vma_create(struct drm_i915_gem_object *obj, - struct i915_address_space *vm, - const struct i915_ggtt_view *view) -{ - lockdep_assert_held(&obj->base.dev->struct_mutex); - GEM_BUG_ON(view && !i915_is_ggtt(vm)); - GEM_BUG_ON(i915_gem_obj_to_vma(obj, vm, view)); - - return __i915_vma_create(obj, vm, view); -} - struct i915_vma * i915_gem_obj_to_vma(struct drm_i915_gem_object *obj, struct i915_address_space *vm, @@ -3530,7 +3353,7 @@ i915_gem_obj_to_vma(struct drm_i915_gem_object *obj, struct i915_vma *vma = rb_entry(rb, struct i915_vma, obj_node); long cmp; - cmp = vma_compare(vma, vm, view); + cmp = i915_vma_compare(vma, vm, view); if (cmp == 0) return vma; @@ -3555,7 +3378,7 @@ i915_gem_obj_lookup_or_create_vma(struct drm_i915_gem_object *obj, vma = i915_gem_obj_to_vma(obj, vm, view); if (!vma) { - vma = __i915_vma_create(obj, vm, view); + vma = i915_vma_create(obj, vm, view); GEM_BUG_ON(vma != i915_gem_obj_to_vma(obj, vm, view)); } @@ -3747,99 +3570,3 @@ i915_get_ggtt_vma_pages(struct i915_vma *vma) return ret; } -/** - * i915_vma_bind - Sets up PTEs for an VMA in it's corresponding address space. - * @vma: VMA to map - * @cache_level: mapping cache level - * @flags: flags like global or local mapping - * - * DMA addresses are taken from the scatter-gather table of this object (or of - * this VMA in case of non-default GGTT views) and PTE entries set up. - * Note that DMA addresses are also the only part of the SG table we care about. - */ -int i915_vma_bind(struct i915_vma *vma, enum i915_cache_level cache_level, - u32 flags) -{ - u32 bind_flags; - u32 vma_flags; - int ret; - - if (WARN_ON(flags == 0)) - return -EINVAL; - - bind_flags = 0; - if (flags & PIN_GLOBAL) - bind_flags |= I915_VMA_GLOBAL_BIND; - if (flags & PIN_USER) - bind_flags |= I915_VMA_LOCAL_BIND; - - vma_flags = vma->flags & (I915_VMA_GLOBAL_BIND | I915_VMA_LOCAL_BIND); - if (flags & PIN_UPDATE) - bind_flags |= vma_flags; - else - bind_flags &= ~vma_flags; - if (bind_flags == 0) - return 0; - - if (vma_flags == 0 && vma->vm->allocate_va_range) { - trace_i915_va_alloc(vma); - ret = vma->vm->allocate_va_range(vma->vm, - vma->node.start, - vma->node.size); - if (ret) - return ret; - } - - ret = vma->vm->bind_vma(vma, cache_level, bind_flags); - if (ret) - return ret; - - vma->flags |= bind_flags; - return 0; -} - -void __iomem *i915_vma_pin_iomap(struct i915_vma *vma) -{ - void __iomem *ptr; - - /* Access through the GTT requires the device to be awake. */ - assert_rpm_wakelock_held(to_i915(vma->vm->dev)); - - lockdep_assert_held(&vma->vm->dev->struct_mutex); - if (WARN_ON(!i915_vma_is_map_and_fenceable(vma))) - return IO_ERR_PTR(-ENODEV); - - GEM_BUG_ON(!i915_vma_is_ggtt(vma)); - GEM_BUG_ON((vma->flags & I915_VMA_GLOBAL_BIND) == 0); - - ptr = vma->iomap; - if (ptr == NULL) { - ptr = io_mapping_map_wc(&i915_vm_to_ggtt(vma->vm)->mappable, - vma->node.start, - vma->node.size); - if (ptr == NULL) - return IO_ERR_PTR(-ENOMEM); - - vma->iomap = ptr; - } - - __i915_vma_pin(vma); - return ptr; -} - -void i915_vma_unpin_and_release(struct i915_vma **p_vma) -{ - struct i915_vma *vma; - struct drm_i915_gem_object *obj; - - vma = fetch_and_zero(p_vma); - if (!vma) - return; - - obj = vma->obj; - - i915_vma_unpin(vma); - i915_vma_close(vma); - - __i915_gem_object_release_unless_active(obj); -} diff --git a/drivers/gpu/drm/i915/i915_gem_gtt.h b/drivers/gpu/drm/i915/i915_gem_gtt.h index c23ef9db1f53..57b5849c659e 100644 --- a/drivers/gpu/drm/i915/i915_gem_gtt.h +++ b/drivers/gpu/drm/i915/i915_gem_gtt.h @@ -35,7 +35,9 @@ #define __I915_GEM_GTT_H__ #include <linux/io-mapping.h> +#include <linux/mm.h> +#include "i915_gem_timeline.h" #include "i915_gem_request.h" #define I915_FENCE_REG_NONE -1 @@ -138,6 +140,8 @@ typedef uint64_t gen8_ppgtt_pml4e_t; #define GEN8_PPAT_ELLC_OVERRIDE (0<<2) #define GEN8_PPAT(i, x) ((uint64_t) (x) << ((i) * 8)) +struct sg_table; + enum i915_ggtt_view_type { I915_GGTT_VIEW_NORMAL = 0, I915_GGTT_VIEW_ROTATED, @@ -168,135 +172,7 @@ extern const struct i915_ggtt_view i915_ggtt_view_rotated; enum i915_cache_level; -/** - * A VMA represents a GEM BO that is bound into an address space. Therefore, a - * VMA's presence cannot be guaranteed before binding, or after unbinding the - * object into/from the address space. - * - * To make things as simple as possible (ie. no refcounting), a VMA's lifetime - * will always be <= an objects lifetime. So object refcounting should cover us. - */ -struct i915_vma { - struct drm_mm_node node; - struct drm_i915_gem_object *obj; - struct i915_address_space *vm; - struct drm_i915_fence_reg *fence; - struct sg_table *pages; - void __iomem *iomap; - u64 size; - u64 display_alignment; - - unsigned int flags; - /** - * How many users have pinned this object in GTT space. The following - * users can each hold at most one reference: pwrite/pread, execbuffer - * (objects are not allowed multiple times for the same batchbuffer), - * and the framebuffer code. When switching/pageflipping, the - * framebuffer code has at most two buffers pinned per crtc. - * - * In the worst case this is 1 + 1 + 1 + 2*2 = 7. That would fit into 3 - * bits with absolutely no headroom. So use 4 bits. - */ -#define I915_VMA_PIN_MASK 0xf -#define I915_VMA_PIN_OVERFLOW BIT(5) - - /** Flags and address space this VMA is bound to */ -#define I915_VMA_GLOBAL_BIND BIT(6) -#define I915_VMA_LOCAL_BIND BIT(7) -#define I915_VMA_BIND_MASK (I915_VMA_GLOBAL_BIND | I915_VMA_LOCAL_BIND | I915_VMA_PIN_OVERFLOW) - -#define I915_VMA_GGTT BIT(8) -#define I915_VMA_CAN_FENCE BIT(9) -#define I915_VMA_CLOSED BIT(10) - - unsigned int active; - struct i915_gem_active last_read[I915_NUM_ENGINES]; - struct i915_gem_active last_write; - struct i915_gem_active last_fence; - - /** - * Support different GGTT views into the same object. - * This means there can be multiple VMA mappings per object and per VM. - * i915_ggtt_view_type is used to distinguish between those entries. - * The default one of zero (I915_GGTT_VIEW_NORMAL) is default and also - * assumed in GEM functions which take no ggtt view parameter. - */ - struct i915_ggtt_view ggtt_view; - - /** This object's place on the active/inactive lists */ - struct list_head vm_link; - - struct list_head obj_link; /* Link in the object's VMA list */ - struct rb_node obj_node; - - /** This vma's place in the batchbuffer or on the eviction list */ - struct list_head exec_list; - - /** - * Used for performing relocations during execbuffer insertion. - */ - struct hlist_node exec_node; - unsigned long exec_handle; - struct drm_i915_gem_exec_object2 *exec_entry; -}; - -struct i915_vma * -i915_vma_create(struct drm_i915_gem_object *obj, - struct i915_address_space *vm, - const struct i915_ggtt_view *view); -void i915_vma_unpin_and_release(struct i915_vma **p_vma); - -static inline bool i915_vma_is_ggtt(const struct i915_vma *vma) -{ - return vma->flags & I915_VMA_GGTT; -} - -static inline bool i915_vma_is_map_and_fenceable(const struct i915_vma *vma) -{ - return vma->flags & I915_VMA_CAN_FENCE; -} - -static inline bool i915_vma_is_closed(const struct i915_vma *vma) -{ - return vma->flags & I915_VMA_CLOSED; -} - -static inline unsigned int i915_vma_get_active(const struct i915_vma *vma) -{ - return vma->active; -} - -static inline bool i915_vma_is_active(const struct i915_vma *vma) -{ - return i915_vma_get_active(vma); -} - -static inline void i915_vma_set_active(struct i915_vma *vma, - unsigned int engine) -{ - vma->active |= BIT(engine); -} - -static inline void i915_vma_clear_active(struct i915_vma *vma, - unsigned int engine) -{ - vma->active &= ~BIT(engine); -} - -static inline bool i915_vma_has_active_engine(const struct i915_vma *vma, - unsigned int engine) -{ - return vma->active & BIT(engine); -} - -static inline u32 i915_ggtt_offset(const struct i915_vma *vma) -{ - GEM_BUG_ON(!i915_vma_is_ggtt(vma)); - GEM_BUG_ON(!vma->node.allocated); - GEM_BUG_ON(upper_32_bits(vma->node.start)); - GEM_BUG_ON(upper_32_bits(vma->node.start + vma->node.size - 1)); - return lower_32_bits(vma->node.start); -} +struct i915_vma; struct i915_page_dma { struct page *page; @@ -606,6 +482,13 @@ i915_page_dir_dma_addr(const struct i915_hw_ppgtt *ppgtt, const unsigned n) px_dma(ppgtt->base.scratch_pd); } +static inline struct i915_ggtt * +i915_vm_to_ggtt(struct i915_address_space *vm) +{ + GEM_BUG_ON(!i915_is_ggtt(vm)); + return container_of(vm, struct i915_ggtt, base); +} + int i915_ggtt_probe_hw(struct drm_i915_private *dev_priv); int i915_ggtt_init_hw(struct drm_i915_private *dev_priv); int i915_ggtt_enable_hw(struct drm_i915_private *dev_priv); @@ -653,88 +536,4 @@ void i915_gem_gtt_finish_pages(struct drm_i915_gem_object *obj, #define PIN_OFFSET_FIXED BIT(11) #define PIN_OFFSET_MASK (~4095) -int __i915_vma_do_pin(struct i915_vma *vma, - u64 size, u64 alignment, u64 flags); -static inline int __must_check -i915_vma_pin(struct i915_vma *vma, u64 size, u64 alignment, u64 flags) -{ - BUILD_BUG_ON(PIN_MBZ != I915_VMA_PIN_OVERFLOW); - BUILD_BUG_ON(PIN_GLOBAL != I915_VMA_GLOBAL_BIND); - BUILD_BUG_ON(PIN_USER != I915_VMA_LOCAL_BIND); - - /* Pin early to prevent the shrinker/eviction logic from destroying - * our vma as we insert and bind. - */ - if (likely(((++vma->flags ^ flags) & I915_VMA_BIND_MASK) == 0)) - return 0; - - return __i915_vma_do_pin(vma, size, alignment, flags); -} - -static inline int i915_vma_pin_count(const struct i915_vma *vma) -{ - return vma->flags & I915_VMA_PIN_MASK; -} - -static inline bool i915_vma_is_pinned(const struct i915_vma *vma) -{ - return i915_vma_pin_count(vma); -} - -static inline void __i915_vma_pin(struct i915_vma *vma) -{ - vma->flags++; - GEM_BUG_ON(vma->flags & I915_VMA_PIN_OVERFLOW); -} - -static inline void __i915_vma_unpin(struct i915_vma *vma) -{ - GEM_BUG_ON(!i915_vma_is_pinned(vma)); - vma->flags--; -} - -static inline void i915_vma_unpin(struct i915_vma *vma) -{ - GEM_BUG_ON(!drm_mm_node_allocated(&vma->node)); - __i915_vma_unpin(vma); -} - -/** - * i915_vma_pin_iomap - calls ioremap_wc to map the GGTT VMA via the aperture - * @vma: VMA to iomap - * - * The passed in VMA has to be pinned in the global GTT mappable region. - * An extra pinning of the VMA is acquired for the return iomapping, - * the caller must call i915_vma_unpin_iomap to relinquish the pinning - * after the iomapping is no longer required. - * - * Callers must hold the struct_mutex. - * - * Returns a valid iomapped pointer or ERR_PTR. - */ -void __iomem *i915_vma_pin_iomap(struct i915_vma *vma); -#define IO_ERR_PTR(x) ((void __iomem *)ERR_PTR(x)) - -/** - * i915_vma_unpin_iomap - unpins the mapping returned from i915_vma_iomap - * @vma: VMA to unpin - * - * Unpins the previously iomapped VMA from i915_vma_pin_iomap(). - * - * Callers must hold the struct_mutex. This function is only valid to be - * called on a VMA previously iomapped by the caller with i915_vma_pin_iomap(). - */ -static inline void i915_vma_unpin_iomap(struct i915_vma *vma) -{ - lockdep_assert_held(&vma->vm->dev->struct_mutex); - GEM_BUG_ON(vma->iomap == NULL); - i915_vma_unpin(vma); -} - -static inline struct page *i915_vma_first_page(struct i915_vma *vma) -{ - GEM_BUG_ON(!vma->pages); - return sg_page(vma->pages->sgl); -} - #endif diff --git a/drivers/gpu/drm/i915/i915_gem_object.h b/drivers/gpu/drm/i915/i915_gem_object.h new file mode 100644 index 000000000000..014f80392f18 --- /dev/null +++ b/drivers/gpu/drm/i915/i915_gem_object.h @@ -0,0 +1,337 @@ +/* + * Copyright © 2016 Intel Corporation + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + * IN THE SOFTWARE. + * + */ + +#ifndef __I915_GEM_OBJECT_H__ +#define __I915_GEM_OBJECT_H__ + +#include <linux/reservation.h> + +#include <drm/drm_vma_manager.h> +#include <drm/drm_gem.h> +#include <drm/drmP.h> + +#include <drm/i915_drm.h> + +struct drm_i915_gem_object_ops { + unsigned int flags; +#define I915_GEM_OBJECT_HAS_STRUCT_PAGE 0x1 +#define I915_GEM_OBJECT_IS_SHRINKABLE 0x2 + + /* Interface between the GEM object and its backing storage. + * get_pages() is called once prior to the use of the associated set + * of pages before to binding them into the GTT, and put_pages() is + * called after we no longer need them. As we expect there to be + * associated cost with migrating pages between the backing storage + * and making them available for the GPU (e.g. clflush), we may hold + * onto the pages after they are no longer referenced by the GPU + * in case they may be used again shortly (for example migrating the + * pages to a different memory domain within the GTT). put_pages() + * will therefore most likely be called when the object itself is + * being released or under memory pressure (where we attempt to + * reap pages for the shrinker). + */ + struct sg_table *(*get_pages)(struct drm_i915_gem_object *); + void (*put_pages)(struct drm_i915_gem_object *, struct sg_table *); + + int (*dmabuf_export)(struct drm_i915_gem_object *); + void (*release)(struct drm_i915_gem_object *); +}; + +struct drm_i915_gem_object { + struct drm_gem_object base; + + const struct drm_i915_gem_object_ops *ops; + + /** List of VMAs backed by this object */ + struct list_head vma_list; + struct rb_root vma_tree; + + /** Stolen memory for this object, instead of being backed by shmem. */ + struct drm_mm_node *stolen; + struct list_head global_link; + union { + struct rcu_head rcu; + struct llist_node freed; + }; + + /** + * Whether the object is currently in the GGTT mmap. + */ + struct list_head userfault_link; + + /** Used in execbuf to temporarily hold a ref */ + struct list_head obj_exec_link; + + struct list_head batch_pool_link; + + unsigned long flags; + + /** + * Have we taken a reference for the object for incomplete GPU + * activity? + */ +#define I915_BO_ACTIVE_REF 0 + + /* + * Is the object to be mapped as read-only to the GPU + * Only honoured if hardware has relevant pte bit + */ + unsigned long gt_ro:1; + unsigned int cache_level:3; + unsigned int cache_dirty:1; + + atomic_t frontbuffer_bits; + unsigned int frontbuffer_ggtt_origin; /* write once */ + + /** Current tiling stride for the object, if it's tiled. */ + unsigned int tiling_and_stride; +#define FENCE_MINIMUM_STRIDE 128 /* See i915_tiling_ok() */ +#define TILING_MASK (FENCE_MINIMUM_STRIDE-1) +#define STRIDE_MASK (~TILING_MASK) + + /** Count of VMA actually bound by this object */ + unsigned int bind_count; + unsigned int active_count; + unsigned int pin_display; + + struct { + struct mutex lock; /* protects the pages and their use */ + atomic_t pages_pin_count; + + struct sg_table *pages; + void *mapping; + + struct i915_gem_object_page_iter { + struct scatterlist *sg_pos; + unsigned int sg_idx; /* in pages, but 32bit eek! */ + + struct radix_tree_root radix; + struct mutex lock; /* protects this cache */ + } get_page; + + /** + * Advice: are the backing pages purgeable? + */ + unsigned int madv:2; + + /** + * This is set if the object has been written to since the + * pages were last acquired. + */ + bool dirty:1; + + /** + * This is set if the object has been pinned due to unknown + * swizzling. + */ + bool quirked:1; + } mm; + + /** Breadcrumb of last rendering to the buffer. + * There can only be one writer, but we allow for multiple readers. + * If there is a writer that necessarily implies that all other + * read requests are complete - but we may only be lazily clearing + * the read requests. A read request is naturally the most recent + * request on a ring, so we may have two different write and read + * requests on one ring where the write request is older than the + * read request. This allows for the CPU to read from an active + * buffer by only waiting for the write to complete. + */ + struct reservation_object *resv; + + /** References from framebuffers, locks out tiling changes. */ + unsigned long framebuffer_references; + + /** Record of address bit 17 of each page at last unbind. */ + unsigned long *bit_17; + + struct i915_gem_userptr { + uintptr_t ptr; + unsigned read_only :1; + + struct i915_mm_struct *mm; + struct i915_mmu_object *mmu_object; + struct work_struct *work; + } userptr; + + /** for phys allocated objects */ + struct drm_dma_handle *phys_handle; + + struct reservation_object __builtin_resv; +}; + +static inline struct drm_i915_gem_object * +to_intel_bo(struct drm_gem_object *gem) +{ + /* Assert that to_intel_bo(NULL) == NULL */ + BUILD_BUG_ON(offsetof(struct drm_i915_gem_object, base)); + + return container_of(gem, struct drm_i915_gem_object, base); +} + +/** + * i915_gem_object_lookup_rcu - look up a temporary GEM object from its handle + * @filp: DRM file private date + * @handle: userspace handle + * + * Returns: + * + * A pointer to the object named by the handle if such exists on @filp, NULL + * otherwise. This object is only valid whilst under the RCU read lock, and + * note carefully the object may be in the process of being destroyed. + */ +static inline struct drm_i915_gem_object * +i915_gem_object_lookup_rcu(struct drm_file *file, u32 handle) +{ +#ifdef CONFIG_LOCKDEP + WARN_ON(debug_locks && !lock_is_held(&rcu_lock_map)); +#endif + return idr_find(&file->object_idr, handle); +} + +static inline struct drm_i915_gem_object * +i915_gem_object_lookup(struct drm_file *file, u32 handle) +{ + struct drm_i915_gem_object *obj; + + rcu_read_lock(); + obj = i915_gem_object_lookup_rcu(file, handle); + if (obj && !kref_get_unless_zero(&obj->base.refcount)) + obj = NULL; + rcu_read_unlock(); + + return obj; +} + +__deprecated +extern struct drm_gem_object * +drm_gem_object_lookup(struct drm_file *file, u32 handle); + +__attribute__((nonnull)) +static inline struct drm_i915_gem_object * +i915_gem_object_get(struct drm_i915_gem_object *obj) +{ + drm_gem_object_reference(&obj->base); + return obj; +} + +__deprecated +extern void drm_gem_object_reference(struct drm_gem_object *); + +__attribute__((nonnull)) +static inline void +i915_gem_object_put(struct drm_i915_gem_object *obj) +{ + __drm_gem_object_unreference(&obj->base); +} + +__deprecated +extern void drm_gem_object_unreference(struct drm_gem_object *); + +__deprecated +extern void drm_gem_object_unreference_unlocked(struct drm_gem_object *); + +static inline bool +i915_gem_object_is_dead(const struct drm_i915_gem_object *obj) +{ + return atomic_read(&obj->base.refcount.refcount) == 0; +} + +static inline bool +i915_gem_object_has_struct_page(const struct drm_i915_gem_object *obj) +{ + return obj->ops->flags & I915_GEM_OBJECT_HAS_STRUCT_PAGE; +} + +static inline bool +i915_gem_object_is_shrinkable(const struct drm_i915_gem_object *obj) +{ + return obj->ops->flags & I915_GEM_OBJECT_IS_SHRINKABLE; +} + +static inline bool +i915_gem_object_is_active(const struct drm_i915_gem_object *obj) +{ + return obj->active_count; +} + +static inline bool +i915_gem_object_has_active_reference(const struct drm_i915_gem_object *obj) +{ + return test_bit(I915_BO_ACTIVE_REF, &obj->flags); +} + +static inline void +i915_gem_object_set_active_reference(struct drm_i915_gem_object *obj) +{ + lockdep_assert_held(&obj->base.dev->struct_mutex); + __set_bit(I915_BO_ACTIVE_REF, &obj->flags); +} + +static inline void +i915_gem_object_clear_active_reference(struct drm_i915_gem_object *obj) +{ + lockdep_assert_held(&obj->base.dev->struct_mutex); + __clear_bit(I915_BO_ACTIVE_REF, &obj->flags); +} + +void __i915_gem_object_release_unless_active(struct drm_i915_gem_object *obj); + +static inline unsigned int +i915_gem_object_get_tiling(struct drm_i915_gem_object *obj) +{ + return obj->tiling_and_stride & TILING_MASK; +} + +static inline bool +i915_gem_object_is_tiled(struct drm_i915_gem_object *obj) +{ + return i915_gem_object_get_tiling(obj) != I915_TILING_NONE; +} + +static inline unsigned int +i915_gem_object_get_stride(struct drm_i915_gem_object *obj) +{ + return obj->tiling_and_stride & STRIDE_MASK; +} + +static inline struct intel_engine_cs * +i915_gem_object_last_write_engine(struct drm_i915_gem_object *obj) +{ + struct intel_engine_cs *engine = NULL; + struct dma_fence *fence; + + rcu_read_lock(); + fence = reservation_object_get_excl_rcu(obj->resv); + rcu_read_unlock(); + + if (fence && dma_fence_is_i915(fence) && !dma_fence_is_signaled(fence)) + engine = to_request(fence)->engine; + dma_fence_put(fence); + + return engine; +} + +#endif + diff --git a/drivers/gpu/drm/i915/i915_gem_request.c b/drivers/gpu/drm/i915/i915_gem_request.c index 0b3b051a5683..b9b5253cf3cd 100644 --- a/drivers/gpu/drm/i915/i915_gem_request.c +++ b/drivers/gpu/drm/i915/i915_gem_request.c @@ -113,6 +113,82 @@ i915_gem_request_remove_from_client(struct drm_i915_gem_request *request) spin_unlock(&file_priv->mm.lock); } +static struct i915_dependency * +i915_dependency_alloc(struct drm_i915_private *i915) +{ + return kmem_cache_alloc(i915->dependencies, GFP_KERNEL); +} + +static void +i915_dependency_free(struct drm_i915_private *i915, + struct i915_dependency *dep) +{ + kmem_cache_free(i915->dependencies, dep); +} + +static void +__i915_priotree_add_dependency(struct i915_priotree *pt, + struct i915_priotree *signal, + struct i915_dependency *dep, + unsigned long flags) +{ + INIT_LIST_HEAD(&dep->dfs_link); + list_add(&dep->wait_link, &signal->waiters_list); + list_add(&dep->signal_link, &pt->signalers_list); + dep->signaler = signal; + dep->flags = flags; +} + +static int +i915_priotree_add_dependency(struct drm_i915_private *i915, + struct i915_priotree *pt, + struct i915_priotree *signal) +{ + struct i915_dependency *dep; + + dep = i915_dependency_alloc(i915); + if (!dep) + return -ENOMEM; + + __i915_priotree_add_dependency(pt, signal, dep, I915_DEPENDENCY_ALLOC); + return 0; +} + +static void +i915_priotree_fini(struct drm_i915_private *i915, struct i915_priotree *pt) +{ + struct i915_dependency *dep, *next; + + GEM_BUG_ON(!RB_EMPTY_NODE(&pt->node)); + + /* Everyone we depended upon (the fences we wait to be signaled) + * should retire before us and remove themselves from our list. + * However, retirement is run independently on each timeline and + * so we may be called out-of-order. + */ + list_for_each_entry_safe(dep, next, &pt->signalers_list, signal_link) { + list_del(&dep->wait_link); + if (dep->flags & I915_DEPENDENCY_ALLOC) + i915_dependency_free(i915, dep); + } + + /* Remove ourselves from everyone who depends upon us */ + list_for_each_entry_safe(dep, next, &pt->waiters_list, wait_link) { + list_del(&dep->signal_link); + if (dep->flags & I915_DEPENDENCY_ALLOC) + i915_dependency_free(i915, dep); + } +} + +static void +i915_priotree_init(struct i915_priotree *pt) +{ + INIT_LIST_HEAD(&pt->signalers_list); + INIT_LIST_HEAD(&pt->waiters_list); + RB_CLEAR_NODE(&pt->node); + pt->priority = INT_MIN; +} + void i915_gem_retire_noop(struct i915_gem_active *active, struct drm_i915_gem_request *request) { @@ -182,6 +258,8 @@ static void i915_gem_request_retire(struct drm_i915_gem_request *request) i915_gem_context_put(request->ctx); dma_fence_signal(&request->fence); + + i915_priotree_fini(request->i915, &request->priotree); i915_gem_request_put(request); } @@ -241,9 +319,8 @@ static int i915_gem_init_global_seqno(struct drm_i915_private *i915, u32 seqno) /* If the seqno wraps around, we need to clear the breadcrumb rbtree */ if (!i915_seqno_passed(seqno, atomic_read(&timeline->next_seqno))) { - while (intel_kick_waiters(i915) || intel_kick_signalers(i915)) - yield(); - yield(); + while (intel_breadcrumbs_busy(i915)) + cond_resched(); /* spin until threads are complete */ } atomic_set(&timeline->next_seqno, seqno); @@ -307,25 +384,16 @@ static u32 timeline_get_seqno(struct i915_gem_timeline *tl) return atomic_inc_return(&tl->next_seqno); } -static int __i915_sw_fence_call -submit_notify(struct i915_sw_fence *fence, enum i915_sw_fence_notify state) +void __i915_gem_request_submit(struct drm_i915_gem_request *request) { - struct drm_i915_gem_request *request = - container_of(fence, typeof(*request), submit); struct intel_engine_cs *engine = request->engine; struct intel_timeline *timeline; - unsigned long flags; u32 seqno; - if (state != FENCE_COMPLETE) - return NOTIFY_DONE; - /* Transfer from per-context onto the global per-engine timeline */ timeline = engine->timeline; GEM_BUG_ON(timeline == request->timeline); - - /* Will be called from irq-context when using foreign DMA fences */ - spin_lock_irqsave(&timeline->lock, flags); + assert_spin_locked(&timeline->lock); seqno = timeline_get_seqno(timeline->common); GEM_BUG_ON(!seqno); @@ -345,14 +413,43 @@ submit_notify(struct i915_sw_fence *fence, enum i915_sw_fence_notify state) GEM_BUG_ON(!request->global_seqno); engine->emit_breadcrumb(request, request->ring->vaddr + request->postfix); - engine->submit_request(request); - spin_lock_nested(&request->timeline->lock, SINGLE_DEPTH_NESTING); + spin_lock(&request->timeline->lock); list_move_tail(&request->link, &timeline->requests); spin_unlock(&request->timeline->lock); - spin_unlock_irqrestore(&timeline->lock, flags); + i915_sw_fence_commit(&request->execute); +} + +void i915_gem_request_submit(struct drm_i915_gem_request *request) +{ + struct intel_engine_cs *engine = request->engine; + unsigned long flags; + + /* Will be called from irq-context when using foreign fences. */ + spin_lock_irqsave(&engine->timeline->lock, flags); + + __i915_gem_request_submit(request); + + spin_unlock_irqrestore(&engine->timeline->lock, flags); +} + +static int __i915_sw_fence_call +submit_notify(struct i915_sw_fence *fence, enum i915_sw_fence_notify state) +{ + if (state == FENCE_COMPLETE) { + struct drm_i915_gem_request *request = + container_of(fence, typeof(*request), submit); + + request->engine->submit_request(request); + } + + return NOTIFY_DONE; +} +static int __i915_sw_fence_call +execute_notify(struct i915_sw_fence *fence, enum i915_sw_fence_notify state) +{ return NOTIFY_DONE; } @@ -441,6 +538,14 @@ i915_gem_request_alloc(struct intel_engine_cs *engine, __timeline_get_seqno(req->timeline->common)); i915_sw_fence_init(&req->submit, submit_notify); + i915_sw_fence_init(&req->execute, execute_notify); + /* Ensure that the execute fence completes after the submit fence - + * as we complete the execute fence from within the submit fence + * callback, its completion would otherwise be visible first. + */ + i915_sw_fence_await_sw_fence(&req->execute, &req->submit, &req->execq); + + i915_priotree_init(&req->priotree); INIT_LIST_HEAD(&req->active_list); req->i915 = dev_priv; @@ -495,6 +600,14 @@ i915_gem_request_await_request(struct drm_i915_gem_request *to, GEM_BUG_ON(to == from); + if (to->engine->schedule) { + ret = i915_priotree_add_dependency(to->i915, + &to->priotree, + &from->priotree); + if (ret < 0) + return ret; + } + if (to->timeline == from->timeline) return 0; @@ -718,9 +831,15 @@ void __i915_add_request(struct drm_i915_gem_request *request, bool flush_caches) prev = i915_gem_active_raw(&timeline->last_request, &request->i915->drm.struct_mutex); - if (prev) + if (prev) { i915_sw_fence_await_sw_fence(&request->submit, &prev->submit, &request->submitq); + if (engine->schedule) + __i915_priotree_add_dependency(&request->priotree, + &prev->priotree, + &request->dep, + 0); + } spin_lock_irq(&timeline->lock); list_add_tail(&request->link, &timeline->requests); @@ -737,6 +856,19 @@ void __i915_add_request(struct drm_i915_gem_request *request, bool flush_caches) i915_gem_mark_busy(engine); + /* Let the backend know a new request has arrived that may need + * to adjust the existing execution schedule due to a high priority + * request - i.e. we may want to preempt the current request in order + * to run a high priority dependency chain *before* we can execute this + * request. + * + * This is called before the request is ready to run so that we can + * decide whether to preempt the entire chain so that it is ready to + * run at the earliest possible convenience. + */ + if (engine->schedule) + engine->schedule(request, request->ctx->priority); + local_bh_disable(); i915_sw_fence_commit(&request->submit); local_bh_enable(); /* Kick the execlists tasklet if just scheduled */ @@ -817,9 +949,9 @@ bool __i915_spin_request(const struct drm_i915_gem_request *req, } static long -__i915_request_wait_for_submit(struct drm_i915_gem_request *request, - unsigned int flags, - long timeout) +__i915_request_wait_for_execute(struct drm_i915_gem_request *request, + unsigned int flags, + long timeout) { const int state = flags & I915_WAIT_INTERRUPTIBLE ? TASK_INTERRUPTIBLE : TASK_UNINTERRUPTIBLE; @@ -831,9 +963,9 @@ __i915_request_wait_for_submit(struct drm_i915_gem_request *request, add_wait_queue(q, &reset); do { - prepare_to_wait(&request->submit.wait, &wait, state); + prepare_to_wait(&request->execute.wait, &wait, state); - if (i915_sw_fence_done(&request->submit)) + if (i915_sw_fence_done(&request->execute)) break; if (flags & I915_WAIT_LOCKED && @@ -851,7 +983,7 @@ __i915_request_wait_for_submit(struct drm_i915_gem_request *request, timeout = io_schedule_timeout(timeout); } while (timeout); - finish_wait(&request->submit.wait, &wait); + finish_wait(&request->execute.wait, &wait); if (flags & I915_WAIT_LOCKED) remove_wait_queue(q, &reset); @@ -903,13 +1035,14 @@ long i915_wait_request(struct drm_i915_gem_request *req, trace_i915_gem_request_wait_begin(req); - if (!i915_sw_fence_done(&req->submit)) { - timeout = __i915_request_wait_for_submit(req, flags, timeout); + if (!i915_sw_fence_done(&req->execute)) { + timeout = __i915_request_wait_for_execute(req, flags, timeout); if (timeout < 0) goto complete; - GEM_BUG_ON(!i915_sw_fence_done(&req->submit)); + GEM_BUG_ON(!i915_sw_fence_done(&req->execute)); } + GEM_BUG_ON(!i915_sw_fence_done(&req->submit)); GEM_BUG_ON(!req->global_seqno); /* Optimistic short spin before touching IRQs */ diff --git a/drivers/gpu/drm/i915/i915_gem_request.h b/drivers/gpu/drm/i915/i915_gem_request.h index 75f8360b3421..e2b077df2da0 100644 --- a/drivers/gpu/drm/i915/i915_gem_request.h +++ b/drivers/gpu/drm/i915/i915_gem_request.h @@ -30,6 +30,9 @@ #include "i915_gem.h" #include "i915_sw_fence.h" +struct drm_file; +struct drm_i915_gem_object; + struct intel_wait { struct rb_node node; struct task_struct *tsk; @@ -41,6 +44,33 @@ struct intel_signal_node { struct intel_wait wait; }; +struct i915_dependency { + struct i915_priotree *signaler; + struct list_head signal_link; + struct list_head wait_link; + struct list_head dfs_link; + unsigned long flags; +#define I915_DEPENDENCY_ALLOC BIT(0) +}; + +/* Requests exist in a complex web of interdependencies. Each request + * has to wait for some other request to complete before it is ready to be run + * (e.g. we have to wait until the pixels have been rendering into a texture + * before we can copy from it). We track the readiness of a request in terms + * of fences, but we also need to keep the dependency tree for the lifetime + * of the request (beyond the life of an individual fence). We use the tree + * at various points to reorder the requests whilst keeping the requests + * in order with respect to their various dependencies. + */ +struct i915_priotree { + struct list_head signalers_list; /* those before us, we depend upon */ + struct list_head waiters_list; /* those after us, they depend upon us */ + struct rb_node node; + int priority; +#define I915_PRIORITY_MAX 1024 +#define I915_PRIORITY_MIN (-I915_PRIORITY_MAX) +}; + /** * Request queue structure. * @@ -84,8 +114,34 @@ struct drm_i915_gem_request { struct intel_timeline *timeline; struct intel_signal_node signaling; + /* Fences for the various phases in the request's lifetime. + * + * The submit fence is used to await upon all of the request's + * dependencies. When it is signaled, the request is ready to run. + * It is used by the driver to then queue the request for execution. + * + * The execute fence is used to signal when the request has been + * sent to hardware. + * + * It is illegal for the submit fence of one request to wait upon the + * execute fence of an earlier request. It should be sufficient to + * wait upon the submit fence of the earlier request. + */ struct i915_sw_fence submit; + struct i915_sw_fence execute; wait_queue_t submitq; + wait_queue_t execq; + + /* A list of everyone we wait upon, and everyone who waits upon us. + * Even though we will not be submitted to the hardware before the + * submit fence is signaled (it waits for all external events as well + * as our own requests), the scheduler still needs to know the + * dependency tree for the lifetime of the request (from execbuf + * to retirement), i.e. bidirectional dependency information for the + * request not tied to individual fences. + */ + struct i915_priotree priotree; + struct i915_dependency dep; u32 global_seqno; @@ -143,9 +199,6 @@ struct drm_i915_gem_request { struct drm_i915_file_private *file_priv; /** file_priv list entry for this request */ struct list_head client_list; - - /** Link in the execlist submission queue, guarded by execlist_lock. */ - struct list_head execlist_link; }; extern const struct dma_fence_ops i915_fence_ops; @@ -162,18 +215,6 @@ int i915_gem_request_add_to_client(struct drm_i915_gem_request *req, struct drm_file *file); void i915_gem_request_retire_upto(struct drm_i915_gem_request *req); -static inline u32 -i915_gem_request_get_seqno(struct drm_i915_gem_request *req) -{ - return req ? req->global_seqno : 0; -} - -static inline struct intel_engine_cs * -i915_gem_request_get_engine(struct drm_i915_gem_request *req) -{ - return req ? req->engine : NULL; -} - static inline struct drm_i915_gem_request * to_request(struct dma_fence *fence) { @@ -226,6 +267,9 @@ void __i915_add_request(struct drm_i915_gem_request *req, bool flush_caches); #define i915_add_request_no_flush(req) \ __i915_add_request(req, false) +void __i915_gem_request_submit(struct drm_i915_gem_request *request); +void i915_gem_request_submit(struct drm_i915_gem_request *request); + struct intel_rps_client; #define NO_WAITBOOST ERR_PTR(-1) #define IS_RPS_CLIENT(p) (!IS_ERR(p)) diff --git a/drivers/gpu/drm/i915/i915_gem_stolen.c b/drivers/gpu/drm/i915/i915_gem_stolen.c index b1d367dba347..54085df1f227 100644 --- a/drivers/gpu/drm/i915/i915_gem_stolen.c +++ b/drivers/gpu/drm/i915/i915_gem_stolen.c @@ -596,7 +596,8 @@ _i915_gem_object_create_stolen(struct drm_device *dev, obj->stolen = stolen; obj->base.read_domains = I915_GEM_DOMAIN_CPU | I915_GEM_DOMAIN_GTT; - obj->cache_level = HAS_LLC(dev) ? I915_CACHE_LLC : I915_CACHE_NONE; + obj->cache_level = HAS_LLC(to_i915(dev)) ? + I915_CACHE_LLC : I915_CACHE_NONE; if (i915_gem_object_pin_pages(obj)) goto cleanup; diff --git a/drivers/gpu/drm/i915/i915_gem_timeline.c b/drivers/gpu/drm/i915/i915_gem_timeline.c index fc8f13a79f8f..bf8a471b61e6 100644 --- a/drivers/gpu/drm/i915/i915_gem_timeline.c +++ b/drivers/gpu/drm/i915/i915_gem_timeline.c @@ -24,9 +24,11 @@ #include "i915_drv.h" -int i915_gem_timeline_init(struct drm_i915_private *i915, - struct i915_gem_timeline *timeline, - const char *name) +static int __i915_gem_timeline_init(struct drm_i915_private *i915, + struct i915_gem_timeline *timeline, + const char *name, + struct lock_class_key *lockclass, + const char *lockname) { unsigned int i; u64 fences; @@ -47,8 +49,11 @@ int i915_gem_timeline_init(struct drm_i915_private *i915, tl->fence_context = fences++; tl->common = timeline; - +#ifdef CONFIG_DEBUG_SPINLOCK + __raw_spin_lock_init(&tl->lock.rlock, lockname, lockclass); +#else spin_lock_init(&tl->lock); +#endif init_request_active(&tl->last_request, NULL); INIT_LIST_HEAD(&tl->requests); } @@ -56,6 +61,26 @@ int i915_gem_timeline_init(struct drm_i915_private *i915, return 0; } +int i915_gem_timeline_init(struct drm_i915_private *i915, + struct i915_gem_timeline *timeline, + const char *name) +{ + static struct lock_class_key class; + + return __i915_gem_timeline_init(i915, timeline, name, + &class, "&timeline->lock"); +} + +int i915_gem_timeline_init__global(struct drm_i915_private *i915) +{ + static struct lock_class_key class; + + return __i915_gem_timeline_init(i915, + &i915->gt.global_timeline, + "[execution]", + &class, "&global_timeline->lock"); +} + void i915_gem_timeline_fini(struct i915_gem_timeline *tl) { lockdep_assert_held(&tl->i915->drm.struct_mutex); diff --git a/drivers/gpu/drm/i915/i915_gem_timeline.h b/drivers/gpu/drm/i915/i915_gem_timeline.h index f2bf7b1d49a1..98d99a62b4ae 100644 --- a/drivers/gpu/drm/i915/i915_gem_timeline.h +++ b/drivers/gpu/drm/i915/i915_gem_timeline.h @@ -67,6 +67,7 @@ struct i915_gem_timeline { int i915_gem_timeline_init(struct drm_i915_private *i915, struct i915_gem_timeline *tl, const char *name); +int i915_gem_timeline_init__global(struct drm_i915_private *i915); void i915_gem_timeline_fini(struct i915_gem_timeline *tl); #endif diff --git a/drivers/gpu/drm/i915/i915_gem_userptr.c b/drivers/gpu/drm/i915/i915_gem_userptr.c index 64261639f547..107ddf51065e 100644 --- a/drivers/gpu/drm/i915/i915_gem_userptr.c +++ b/drivers/gpu/drm/i915/i915_gem_userptr.c @@ -753,12 +753,13 @@ static const struct drm_i915_gem_object_ops i915_gem_userptr_ops = { int i915_gem_userptr_ioctl(struct drm_device *dev, void *data, struct drm_file *file) { + struct drm_i915_private *dev_priv = to_i915(dev); struct drm_i915_gem_userptr *args = data; struct drm_i915_gem_object *obj; int ret; u32 handle; - if (!HAS_LLC(dev) && !HAS_SNOOP(dev)) { + if (!HAS_LLC(dev_priv) && !HAS_SNOOP(dev_priv)) { /* We cannot support coherent userptr objects on hw without * LLC and broken snooping. */ diff --git a/drivers/gpu/drm/i915/i915_gpu_error.c b/drivers/gpu/drm/i915/i915_gpu_error.c index 204093f3eaa5..5d620bd5dd22 100644 --- a/drivers/gpu/drm/i915/i915_gpu_error.c +++ b/drivers/gpu/drm/i915/i915_gpu_error.c @@ -573,7 +573,7 @@ int i915_error_state_to_str(struct drm_i915_error_state_buf *m, pdev->subsystem_device); err_printf(m, "IOMMU enabled?: %d\n", error->iommu); - if (HAS_CSR(dev)) { + if (HAS_CSR(dev_priv)) { struct intel_csr *csr = &dev_priv->csr; err_printf(m, "DMC loaded: %s\n", @@ -861,16 +861,19 @@ out: static inline uint32_t __active_get_seqno(struct i915_gem_active *active) { - return i915_gem_request_get_seqno(__i915_gem_active_peek(active)); + struct drm_i915_gem_request *request; + + request = __i915_gem_active_peek(active); + return request ? request->global_seqno : 0; } static inline int __active_get_engine_id(struct i915_gem_active *active) { - struct intel_engine_cs *engine; + struct drm_i915_gem_request *request; - engine = i915_gem_request_get_engine(__i915_gem_active_peek(active)); - return engine ? engine->id : -1; + request = __i915_gem_active_peek(active); + return request ? request->engine->id : -1; } static void capture_bo(struct drm_i915_error_buffer *err, @@ -1489,7 +1492,7 @@ static void i915_capture_reg_state(struct drm_i915_private *dev_priv, } /* 4: Everything else */ - if (HAS_HW_CONTEXTS(dev)) + if (HAS_HW_CONTEXTS(dev_priv)) error->ccid = I915_READ(CCID); if (INTEL_INFO(dev)->gen >= 8) { diff --git a/drivers/gpu/drm/i915/i915_guc_submission.c b/drivers/gpu/drm/i915/i915_guc_submission.c index 666dab7a675a..4462112725ef 100644 --- a/drivers/gpu/drm/i915/i915_guc_submission.c +++ b/drivers/gpu/drm/i915/i915_guc_submission.c @@ -629,11 +629,23 @@ static int guc_ring_doorbell(struct i915_guc_client *gc) static void i915_guc_submit(struct drm_i915_gem_request *rq) { struct drm_i915_private *dev_priv = rq->i915; - unsigned int engine_id = rq->engine->id; + struct intel_engine_cs *engine = rq->engine; + unsigned int engine_id = engine->id; struct intel_guc *guc = &rq->i915->guc; struct i915_guc_client *client = guc->execbuf_client; int b_ret; + /* We keep the previous context alive until we retire the following + * request. This ensures that any the context object is still pinned + * for any residual writes the HW makes into it on the context switch + * into the next object following the breadcrumb. Otherwise, we may + * retire the context too early. + */ + rq->previous_context = engine->last_context; + engine->last_context = rq->ctx; + + i915_gem_request_submit(rq); + spin_lock(&client->wq_lock); guc_wq_item_append(client, rq); @@ -1520,6 +1532,7 @@ int i915_guc_submission_enable(struct drm_i915_private *dev_priv) /* Take over from manual control of ELSP (execlists) */ for_each_engine(engine, dev_priv, id) { engine->submit_request = i915_guc_submit; + engine->schedule = NULL; /* Replay the current set of previously submitted requests */ list_for_each_entry(request, diff --git a/drivers/gpu/drm/i915/i915_irq.c b/drivers/gpu/drm/i915/i915_irq.c index 6d7505b5c5e7..cb8a75f6ca16 100644 --- a/drivers/gpu/drm/i915/i915_irq.c +++ b/drivers/gpu/drm/i915/i915_irq.c @@ -3678,7 +3678,7 @@ static void i915_irq_preinstall(struct drm_device * dev) struct drm_i915_private *dev_priv = to_i915(dev); int pipe; - if (I915_HAS_HOTPLUG(dev)) { + if (I915_HAS_HOTPLUG(dev_priv)) { i915_hotplug_interrupt_update(dev_priv, 0xffffffff, 0); I915_WRITE(PORT_HOTPLUG_STAT, I915_READ(PORT_HOTPLUG_STAT)); } @@ -3712,7 +3712,7 @@ static int i915_irq_postinstall(struct drm_device *dev) I915_DISPLAY_PIPE_B_EVENT_INTERRUPT | I915_USER_INTERRUPT; - if (I915_HAS_HOTPLUG(dev)) { + if (I915_HAS_HOTPLUG(dev_priv)) { i915_hotplug_interrupt_update(dev_priv, 0xffffffff, 0); POSTING_READ(PORT_HOTPLUG_EN); @@ -3880,7 +3880,7 @@ static void i915_irq_uninstall(struct drm_device * dev) struct drm_i915_private *dev_priv = to_i915(dev); int pipe; - if (I915_HAS_HOTPLUG(dev)) { + if (I915_HAS_HOTPLUG(dev_priv)) { i915_hotplug_interrupt_update(dev_priv, 0xffffffff, 0); I915_WRITE(PORT_HOTPLUG_STAT, I915_READ(PORT_HOTPLUG_STAT)); } @@ -4145,7 +4145,7 @@ void intel_irq_init(struct drm_i915_private *dev_priv) INIT_WORK(&dev_priv->rps.work, gen6_pm_rps_work); INIT_WORK(&dev_priv->l3_parity.error_work, ivybridge_parity_work); - if (HAS_GUC_SCHED(dev)) + if (HAS_GUC_SCHED(dev_priv)) dev_priv->pm_guc_events = GEN9_GUC_TO_HOST_INT_EVENT; /* Let's track the enabled rps events */ diff --git a/drivers/gpu/drm/i915/i915_params.c b/drivers/gpu/drm/i915/i915_params.c index 629e4334719c..d46ffe7086bc 100644 --- a/drivers/gpu/drm/i915/i915_params.c +++ b/drivers/gpu/drm/i915/i915_params.c @@ -39,7 +39,7 @@ struct i915_params i915 __read_mostly = { .enable_hangcheck = true, .enable_ppgtt = -1, .enable_psr = -1, - .preliminary_hw_support = IS_ENABLED(CONFIG_DRM_I915_PRELIMINARY_HW_SUPPORT), + .alpha_support = IS_ENABLED(CONFIG_DRM_I915_ALPHA_SUPPORT), .disable_power_well = -1, .enable_ips = 1, .fastboot = 0, @@ -145,9 +145,10 @@ MODULE_PARM_DESC(enable_psr, "Enable PSR " "(0=disabled, 1=enabled - link mode chosen per-platform, 2=force link-standby mode, 3=force link-off mode) " "Default: -1 (use per-chip default)"); -module_param_named_unsafe(preliminary_hw_support, i915.preliminary_hw_support, int, 0400); -MODULE_PARM_DESC(preliminary_hw_support, - "Enable preliminary hardware support."); +module_param_named_unsafe(alpha_support, i915.alpha_support, int, 0400); +MODULE_PARM_DESC(alpha_support, + "Enable alpha quality driver support for latest hardware. " + "See also CONFIG_DRM_I915_ALPHA_SUPPORT."); module_param_named_unsafe(disable_power_well, i915.disable_power_well, int, 0400); MODULE_PARM_DESC(disable_power_well, diff --git a/drivers/gpu/drm/i915/i915_params.h b/drivers/gpu/drm/i915/i915_params.h index 94efc899c1ef..817ad959941e 100644 --- a/drivers/gpu/drm/i915/i915_params.h +++ b/drivers/gpu/drm/i915/i915_params.h @@ -40,7 +40,7 @@ struct i915_params { int enable_ppgtt; int enable_execlists; int enable_psr; - unsigned int preliminary_hw_support; + unsigned int alpha_support; int disable_power_well; int enable_ips; int invert_brightness; diff --git a/drivers/gpu/drm/i915/i915_pci.c b/drivers/gpu/drm/i915/i915_pci.c index 2a419500b81a..fce8e198bc76 100644 --- a/drivers/gpu/drm/i915/i915_pci.c +++ b/drivers/gpu/drm/i915/i915_pci.c @@ -363,6 +363,7 @@ static const struct intel_device_info intel_broxton_info = { .has_hw_contexts = 1, .has_logical_ring_contexts = 1, .has_guc = 1, + .has_decoupled_mmio = 1, .ddb_size = 512, GEN_DEFAULT_PIPEOFFSETS, IVB_CURSOR_OFFSETS, @@ -439,9 +440,10 @@ static int i915_pci_probe(struct pci_dev *pdev, const struct pci_device_id *ent) struct intel_device_info *intel_info = (struct intel_device_info *) ent->driver_data; - if (IS_PRELIMINARY_HW(intel_info) && !i915.preliminary_hw_support) { - DRM_INFO("This hardware requires preliminary hardware support.\n" - "See CONFIG_DRM_I915_PRELIMINARY_HW_SUPPORT, and/or modparam preliminary_hw_support\n"); + if (IS_ALPHA_SUPPORT(intel_info) && !i915.alpha_support) { + DRM_INFO("The driver support for your hardware in this kernel version is alpha quality\n" + "See CONFIG_DRM_I915_ALPHA_SUPPORT or i915.alpha_support module parameter\n" + "to enable support in this kernel version, or check for kernel updates.\n"); return -ENODEV; } diff --git a/drivers/gpu/drm/i915/i915_reg.h b/drivers/gpu/drm/i915/i915_reg.h index 3361d7ffc63e..c70c07a7b586 100644 --- a/drivers/gpu/drm/i915/i915_reg.h +++ b/drivers/gpu/drm/i915/i915_reg.h @@ -7342,6 +7342,13 @@ enum { #define SKL_FUSE_PG1_DIST_STATUS (1<<26) #define SKL_FUSE_PG2_DIST_STATUS (1<<25) +/* Decoupled MMIO register pair for kernel driver */ +#define GEN9_DECOUPLED_REG0_DW0 _MMIO(0xF00) +#define GEN9_DECOUPLED_REG0_DW1 _MMIO(0xF04) +#define GEN9_DECOUPLED_DW1_GO (1<<31) +#define GEN9_DECOUPLED_PD_SHIFT 28 +#define GEN9_DECOUPLED_OP_SHIFT 24 + /* Per-pipe DDI Function Control */ #define _TRANS_DDI_FUNC_CTL_A 0x60400 #define _TRANS_DDI_FUNC_CTL_B 0x61400 diff --git a/drivers/gpu/drm/i915/i915_sw_fence.c b/drivers/gpu/drm/i915/i915_sw_fence.c index 95f2f12e0917..147420ccf49c 100644 --- a/drivers/gpu/drm/i915/i915_sw_fence.c +++ b/drivers/gpu/drm/i915/i915_sw_fence.c @@ -116,11 +116,14 @@ static void i915_sw_fence_await(struct i915_sw_fence *fence) WARN_ON(atomic_inc_return(&fence->pending) <= 1); } -void i915_sw_fence_init(struct i915_sw_fence *fence, i915_sw_fence_notify_t fn) +void __i915_sw_fence_init(struct i915_sw_fence *fence, + i915_sw_fence_notify_t fn, + const char *name, + struct lock_class_key *key) { BUG_ON((unsigned long)fn & ~I915_SW_FENCE_MASK); - init_waitqueue_head(&fence->wait); + __init_waitqueue_head(&fence->wait, name, key); kref_init(&fence->kref); atomic_set(&fence->pending, 1); fence->flags = (unsigned long)fn; diff --git a/drivers/gpu/drm/i915/i915_sw_fence.h b/drivers/gpu/drm/i915/i915_sw_fence.h index 707dfc4f0da5..7508d23f823b 100644 --- a/drivers/gpu/drm/i915/i915_sw_fence.h +++ b/drivers/gpu/drm/i915/i915_sw_fence.h @@ -40,7 +40,22 @@ typedef int (*i915_sw_fence_notify_t)(struct i915_sw_fence *, enum i915_sw_fence_notify state); #define __i915_sw_fence_call __aligned(4) -void i915_sw_fence_init(struct i915_sw_fence *fence, i915_sw_fence_notify_t fn); +void __i915_sw_fence_init(struct i915_sw_fence *fence, + i915_sw_fence_notify_t fn, + const char *name, + struct lock_class_key *key); +#ifdef CONFIG_LOCKDEP +#define i915_sw_fence_init(fence, fn) \ +do { \ + static struct lock_class_key __key; \ + \ + __i915_sw_fence_init((fence), (fn), #fence, &__key); \ +} while (0) +#else +#define i915_sw_fence_init(fence, fn) \ + __i915_sw_fence_init((fence), (fn), NULL, NULL) +#endif + void i915_sw_fence_commit(struct i915_sw_fence *fence); int i915_sw_fence_await_sw_fence(struct i915_sw_fence *fence, diff --git a/drivers/gpu/drm/i915/i915_vma.c b/drivers/gpu/drm/i915/i915_vma.c new file mode 100644 index 000000000000..738ff3a5cd6e --- /dev/null +++ b/drivers/gpu/drm/i915/i915_vma.c @@ -0,0 +1,650 @@ +/* + * Copyright © 2016 Intel Corporation + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + * IN THE SOFTWARE. + * + */ + +#include "i915_vma.h" + +#include "i915_drv.h" +#include "intel_ringbuffer.h" +#include "intel_frontbuffer.h" + +#include <drm/drm_gem.h> + +static void +i915_vma_retire(struct i915_gem_active *active, + struct drm_i915_gem_request *rq) +{ + const unsigned int idx = rq->engine->id; + struct i915_vma *vma = + container_of(active, struct i915_vma, last_read[idx]); + struct drm_i915_gem_object *obj = vma->obj; + + GEM_BUG_ON(!i915_vma_has_active_engine(vma, idx)); + + i915_vma_clear_active(vma, idx); + if (i915_vma_is_active(vma)) + return; + + list_move_tail(&vma->vm_link, &vma->vm->inactive_list); + if (unlikely(i915_vma_is_closed(vma) && !i915_vma_is_pinned(vma))) + WARN_ON(i915_vma_unbind(vma)); + + GEM_BUG_ON(!i915_gem_object_is_active(obj)); + if (--obj->active_count) + return; + + /* Bump our place on the bound list to keep it roughly in LRU order + * so that we don't steal from recently used but inactive objects + * (unless we are forced to ofc!) + */ + if (obj->bind_count) + list_move_tail(&obj->global_link, &rq->i915->mm.bound_list); + + obj->mm.dirty = true; /* be paranoid */ + + if (i915_gem_object_has_active_reference(obj)) { + i915_gem_object_clear_active_reference(obj); + i915_gem_object_put(obj); + } +} + +static void +i915_ggtt_retire__write(struct i915_gem_active *active, + struct drm_i915_gem_request *request) +{ + struct i915_vma *vma = + container_of(active, struct i915_vma, last_write); + + intel_fb_obj_flush(vma->obj, true, ORIGIN_CS); +} + +static struct i915_vma * +__i915_vma_create(struct drm_i915_gem_object *obj, + struct i915_address_space *vm, + const struct i915_ggtt_view *view) +{ + struct i915_vma *vma; + struct rb_node *rb, **p; + int i; + + GEM_BUG_ON(vm->closed); + + vma = kmem_cache_zalloc(to_i915(obj->base.dev)->vmas, GFP_KERNEL); + if (vma == NULL) + return ERR_PTR(-ENOMEM); + + INIT_LIST_HEAD(&vma->exec_list); + for (i = 0; i < ARRAY_SIZE(vma->last_read); i++) + init_request_active(&vma->last_read[i], i915_vma_retire); + init_request_active(&vma->last_write, + i915_is_ggtt(vm) ? i915_ggtt_retire__write : NULL); + init_request_active(&vma->last_fence, NULL); + list_add(&vma->vm_link, &vm->unbound_list); + vma->vm = vm; + vma->obj = obj; + vma->size = obj->base.size; + + if (view) { + vma->ggtt_view = *view; + if (view->type == I915_GGTT_VIEW_PARTIAL) { + vma->size = view->params.partial.size; + vma->size <<= PAGE_SHIFT; + } else if (view->type == I915_GGTT_VIEW_ROTATED) { + vma->size = + intel_rotation_info_size(&view->params.rotated); + vma->size <<= PAGE_SHIFT; + } + } + + if (i915_is_ggtt(vm)) { + vma->flags |= I915_VMA_GGTT; + list_add(&vma->obj_link, &obj->vma_list); + } else { + i915_ppgtt_get(i915_vm_to_ppgtt(vm)); + list_add_tail(&vma->obj_link, &obj->vma_list); + } + + rb = NULL; + p = &obj->vma_tree.rb_node; + while (*p) { + struct i915_vma *pos; + + rb = *p; + pos = rb_entry(rb, struct i915_vma, obj_node); + if (i915_vma_compare(pos, vm, view) < 0) + p = &rb->rb_right; + else + p = &rb->rb_left; + } + rb_link_node(&vma->obj_node, rb, p); + rb_insert_color(&vma->obj_node, &obj->vma_tree); + + return vma; +} + +struct i915_vma * +i915_vma_create(struct drm_i915_gem_object *obj, + struct i915_address_space *vm, + const struct i915_ggtt_view *view) +{ + lockdep_assert_held(&obj->base.dev->struct_mutex); + GEM_BUG_ON(view && !i915_is_ggtt(vm)); + GEM_BUG_ON(i915_gem_obj_to_vma(obj, vm, view)); + + return __i915_vma_create(obj, vm, view); +} + +/** + * i915_vma_bind - Sets up PTEs for an VMA in it's corresponding address space. + * @vma: VMA to map + * @cache_level: mapping cache level + * @flags: flags like global or local mapping + * + * DMA addresses are taken from the scatter-gather table of this object (or of + * this VMA in case of non-default GGTT views) and PTE entries set up. + * Note that DMA addresses are also the only part of the SG table we care about. + */ +int i915_vma_bind(struct i915_vma *vma, enum i915_cache_level cache_level, + u32 flags) +{ + u32 bind_flags; + u32 vma_flags; + int ret; + + if (WARN_ON(flags == 0)) + return -EINVAL; + + bind_flags = 0; + if (flags & PIN_GLOBAL) + bind_flags |= I915_VMA_GLOBAL_BIND; + if (flags & PIN_USER) + bind_flags |= I915_VMA_LOCAL_BIND; + + vma_flags = vma->flags & (I915_VMA_GLOBAL_BIND | I915_VMA_LOCAL_BIND); + if (flags & PIN_UPDATE) + bind_flags |= vma_flags; + else + bind_flags &= ~vma_flags; + if (bind_flags == 0) + return 0; + + if (vma_flags == 0 && vma->vm->allocate_va_range) { + trace_i915_va_alloc(vma); + ret = vma->vm->allocate_va_range(vma->vm, + vma->node.start, + vma->node.size); + if (ret) + return ret; + } + + ret = vma->vm->bind_vma(vma, cache_level, bind_flags); + if (ret) + return ret; + + vma->flags |= bind_flags; + return 0; +} + +void __iomem *i915_vma_pin_iomap(struct i915_vma *vma) +{ + void __iomem *ptr; + + /* Access through the GTT requires the device to be awake. */ + assert_rpm_wakelock_held(to_i915(vma->vm->dev)); + + lockdep_assert_held(&vma->vm->dev->struct_mutex); + if (WARN_ON(!i915_vma_is_map_and_fenceable(vma))) + return IO_ERR_PTR(-ENODEV); + + GEM_BUG_ON(!i915_vma_is_ggtt(vma)); + GEM_BUG_ON((vma->flags & I915_VMA_GLOBAL_BIND) == 0); + + ptr = vma->iomap; + if (ptr == NULL) { + ptr = io_mapping_map_wc(&i915_vm_to_ggtt(vma->vm)->mappable, + vma->node.start, + vma->node.size); + if (ptr == NULL) + return IO_ERR_PTR(-ENOMEM); + + vma->iomap = ptr; + } + + __i915_vma_pin(vma); + return ptr; +} + +void i915_vma_unpin_and_release(struct i915_vma **p_vma) +{ + struct i915_vma *vma; + struct drm_i915_gem_object *obj; + + vma = fetch_and_zero(p_vma); + if (!vma) + return; + + obj = vma->obj; + + i915_vma_unpin(vma); + i915_vma_close(vma); + + __i915_gem_object_release_unless_active(obj); +} + +bool +i915_vma_misplaced(struct i915_vma *vma, u64 size, u64 alignment, u64 flags) +{ + if (!drm_mm_node_allocated(&vma->node)) + return false; + + if (vma->node.size < size) + return true; + + if (alignment && vma->node.start & (alignment - 1)) + return true; + + if (flags & PIN_MAPPABLE && !i915_vma_is_map_and_fenceable(vma)) + return true; + + if (flags & PIN_OFFSET_BIAS && + vma->node.start < (flags & PIN_OFFSET_MASK)) + return true; + + if (flags & PIN_OFFSET_FIXED && + vma->node.start != (flags & PIN_OFFSET_MASK)) + return true; + + return false; +} + +void __i915_vma_set_map_and_fenceable(struct i915_vma *vma) +{ + struct drm_i915_gem_object *obj = vma->obj; + struct drm_i915_private *dev_priv = to_i915(obj->base.dev); + bool mappable, fenceable; + u32 fence_size, fence_alignment; + + fence_size = i915_gem_get_ggtt_size(dev_priv, + vma->size, + i915_gem_object_get_tiling(obj)); + fence_alignment = i915_gem_get_ggtt_alignment(dev_priv, + vma->size, + i915_gem_object_get_tiling(obj), + true); + + fenceable = (vma->node.size == fence_size && + (vma->node.start & (fence_alignment - 1)) == 0); + + mappable = (vma->node.start + fence_size <= + dev_priv->ggtt.mappable_end); + + /* + * Explicitly disable for rotated VMA since the display does not + * need the fence and the VMA is not accessible to other users. + */ + if (mappable && fenceable && + vma->ggtt_view.type != I915_GGTT_VIEW_ROTATED) + vma->flags |= I915_VMA_CAN_FENCE; + else + vma->flags &= ~I915_VMA_CAN_FENCE; +} + +bool i915_gem_valid_gtt_space(struct i915_vma *vma, + unsigned long cache_level) +{ + struct drm_mm_node *gtt_space = &vma->node; + struct drm_mm_node *other; + + /* + * On some machines we have to be careful when putting differing types + * of snoopable memory together to avoid the prefetcher crossing memory + * domains and dying. During vm initialisation, we decide whether or not + * these constraints apply and set the drm_mm.color_adjust + * appropriately. + */ + if (vma->vm->mm.color_adjust == NULL) + return true; + + if (!drm_mm_node_allocated(gtt_space)) + return true; + + if (list_empty(>t_space->node_list)) + return true; + + other = list_entry(gtt_space->node_list.prev, struct drm_mm_node, node_list); + if (other->allocated && !other->hole_follows && other->color != cache_level) + return false; + + other = list_entry(gtt_space->node_list.next, struct drm_mm_node, node_list); + if (other->allocated && !gtt_space->hole_follows && other->color != cache_level) + return false; + + return true; +} + +/** + * i915_vma_insert - finds a slot for the vma in its address space + * @vma: the vma + * @size: requested size in bytes (can be larger than the VMA) + * @alignment: required alignment + * @flags: mask of PIN_* flags to use + * + * First we try to allocate some free space that meets the requirements for + * the VMA. Failiing that, if the flags permit, it will evict an old VMA, + * preferrably the oldest idle entry to make room for the new VMA. + * + * Returns: + * 0 on success, negative error code otherwise. + */ +static int +i915_vma_insert(struct i915_vma *vma, u64 size, u64 alignment, u64 flags) +{ + struct drm_i915_private *dev_priv = to_i915(vma->vm->dev); + struct drm_i915_gem_object *obj = vma->obj; + u64 start, end; + int ret; + + GEM_BUG_ON(vma->flags & (I915_VMA_GLOBAL_BIND | I915_VMA_LOCAL_BIND)); + GEM_BUG_ON(drm_mm_node_allocated(&vma->node)); + + size = max(size, vma->size); + if (flags & PIN_MAPPABLE) + size = i915_gem_get_ggtt_size(dev_priv, size, + i915_gem_object_get_tiling(obj)); + + alignment = max(max(alignment, vma->display_alignment), + i915_gem_get_ggtt_alignment(dev_priv, size, + i915_gem_object_get_tiling(obj), + flags & PIN_MAPPABLE)); + + start = flags & PIN_OFFSET_BIAS ? flags & PIN_OFFSET_MASK : 0; + + end = vma->vm->total; + if (flags & PIN_MAPPABLE) + end = min_t(u64, end, dev_priv->ggtt.mappable_end); + if (flags & PIN_ZONE_4G) + end = min_t(u64, end, (1ULL << 32) - PAGE_SIZE); + + /* If binding the object/GGTT view requires more space than the entire + * aperture has, reject it early before evicting everything in a vain + * attempt to find space. + */ + if (size > end) { + DRM_DEBUG("Attempting to bind an object larger than the aperture: request=%llu [object=%zd] > %s aperture=%llu\n", + size, obj->base.size, + flags & PIN_MAPPABLE ? "mappable" : "total", + end); + return -E2BIG; + } + + ret = i915_gem_object_pin_pages(obj); + if (ret) + return ret; + + if (flags & PIN_OFFSET_FIXED) { + u64 offset = flags & PIN_OFFSET_MASK; + if (offset & (alignment - 1) || offset > end - size) { + ret = -EINVAL; + goto err_unpin; + } + + vma->node.start = offset; + vma->node.size = size; + vma->node.color = obj->cache_level; + ret = drm_mm_reserve_node(&vma->vm->mm, &vma->node); + if (ret) { + ret = i915_gem_evict_for_vma(vma); + if (ret == 0) + ret = drm_mm_reserve_node(&vma->vm->mm, &vma->node); + if (ret) + goto err_unpin; + } + } else { + u32 search_flag, alloc_flag; + + if (flags & PIN_HIGH) { + search_flag = DRM_MM_SEARCH_BELOW; + alloc_flag = DRM_MM_CREATE_TOP; + } else { + search_flag = DRM_MM_SEARCH_DEFAULT; + alloc_flag = DRM_MM_CREATE_DEFAULT; + } + + /* We only allocate in PAGE_SIZE/GTT_PAGE_SIZE (4096) chunks, + * so we know that we always have a minimum alignment of 4096. + * The drm_mm range manager is optimised to return results + * with zero alignment, so where possible use the optimal + * path. + */ + if (alignment <= 4096) + alignment = 0; + +search_free: + ret = drm_mm_insert_node_in_range_generic(&vma->vm->mm, + &vma->node, + size, alignment, + obj->cache_level, + start, end, + search_flag, + alloc_flag); + if (ret) { + ret = i915_gem_evict_something(vma->vm, size, alignment, + obj->cache_level, + start, end, + flags); + if (ret == 0) + goto search_free; + + goto err_unpin; + } + + GEM_BUG_ON(vma->node.start < start); + GEM_BUG_ON(vma->node.start + vma->node.size > end); + } + GEM_BUG_ON(!i915_gem_valid_gtt_space(vma, obj->cache_level)); + + list_move_tail(&obj->global_link, &dev_priv->mm.bound_list); + list_move_tail(&vma->vm_link, &vma->vm->inactive_list); + obj->bind_count++; + GEM_BUG_ON(atomic_read(&obj->mm.pages_pin_count) < obj->bind_count); + + return 0; + +err_unpin: + i915_gem_object_unpin_pages(obj); + return ret; +} + +int __i915_vma_do_pin(struct i915_vma *vma, + u64 size, u64 alignment, u64 flags) +{ + unsigned int bound = vma->flags; + int ret; + + lockdep_assert_held(&vma->vm->dev->struct_mutex); + GEM_BUG_ON((flags & (PIN_GLOBAL | PIN_USER)) == 0); + GEM_BUG_ON((flags & PIN_GLOBAL) && !i915_vma_is_ggtt(vma)); + + if (WARN_ON(bound & I915_VMA_PIN_OVERFLOW)) { + ret = -EBUSY; + goto err; + } + + if ((bound & I915_VMA_BIND_MASK) == 0) { + ret = i915_vma_insert(vma, size, alignment, flags); + if (ret) + goto err; + } + + ret = i915_vma_bind(vma, vma->obj->cache_level, flags); + if (ret) + goto err; + + if ((bound ^ vma->flags) & I915_VMA_GLOBAL_BIND) + __i915_vma_set_map_and_fenceable(vma); + + GEM_BUG_ON(i915_vma_misplaced(vma, size, alignment, flags)); + return 0; + +err: + __i915_vma_unpin(vma); + return ret; +} + +void i915_vma_destroy(struct i915_vma *vma) +{ + GEM_BUG_ON(vma->node.allocated); + GEM_BUG_ON(i915_vma_is_active(vma)); + GEM_BUG_ON(!i915_vma_is_closed(vma)); + GEM_BUG_ON(vma->fence); + + list_del(&vma->vm_link); + if (!i915_vma_is_ggtt(vma)) + i915_ppgtt_put(i915_vm_to_ppgtt(vma->vm)); + + kmem_cache_free(to_i915(vma->obj->base.dev)->vmas, vma); +} + +void i915_vma_close(struct i915_vma *vma) +{ + GEM_BUG_ON(i915_vma_is_closed(vma)); + vma->flags |= I915_VMA_CLOSED; + + list_del(&vma->obj_link); + rb_erase(&vma->obj_node, &vma->obj->vma_tree); + + if (!i915_vma_is_active(vma) && !i915_vma_is_pinned(vma)) + WARN_ON(i915_vma_unbind(vma)); +} + +static void __i915_vma_iounmap(struct i915_vma *vma) +{ + GEM_BUG_ON(i915_vma_is_pinned(vma)); + + if (vma->iomap == NULL) + return; + + io_mapping_unmap(vma->iomap); + vma->iomap = NULL; +} + +int i915_vma_unbind(struct i915_vma *vma) +{ + struct drm_i915_gem_object *obj = vma->obj; + unsigned long active; + int ret; + + lockdep_assert_held(&obj->base.dev->struct_mutex); + + /* First wait upon any activity as retiring the request may + * have side-effects such as unpinning or even unbinding this vma. + */ + active = i915_vma_get_active(vma); + if (active) { + int idx; + + /* When a closed VMA is retired, it is unbound - eek. + * In order to prevent it from being recursively closed, + * take a pin on the vma so that the second unbind is + * aborted. + * + * Even more scary is that the retire callback may free + * the object (last active vma). To prevent the explosion + * we defer the actual object free to a worker that can + * only proceed once it acquires the struct_mutex (which + * we currently hold, therefore it cannot free this object + * before we are finished). + */ + __i915_vma_pin(vma); + + for_each_active(active, idx) { + ret = i915_gem_active_retire(&vma->last_read[idx], + &vma->vm->dev->struct_mutex); + if (ret) + break; + } + + __i915_vma_unpin(vma); + if (ret) + return ret; + + GEM_BUG_ON(i915_vma_is_active(vma)); + } + + if (i915_vma_is_pinned(vma)) + return -EBUSY; + + if (!drm_mm_node_allocated(&vma->node)) + goto destroy; + + GEM_BUG_ON(obj->bind_count == 0); + GEM_BUG_ON(!i915_gem_object_has_pinned_pages(obj)); + + if (i915_vma_is_map_and_fenceable(vma)) { + /* release the fence reg _after_ flushing */ + ret = i915_vma_put_fence(vma); + if (ret) + return ret; + + /* Force a pagefault for domain tracking on next user access */ + i915_gem_release_mmap(obj); + + __i915_vma_iounmap(vma); + vma->flags &= ~I915_VMA_CAN_FENCE; + } + + if (likely(!vma->vm->closed)) { + trace_i915_vma_unbind(vma); + vma->vm->unbind_vma(vma); + } + vma->flags &= ~(I915_VMA_GLOBAL_BIND | I915_VMA_LOCAL_BIND); + + drm_mm_remove_node(&vma->node); + list_move_tail(&vma->vm_link, &vma->vm->unbound_list); + + if (vma->pages != obj->mm.pages) { + GEM_BUG_ON(!vma->pages); + sg_free_table(vma->pages); + kfree(vma->pages); + } + vma->pages = NULL; + + /* Since the unbound list is global, only move to that list if + * no more VMAs exist. */ + if (--obj->bind_count == 0) + list_move_tail(&obj->global_link, + &to_i915(obj->base.dev)->mm.unbound_list); + + /* And finally now the object is completely decoupled from this vma, + * we can drop its hold on the backing storage and allow it to be + * reaped by the shrinker. + */ + i915_gem_object_unpin_pages(obj); + +destroy: + if (unlikely(i915_vma_is_closed(vma))) + i915_vma_destroy(vma); + + return 0; +} + diff --git a/drivers/gpu/drm/i915/i915_vma.h b/drivers/gpu/drm/i915/i915_vma.h new file mode 100644 index 000000000000..2e49f5dd6107 --- /dev/null +++ b/drivers/gpu/drm/i915/i915_vma.h @@ -0,0 +1,342 @@ +/* + * Copyright © 2016 Intel Corporation + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + * IN THE SOFTWARE. + * + */ + +#ifndef __I915_VMA_H__ +#define __I915_VMA_H__ + +#include <linux/io-mapping.h> + +#include <drm/drm_mm.h> + +#include "i915_gem_gtt.h" +#include "i915_gem_fence_reg.h" +#include "i915_gem_object.h" +#include "i915_gem_request.h" + + +enum i915_cache_level; + +/** + * A VMA represents a GEM BO that is bound into an address space. Therefore, a + * VMA's presence cannot be guaranteed before binding, or after unbinding the + * object into/from the address space. + * + * To make things as simple as possible (ie. no refcounting), a VMA's lifetime + * will always be <= an objects lifetime. So object refcounting should cover us. + */ +struct i915_vma { + struct drm_mm_node node; + struct drm_i915_gem_object *obj; + struct i915_address_space *vm; + struct drm_i915_fence_reg *fence; + struct sg_table *pages; + void __iomem *iomap; + u64 size; + u64 display_alignment; + + unsigned int flags; + /** + * How many users have pinned this object in GTT space. The following + * users can each hold at most one reference: pwrite/pread, execbuffer + * (objects are not allowed multiple times for the same batchbuffer), + * and the framebuffer code. When switching/pageflipping, the + * framebuffer code has at most two buffers pinned per crtc. + * + * In the worst case this is 1 + 1 + 1 + 2*2 = 7. That would fit into 3 + * bits with absolutely no headroom. So use 4 bits. + */ +#define I915_VMA_PIN_MASK 0xf +#define I915_VMA_PIN_OVERFLOW BIT(5) + + /** Flags and address space this VMA is bound to */ +#define I915_VMA_GLOBAL_BIND BIT(6) +#define I915_VMA_LOCAL_BIND BIT(7) +#define I915_VMA_BIND_MASK (I915_VMA_GLOBAL_BIND | I915_VMA_LOCAL_BIND | I915_VMA_PIN_OVERFLOW) + +#define I915_VMA_GGTT BIT(8) +#define I915_VMA_CAN_FENCE BIT(9) +#define I915_VMA_CLOSED BIT(10) + + unsigned int active; + struct i915_gem_active last_read[I915_NUM_ENGINES]; + struct i915_gem_active last_write; + struct i915_gem_active last_fence; + + /** + * Support different GGTT views into the same object. + * This means there can be multiple VMA mappings per object and per VM. + * i915_ggtt_view_type is used to distinguish between those entries. + * The default one of zero (I915_GGTT_VIEW_NORMAL) is default and also + * assumed in GEM functions which take no ggtt view parameter. + */ + struct i915_ggtt_view ggtt_view; + + /** This object's place on the active/inactive lists */ + struct list_head vm_link; + + struct list_head obj_link; /* Link in the object's VMA list */ + struct rb_node obj_node; + + /** This vma's place in the batchbuffer or on the eviction list */ + struct list_head exec_list; + + /** + * Used for performing relocations during execbuffer insertion. + */ + struct hlist_node exec_node; + unsigned long exec_handle; + struct drm_i915_gem_exec_object2 *exec_entry; +}; + +struct i915_vma * +i915_vma_create(struct drm_i915_gem_object *obj, + struct i915_address_space *vm, + const struct i915_ggtt_view *view); + +void i915_vma_unpin_and_release(struct i915_vma **p_vma); + +static inline bool i915_vma_is_ggtt(const struct i915_vma *vma) +{ + return vma->flags & I915_VMA_GGTT; +} + +static inline bool i915_vma_is_map_and_fenceable(const struct i915_vma *vma) +{ + return vma->flags & I915_VMA_CAN_FENCE; +} + +static inline bool i915_vma_is_closed(const struct i915_vma *vma) +{ + return vma->flags & I915_VMA_CLOSED; +} + +static inline unsigned int i915_vma_get_active(const struct i915_vma *vma) +{ + return vma->active; +} + +static inline bool i915_vma_is_active(const struct i915_vma *vma) +{ + return i915_vma_get_active(vma); +} + +static inline void i915_vma_set_active(struct i915_vma *vma, + unsigned int engine) +{ + vma->active |= BIT(engine); +} + +static inline void i915_vma_clear_active(struct i915_vma *vma, + unsigned int engine) +{ + vma->active &= ~BIT(engine); +} + +static inline bool i915_vma_has_active_engine(const struct i915_vma *vma, + unsigned int engine) +{ + return vma->active & BIT(engine); +} + +static inline u32 i915_ggtt_offset(const struct i915_vma *vma) +{ + GEM_BUG_ON(!i915_vma_is_ggtt(vma)); + GEM_BUG_ON(!vma->node.allocated); + GEM_BUG_ON(upper_32_bits(vma->node.start)); + GEM_BUG_ON(upper_32_bits(vma->node.start + vma->node.size - 1)); + return lower_32_bits(vma->node.start); +} + +static inline struct i915_vma *i915_vma_get(struct i915_vma *vma) +{ + i915_gem_object_get(vma->obj); + return vma; +} + +static inline void i915_vma_put(struct i915_vma *vma) +{ + i915_gem_object_put(vma->obj); +} + +static inline long +i915_vma_compare(struct i915_vma *vma, + struct i915_address_space *vm, + const struct i915_ggtt_view *view) +{ + GEM_BUG_ON(view && !i915_is_ggtt(vm)); + + if (vma->vm != vm) + return vma->vm - vm; + + if (!view) + return vma->ggtt_view.type; + + if (vma->ggtt_view.type != view->type) + return vma->ggtt_view.type - view->type; + + return memcmp(&vma->ggtt_view.params, + &view->params, + sizeof(view->params)); +} + +int i915_vma_bind(struct i915_vma *vma, enum i915_cache_level cache_level, + u32 flags); +bool i915_gem_valid_gtt_space(struct i915_vma *vma, unsigned long cache_level); +bool +i915_vma_misplaced(struct i915_vma *vma, u64 size, u64 alignment, u64 flags); +void __i915_vma_set_map_and_fenceable(struct i915_vma *vma); +int __must_check i915_vma_unbind(struct i915_vma *vma); +void i915_vma_close(struct i915_vma *vma); +void i915_vma_destroy(struct i915_vma *vma); + +int __i915_vma_do_pin(struct i915_vma *vma, + u64 size, u64 alignment, u64 flags); +static inline int __must_check +i915_vma_pin(struct i915_vma *vma, u64 size, u64 alignment, u64 flags) +{ + BUILD_BUG_ON(PIN_MBZ != I915_VMA_PIN_OVERFLOW); + BUILD_BUG_ON(PIN_GLOBAL != I915_VMA_GLOBAL_BIND); + BUILD_BUG_ON(PIN_USER != I915_VMA_LOCAL_BIND); + + /* Pin early to prevent the shrinker/eviction logic from destroying + * our vma as we insert and bind. + */ + if (likely(((++vma->flags ^ flags) & I915_VMA_BIND_MASK) == 0)) + return 0; + + return __i915_vma_do_pin(vma, size, alignment, flags); +} + +static inline int i915_vma_pin_count(const struct i915_vma *vma) +{ + return vma->flags & I915_VMA_PIN_MASK; +} + +static inline bool i915_vma_is_pinned(const struct i915_vma *vma) +{ + return i915_vma_pin_count(vma); +} + +static inline void __i915_vma_pin(struct i915_vma *vma) +{ + vma->flags++; + GEM_BUG_ON(vma->flags & I915_VMA_PIN_OVERFLOW); +} + +static inline void __i915_vma_unpin(struct i915_vma *vma) +{ + GEM_BUG_ON(!i915_vma_is_pinned(vma)); + vma->flags--; +} + +static inline void i915_vma_unpin(struct i915_vma *vma) +{ + GEM_BUG_ON(!drm_mm_node_allocated(&vma->node)); + __i915_vma_unpin(vma); +} + +/** + * i915_vma_pin_iomap - calls ioremap_wc to map the GGTT VMA via the aperture + * @vma: VMA to iomap + * + * The passed in VMA has to be pinned in the global GTT mappable region. + * An extra pinning of the VMA is acquired for the return iomapping, + * the caller must call i915_vma_unpin_iomap to relinquish the pinning + * after the iomapping is no longer required. + * + * Callers must hold the struct_mutex. + * + * Returns a valid iomapped pointer or ERR_PTR. + */ +void __iomem *i915_vma_pin_iomap(struct i915_vma *vma); +#define IO_ERR_PTR(x) ((void __iomem *)ERR_PTR(x)) + +/** + * i915_vma_unpin_iomap - unpins the mapping returned from i915_vma_iomap + * @vma: VMA to unpin + * + * Unpins the previously iomapped VMA from i915_vma_pin_iomap(). + * + * Callers must hold the struct_mutex. This function is only valid to be + * called on a VMA previously iomapped by the caller with i915_vma_pin_iomap(). + */ +static inline void i915_vma_unpin_iomap(struct i915_vma *vma) +{ + lockdep_assert_held(&vma->vm->dev->struct_mutex); + GEM_BUG_ON(vma->iomap == NULL); + i915_vma_unpin(vma); +} + +static inline struct page *i915_vma_first_page(struct i915_vma *vma) +{ + GEM_BUG_ON(!vma->pages); + return sg_page(vma->pages->sgl); +} + +/** + * i915_vma_pin_fence - pin fencing state + * @vma: vma to pin fencing for + * + * This pins the fencing state (whether tiled or untiled) to make sure the + * vma (and its object) is ready to be used as a scanout target. Fencing + * status must be synchronize first by calling i915_vma_get_fence(): + * + * The resulting fence pin reference must be released again with + * i915_vma_unpin_fence(). + * + * Returns: + * + * True if the vma has a fence, false otherwise. + */ +static inline bool +i915_vma_pin_fence(struct i915_vma *vma) +{ + lockdep_assert_held(&vma->vm->dev->struct_mutex); + if (vma->fence) { + vma->fence->pin_count++; + return true; + } else + return false; +} + +/** + * i915_vma_unpin_fence - unpin fencing state + * @vma: vma to unpin fencing for + * + * This releases the fence pin reference acquired through + * i915_vma_pin_fence. It will handle both objects with and without an + * attached fence correctly, callers do not need to distinguish this. + */ +static inline void +i915_vma_unpin_fence(struct i915_vma *vma) +{ + lockdep_assert_held(&vma->vm->dev->struct_mutex); + if (vma->fence) { + GEM_BUG_ON(vma->fence->pin_count <= 0); + vma->fence->pin_count--; + } +} + +#endif + diff --git a/drivers/gpu/drm/i915/intel_atomic_plane.c b/drivers/gpu/drm/i915/intel_atomic_plane.c index 984a6b75c118..ff821649486e 100644 --- a/drivers/gpu/drm/i915/intel_atomic_plane.c +++ b/drivers/gpu/drm/i915/intel_atomic_plane.c @@ -106,6 +106,7 @@ intel_plane_destroy_state(struct drm_plane *plane, static int intel_plane_atomic_check(struct drm_plane *plane, struct drm_plane_state *state) { + struct drm_i915_private *dev_priv = to_i915(plane->dev); struct drm_crtc *crtc = state->crtc; struct intel_crtc *intel_crtc; struct intel_crtc_state *crtc_state; @@ -167,6 +168,14 @@ static int intel_plane_atomic_check(struct drm_plane *plane, } } + /* CHV ignores the mirror bit when the rotate bit is set :( */ + if (IS_CHERRYVIEW(dev_priv) && + state->rotation & DRM_ROTATE_180 && + state->rotation & DRM_REFLECT_X) { + DRM_DEBUG_KMS("Cannot rotate and reflect at the same time\n"); + return -EINVAL; + } + intel_state->base.visible = false; ret = intel_plane->check_plane(plane, crtc_state, intel_state); if (ret) diff --git a/drivers/gpu/drm/i915/intel_audio.c b/drivers/gpu/drm/i915/intel_audio.c index 813fd74d9c8d..1c509f7410f5 100644 --- a/drivers/gpu/drm/i915/intel_audio.c +++ b/drivers/gpu/drm/i915/intel_audio.c @@ -574,23 +574,26 @@ static void ilk_audio_codec_enable(struct drm_connector *connector, /** * intel_audio_codec_enable - Enable the audio codec for HD audio * @intel_encoder: encoder on which to enable audio + * @crtc_state: pointer to the current crtc state. + * @conn_state: pointer to the current connector state. * * The enable sequences may only be performed after enabling the transcoder and * port, and after completed link training. */ -void intel_audio_codec_enable(struct intel_encoder *intel_encoder) +void intel_audio_codec_enable(struct intel_encoder *intel_encoder, + const struct intel_crtc_state *crtc_state, + const struct drm_connector_state *conn_state) { struct drm_encoder *encoder = &intel_encoder->base; - struct intel_crtc *crtc = to_intel_crtc(encoder->crtc); - const struct drm_display_mode *adjusted_mode = &crtc->config->base.adjusted_mode; + const struct drm_display_mode *adjusted_mode = &crtc_state->base.adjusted_mode; struct drm_connector *connector; struct drm_i915_private *dev_priv = to_i915(encoder->dev); struct i915_audio_component *acomp = dev_priv->audio_component; enum port port = intel_encoder->port; - enum pipe pipe = crtc->pipe; + enum pipe pipe = to_intel_crtc(crtc_state->base.crtc)->pipe; - connector = drm_select_eld(encoder); - if (!connector) + connector = conn_state->connector; + if (!connector || !connector->eld[0]) return; DRM_DEBUG_DRIVER("ELD on [CONNECTOR:%d:%s], [ENCODER:%d:%s]\n", @@ -601,7 +604,7 @@ void intel_audio_codec_enable(struct intel_encoder *intel_encoder) /* ELD Conn_Type */ connector->eld[5] &= ~(3 << 2); - if (intel_crtc_has_dp_encoder(crtc->config)) + if (intel_crtc_has_dp_encoder(crtc_state)) connector->eld[5] |= (1 << 2); connector->eld[6] = drm_av_sync_delay(connector, adjusted_mode) / 2; diff --git a/drivers/gpu/drm/i915/intel_bios.c b/drivers/gpu/drm/i915/intel_bios.c index 5ab646ef8c9f..7ffab1abc518 100644 --- a/drivers/gpu/drm/i915/intel_bios.c +++ b/drivers/gpu/drm/i915/intel_bios.c @@ -1147,7 +1147,7 @@ static void parse_ddi_port(struct drm_i915_private *dev_priv, enum port port, if (!child) return; - aux_channel = child->raw[25]; + aux_channel = child->common.aux_channel; ddc_pin = child->common.ddc_pin; is_dvi = child->common.device_type & DEVICE_TYPE_TMDS_DVI_SIGNALING; @@ -1677,7 +1677,8 @@ bool intel_bios_is_port_edp(struct drm_i915_private *dev_priv, enum port port) return false; } -bool intel_bios_is_port_dp_dual_mode(struct drm_i915_private *dev_priv, enum port port) +static bool child_dev_is_dp_dual_mode(const union child_device_config *p_child, + enum port port) { static const struct { u16 dp, hdmi; @@ -1691,22 +1692,35 @@ bool intel_bios_is_port_dp_dual_mode(struct drm_i915_private *dev_priv, enum por [PORT_D] = { DVO_PORT_DPD, DVO_PORT_HDMID, }, [PORT_E] = { DVO_PORT_DPE, DVO_PORT_HDMIE, }, }; - int i; if (port == PORT_A || port >= ARRAY_SIZE(port_mapping)) return false; - if (!dev_priv->vbt.child_dev_num) + if ((p_child->common.device_type & DEVICE_TYPE_DP_DUAL_MODE_BITS) != + (DEVICE_TYPE_DP_DUAL_MODE & DEVICE_TYPE_DP_DUAL_MODE_BITS)) return false; + if (p_child->common.dvo_port == port_mapping[port].dp) + return true; + + /* Only accept a HDMI dvo_port as DP++ if it has an AUX channel */ + if (p_child->common.dvo_port == port_mapping[port].hdmi && + p_child->common.aux_channel != 0) + return true; + + return false; +} + +bool intel_bios_is_port_dp_dual_mode(struct drm_i915_private *dev_priv, + enum port port) +{ + int i; + for (i = 0; i < dev_priv->vbt.child_dev_num; i++) { const union child_device_config *p_child = &dev_priv->vbt.child_dev[i]; - if ((p_child->common.dvo_port == port_mapping[port].dp || - p_child->common.dvo_port == port_mapping[port].hdmi) && - (p_child->common.device_type & DEVICE_TYPE_DP_DUAL_MODE_BITS) == - (DEVICE_TYPE_DP_DUAL_MODE & DEVICE_TYPE_DP_DUAL_MODE_BITS)) + if (child_dev_is_dp_dual_mode(p_child, port)) return true; } diff --git a/drivers/gpu/drm/i915/intel_breadcrumbs.c b/drivers/gpu/drm/i915/intel_breadcrumbs.c index c410d3d6465f..c9c46a538edb 100644 --- a/drivers/gpu/drm/i915/intel_breadcrumbs.c +++ b/drivers/gpu/drm/i915/intel_breadcrumbs.c @@ -629,35 +629,28 @@ void intel_engine_fini_breadcrumbs(struct intel_engine_cs *engine) cancel_fake_irq(engine); } -unsigned int intel_kick_waiters(struct drm_i915_private *i915) +unsigned int intel_breadcrumbs_busy(struct drm_i915_private *i915) { struct intel_engine_cs *engine; enum intel_engine_id id; unsigned int mask = 0; - /* To avoid the task_struct disappearing beneath us as we wake up - * the process, we must first inspect the task_struct->state under the - * RCU lock, i.e. as we call wake_up_process() we must be holding the - * rcu_read_lock(). - */ - for_each_engine(engine, i915, id) - if (unlikely(intel_engine_wakeup(engine))) - mask |= intel_engine_flag(engine); + for_each_engine(engine, i915, id) { + struct intel_breadcrumbs *b = &engine->breadcrumbs; - return mask; -} + spin_lock_irq(&b->lock); -unsigned int intel_kick_signalers(struct drm_i915_private *i915) -{ - struct intel_engine_cs *engine; - enum intel_engine_id id; - unsigned int mask = 0; + if (b->first_wait) { + wake_up_process(b->first_wait->tsk); + mask |= intel_engine_flag(engine); + } - for_each_engine(engine, i915, id) { - if (unlikely(READ_ONCE(engine->breadcrumbs.first_signal))) { - wake_up_process(engine->breadcrumbs.signaler); + if (b->first_signal) { + wake_up_process(b->signaler); mask |= intel_engine_flag(engine); } + + spin_unlock_irq(&b->lock); } return mask; diff --git a/drivers/gpu/drm/i915/intel_color.c b/drivers/gpu/drm/i915/intel_color.c index 445108855275..3784940a4e7a 100644 --- a/drivers/gpu/drm/i915/intel_color.c +++ b/drivers/gpu/drm/i915/intel_color.c @@ -345,11 +345,10 @@ static void haswell_load_luts(struct drm_crtc_state *crtc_state) static void broadwell_load_luts(struct drm_crtc_state *state) { struct drm_crtc *crtc = state->crtc; - struct drm_device *dev = crtc->dev; - struct drm_i915_private *dev_priv = to_i915(dev); + struct drm_i915_private *dev_priv = to_i915(crtc->dev); struct intel_crtc_state *intel_state = to_intel_crtc_state(state); enum pipe pipe = to_intel_crtc(crtc)->pipe; - uint32_t i, lut_size = INTEL_INFO(dev)->color.degamma_lut_size; + uint32_t i, lut_size = INTEL_INFO(dev_priv)->color.degamma_lut_size; if (crtc_state_is_legacy(state)) { haswell_load_luts(state); @@ -428,8 +427,7 @@ static void broadwell_load_luts(struct drm_crtc_state *state) static void cherryview_load_luts(struct drm_crtc_state *state) { struct drm_crtc *crtc = state->crtc; - struct drm_device *dev = crtc->dev; - struct drm_i915_private *dev_priv = to_i915(dev); + struct drm_i915_private *dev_priv = to_i915(crtc->dev); enum pipe pipe = to_intel_crtc(crtc)->pipe; struct drm_color_lut *lut; uint32_t i, lut_size; @@ -446,7 +444,7 @@ static void cherryview_load_luts(struct drm_crtc_state *state) if (state->degamma_lut) { lut = (struct drm_color_lut *) state->degamma_lut->data; - lut_size = INTEL_INFO(dev)->color.degamma_lut_size; + lut_size = INTEL_INFO(dev_priv)->color.degamma_lut_size; for (i = 0; i < lut_size; i++) { /* Write LUT in U0.14 format. */ word0 = @@ -461,7 +459,7 @@ static void cherryview_load_luts(struct drm_crtc_state *state) if (state->gamma_lut) { lut = (struct drm_color_lut *) state->gamma_lut->data; - lut_size = INTEL_INFO(dev)->color.gamma_lut_size; + lut_size = INTEL_INFO(dev_priv)->color.gamma_lut_size; for (i = 0; i < lut_size; i++) { /* Write LUT in U0.10 format. */ word0 = @@ -497,12 +495,12 @@ void intel_color_load_luts(struct drm_crtc_state *crtc_state) int intel_color_check(struct drm_crtc *crtc, struct drm_crtc_state *crtc_state) { - struct drm_device *dev = crtc->dev; + struct drm_i915_private *dev_priv = to_i915(crtc->dev); size_t gamma_length, degamma_length; - degamma_length = INTEL_INFO(dev)->color.degamma_lut_size * + degamma_length = INTEL_INFO(dev_priv)->color.degamma_lut_size * sizeof(struct drm_color_lut); - gamma_length = INTEL_INFO(dev)->color.gamma_lut_size * + gamma_length = INTEL_INFO(dev_priv)->color.gamma_lut_size * sizeof(struct drm_color_lut); /* @@ -529,8 +527,7 @@ int intel_color_check(struct drm_crtc *crtc, void intel_color_init(struct drm_crtc *crtc) { - struct drm_device *dev = crtc->dev; - struct drm_i915_private *dev_priv = to_i915(dev); + struct drm_i915_private *dev_priv = to_i915(crtc->dev); drm_mode_crtc_set_gamma_size(crtc, 256); @@ -549,10 +546,10 @@ void intel_color_init(struct drm_crtc *crtc) } /* Enable color management support when we have degamma & gamma LUTs. */ - if (INTEL_INFO(dev)->color.degamma_lut_size != 0 && - INTEL_INFO(dev)->color.gamma_lut_size != 0) + if (INTEL_INFO(dev_priv)->color.degamma_lut_size != 0 && + INTEL_INFO(dev_priv)->color.gamma_lut_size != 0) drm_crtc_enable_color_mgmt(crtc, - INTEL_INFO(dev)->color.degamma_lut_size, - true, - INTEL_INFO(dev)->color.gamma_lut_size); + INTEL_INFO(dev_priv)->color.degamma_lut_size, + true, + INTEL_INFO(dev_priv)->color.gamma_lut_size); } diff --git a/drivers/gpu/drm/i915/intel_crt.c b/drivers/gpu/drm/i915/intel_crt.c index 30eb95b54dcf..fed61958ffd4 100644 --- a/drivers/gpu/drm/i915/intel_crt.c +++ b/drivers/gpu/drm/i915/intel_crt.c @@ -693,7 +693,7 @@ intel_crt_detect(struct drm_connector *connector, bool force) power_domain = intel_display_port_power_domain(intel_encoder); intel_display_power_get(dev_priv, power_domain); - if (I915_HAS_HOTPLUG(dev)) { + if (I915_HAS_HOTPLUG(dev_priv)) { /* We can not rely on the HPD pin always being correctly wired * up, for example many KVM do not pass it through, and so * only trust an assertion that the monitor is connected. @@ -715,7 +715,7 @@ intel_crt_detect(struct drm_connector *connector, bool force) * broken monitor (without edid) to work behind a broken kvm (that fails * to have the right resistors for HP detection) needs to fix this up. * For now just bail out. */ - if (I915_HAS_HOTPLUG(dev) && !i915.load_detect_test) { + if (I915_HAS_HOTPLUG(dev_priv) && !i915.load_detect_test) { status = connector_status_disconnected; goto out; } @@ -915,7 +915,7 @@ void intel_crt_init(struct drm_device *dev) crt->base.disable = intel_disable_crt; } crt->base.enable = intel_enable_crt; - if (I915_HAS_HOTPLUG(dev) && + if (I915_HAS_HOTPLUG(dev_priv) && !dmi_check_system(intel_spurious_crt_detect)) crt->base.hpd_pin = HPD_CRT; if (HAS_DDI(dev_priv)) { @@ -932,7 +932,7 @@ void intel_crt_init(struct drm_device *dev) drm_connector_helper_add(connector, &intel_crt_connector_helper_funcs); - if (!I915_HAS_HOTPLUG(dev)) + if (!I915_HAS_HOTPLUG(dev_priv)) intel_connector->polled = DRM_CONNECTOR_POLL_CONNECT; /* diff --git a/drivers/gpu/drm/i915/intel_ddi.c b/drivers/gpu/drm/i915/intel_ddi.c index 938ac4dbcb45..0ad4e16a639f 100644 --- a/drivers/gpu/drm/i915/intel_ddi.c +++ b/drivers/gpu/drm/i915/intel_ddi.c @@ -1866,7 +1866,7 @@ static void intel_enable_ddi(struct intel_encoder *intel_encoder, if (intel_crtc->config->has_audio) { intel_display_power_get(dev_priv, POWER_DOMAIN_AUDIO); - intel_audio_codec_enable(intel_encoder); + intel_audio_codec_enable(intel_encoder, pipe_config, conn_state); } } diff --git a/drivers/gpu/drm/i915/intel_display.c b/drivers/gpu/drm/i915/intel_display.c index 6fd6283fa1f8..2ebb8b833395 100644 --- a/drivers/gpu/drm/i915/intel_display.c +++ b/drivers/gpu/drm/i915/intel_display.c @@ -3008,7 +3008,6 @@ static void i9xx_update_primary_plane(struct drm_plane *primary, struct drm_i915_private *dev_priv = to_i915(dev); struct intel_crtc *intel_crtc = to_intel_crtc(crtc_state->base.crtc); struct drm_framebuffer *fb = plane_state->base.fb; - struct drm_i915_gem_object *obj = intel_fb_obj(fb); int plane = intel_crtc->plane; u32 linear_offset; u32 dspcntr; @@ -3070,6 +3069,12 @@ static void i9xx_update_primary_plane(struct drm_plane *primary, fb->modifier[0] == I915_FORMAT_MOD_X_TILED) dspcntr |= DISPPLANE_TILED; + if (rotation & DRM_ROTATE_180) + dspcntr |= DISPPLANE_ROTATE_180; + + if (rotation & DRM_REFLECT_X) + dspcntr |= DISPPLANE_MIRROR; + if (IS_G4X(dev_priv)) dspcntr |= DISPPLANE_TRICKLE_FEED_DISABLE; @@ -3079,11 +3084,11 @@ static void i9xx_update_primary_plane(struct drm_plane *primary, intel_crtc->dspaddr_offset = intel_compute_tile_offset(&x, &y, plane_state, 0); - if (rotation == DRM_ROTATE_180) { - dspcntr |= DISPPLANE_ROTATE_180; - - x += (crtc_state->pipe_src_w - 1); - y += (crtc_state->pipe_src_h - 1); + if (rotation & DRM_ROTATE_180) { + x += crtc_state->pipe_src_w - 1; + y += crtc_state->pipe_src_h - 1; + } else if (rotation & DRM_REFLECT_X) { + x += crtc_state->pipe_src_w - 1; } linear_offset = intel_fb_xy_to_linear(x, y, plane_state, 0); @@ -3103,8 +3108,11 @@ static void i9xx_update_primary_plane(struct drm_plane *primary, intel_crtc->dspaddr_offset); I915_WRITE(DSPTILEOFF(plane), (y << 16) | x); I915_WRITE(DSPLINOFF(plane), linear_offset); - } else - I915_WRITE(DSPADDR(plane), i915_gem_object_ggtt_offset(obj, NULL) + linear_offset); + } else { + I915_WRITE(DSPADDR(plane), + intel_fb_gtt_offset(fb, rotation) + + intel_crtc->dspaddr_offset); + } POSTING_READ(reg); } @@ -3172,6 +3180,9 @@ static void ironlake_update_primary_plane(struct drm_plane *primary, if (fb->modifier[0] == I915_FORMAT_MOD_X_TILED) dspcntr |= DISPPLANE_TILED; + if (rotation & DRM_ROTATE_180) + dspcntr |= DISPPLANE_ROTATE_180; + if (!IS_HASWELL(dev_priv) && !IS_BROADWELL(dev_priv)) dspcntr |= DISPPLANE_TRICKLE_FEED_DISABLE; @@ -3180,13 +3191,11 @@ static void ironlake_update_primary_plane(struct drm_plane *primary, intel_crtc->dspaddr_offset = intel_compute_tile_offset(&x, &y, plane_state, 0); - if (rotation == DRM_ROTATE_180) { - dspcntr |= DISPPLANE_ROTATE_180; - - if (!IS_HASWELL(dev_priv) && !IS_BROADWELL(dev_priv)) { - x += (crtc_state->pipe_src_w - 1); - y += (crtc_state->pipe_src_h - 1); - } + /* HSW+ does this automagically in hardware */ + if (!IS_HASWELL(dev_priv) && !IS_BROADWELL(dev_priv) && + rotation & DRM_ROTATE_180) { + x += crtc_state->pipe_src_w - 1; + y += crtc_state->pipe_src_h - 1; } linear_offset = intel_fb_xy_to_linear(x, y, plane_state, 0); @@ -3376,9 +3385,6 @@ static void skylake_update_primary_plane(struct drm_plane *plane, struct drm_i915_private *dev_priv = to_i915(dev); struct intel_crtc *intel_crtc = to_intel_crtc(crtc_state->base.crtc); struct drm_framebuffer *fb = plane_state->base.fb; - const struct skl_wm_values *wm = &dev_priv->wm.skl_results; - const struct skl_plane_wm *p_wm = - &crtc_state->wm.skl.optimal.planes[0]; int pipe = intel_crtc->pipe; u32 plane_ctl; unsigned int rotation = plane_state->base.rotation; @@ -3414,9 +3420,6 @@ static void skylake_update_primary_plane(struct drm_plane *plane, intel_crtc->adjusted_x = src_x; intel_crtc->adjusted_y = src_y; - if (wm->dirty_pipes & drm_crtc_mask(&intel_crtc->base)) - skl_write_plane_wm(intel_crtc, p_wm, &wm->ddb, 0); - I915_WRITE(PLANE_CTL(pipe, 0), plane_ctl); I915_WRITE(PLANE_OFFSET(pipe, 0), (src_y << 16) | src_x); I915_WRITE(PLANE_STRIDE(pipe, 0), stride); @@ -3449,18 +3452,8 @@ static void skylake_disable_primary_plane(struct drm_plane *primary, struct drm_device *dev = crtc->dev; struct drm_i915_private *dev_priv = to_i915(dev); struct intel_crtc *intel_crtc = to_intel_crtc(crtc); - struct intel_crtc_state *cstate = to_intel_crtc_state(crtc->state); - const struct skl_plane_wm *p_wm = &cstate->wm.skl.optimal.planes[0]; int pipe = intel_crtc->pipe; - /* - * We only populate skl_results on watermark updates, and if the - * plane's visiblity isn't actually changing neither is its watermarks. - */ - if (!crtc->primary->state->visible) - skl_write_plane_wm(intel_crtc, p_wm, - &dev_priv->wm.skl_results.ddb, 0); - I915_WRITE(PLANE_CTL(pipe, 0), 0); I915_WRITE(PLANE_SURF(pipe, 0), 0); POSTING_READ(PLANE_SURF(pipe, 0)); @@ -5096,6 +5089,8 @@ static void intel_pre_plane_update(struct intel_crtc_state *old_crtc_state) struct drm_plane_state *old_pri_state = drm_atomic_get_existing_plane_state(old_state, primary); bool modeset = needs_modeset(&pipe_config->base); + struct intel_atomic_state *old_intel_state = + to_intel_atomic_state(old_state); if (old_pri_state) { struct intel_plane_state *primary_state = @@ -5163,7 +5158,8 @@ static void intel_pre_plane_update(struct intel_crtc_state *old_crtc_state) * us to. */ if (dev_priv->display.initial_watermarks != NULL) - dev_priv->display.initial_watermarks(pipe_config); + dev_priv->display.initial_watermarks(old_intel_state, + pipe_config); else if (pipe_config->update_wm_pre) intel_update_watermarks(crtc); } @@ -5319,6 +5315,8 @@ static void ironlake_crtc_enable(struct intel_crtc_state *pipe_config, struct drm_i915_private *dev_priv = to_i915(dev); struct intel_crtc *intel_crtc = to_intel_crtc(crtc); int pipe = intel_crtc->pipe; + struct intel_atomic_state *old_intel_state = + to_intel_atomic_state(old_state); if (WARN_ON(intel_crtc->active)) return; @@ -5377,7 +5375,7 @@ static void ironlake_crtc_enable(struct intel_crtc_state *pipe_config, intel_color_load_luts(&pipe_config->base); if (dev_priv->display.initial_watermarks != NULL) - dev_priv->display.initial_watermarks(intel_crtc->config); + dev_priv->display.initial_watermarks(old_intel_state, intel_crtc->config); intel_enable_pipe(intel_crtc); if (intel_crtc->config->has_pch_encoder) @@ -5413,6 +5411,8 @@ static void haswell_crtc_enable(struct intel_crtc_state *pipe_config, struct intel_crtc *intel_crtc = to_intel_crtc(crtc); int pipe = intel_crtc->pipe, hsw_workaround_pipe; enum transcoder cpu_transcoder = intel_crtc->config->cpu_transcoder; + struct intel_atomic_state *old_intel_state = + to_intel_atomic_state(old_state); if (WARN_ON(intel_crtc->active)) return; @@ -5483,7 +5483,8 @@ static void haswell_crtc_enable(struct intel_crtc_state *pipe_config, intel_ddi_enable_transcoder_func(crtc); if (dev_priv->display.initial_watermarks != NULL) - dev_priv->display.initial_watermarks(pipe_config); + dev_priv->display.initial_watermarks(old_intel_state, + pipe_config); else intel_update_watermarks(intel_crtc); @@ -5494,7 +5495,7 @@ static void haswell_crtc_enable(struct intel_crtc_state *pipe_config, if (intel_crtc->config->has_pch_encoder) lpt_pch_enable(crtc); - if (intel_crtc->config->dp_encoder_is_mst) + if (intel_crtc_has_type(intel_crtc->config, INTEL_OUTPUT_DP_MST)) intel_ddi_set_vc_payload_alloc(crtc, true); assert_vblank_disabled(crtc); @@ -5617,7 +5618,7 @@ static void haswell_crtc_disable(struct intel_crtc_state *old_crtc_state, if (!transcoder_is_dsi(cpu_transcoder)) intel_disable_pipe(intel_crtc); - if (intel_crtc->config->dp_encoder_is_mst) + if (intel_crtc_has_type(intel_crtc->config, INTEL_OUTPUT_DP_MST)) intel_ddi_set_vc_payload_alloc(crtc, false); if (!transcoder_is_dsi(cpu_transcoder)) @@ -7051,7 +7052,7 @@ static int ironlake_check_fdi_lanes(struct drm_device *dev, enum pipe pipe, } } - if (INTEL_INFO(dev)->num_pipes == 2) + if (INTEL_INFO(dev_priv)->num_pipes == 2) return 0; /* Ivybridge 3 pipe is really complicated */ @@ -8432,7 +8433,7 @@ static void i9xx_set_pipeconf(struct intel_crtc *intel_crtc) } } - if (HAS_PIPE_CXSR(dev)) { + if (HAS_PIPE_CXSR(dev_priv)) { if (intel_crtc->lowfreq_avail) { DRM_DEBUG_KMS("enabling CxSR downclocking\n"); pipeconf |= PIPECONF_CXSR_DOWNCLOCK; @@ -10842,16 +10843,9 @@ static void i9xx_update_cursor(struct drm_crtc *crtc, u32 base, struct drm_device *dev = crtc->dev; struct drm_i915_private *dev_priv = to_i915(dev); struct intel_crtc *intel_crtc = to_intel_crtc(crtc); - struct intel_crtc_state *cstate = to_intel_crtc_state(crtc->state); - const struct skl_wm_values *wm = &dev_priv->wm.skl_results; - const struct skl_plane_wm *p_wm = - &cstate->wm.skl.optimal.planes[PLANE_CURSOR]; int pipe = intel_crtc->pipe; uint32_t cntl = 0; - if (INTEL_GEN(dev_priv) >= 9 && wm->dirty_pipes & drm_crtc_mask(crtc)) - skl_write_cursor_wm(intel_crtc, p_wm, &wm->ddb); - if (plane_state && plane_state->base.visible) { cntl = MCURSOR_GAMMA_ENABLE; switch (plane_state->base.crtc_w) { @@ -10873,7 +10867,7 @@ static void i9xx_update_cursor(struct drm_crtc *crtc, u32 base, if (HAS_DDI(dev_priv)) cntl |= CURSOR_PIPE_CSC_ENABLE; - if (plane_state->base.rotation == DRM_ROTATE_180) + if (plane_state->base.rotation & DRM_ROTATE_180) cntl |= CURSOR_ROTATE_180; } @@ -10919,7 +10913,7 @@ static void intel_crtc_update_cursor(struct drm_crtc *crtc, /* ILK+ do this automagically */ if (HAS_GMCH_DISPLAY(dev_priv) && - plane_state->base.rotation == DRM_ROTATE_180) { + plane_state->base.rotation & DRM_ROTATE_180) { base += (plane_state->base.crtc_h * plane_state->base.crtc_w - 1) * 4; } @@ -12062,6 +12056,7 @@ static void intel_mmio_flip_work_func(struct work_struct *w) to_intel_framebuffer(crtc->base.primary->fb); struct drm_i915_gem_object *obj = intel_fb->obj; + i915_gem_object_wait_priority(obj, 0, I915_PRIORITY_DISPLAY); WARN_ON(i915_gem_object_wait(obj, 0, MAX_SCHEDULE_TIMEOUT, NULL) < 0); intel_pipe_update_start(crtc); @@ -13557,11 +13552,15 @@ static void verify_wm_state(struct drm_crtc *crtc, } static void -verify_connector_state(struct drm_device *dev, struct drm_crtc *crtc) +verify_connector_state(struct drm_device *dev, + struct drm_atomic_state *state, + struct drm_crtc *crtc) { struct drm_connector *connector; + struct drm_connector_state *old_conn_state; + int i; - drm_for_each_connector(connector, dev) { + for_each_connector_in_state(state, connector, old_conn_state, i) { struct drm_encoder *encoder = connector->encoder; struct drm_connector_state *state = connector->state; @@ -13769,15 +13768,16 @@ verify_shared_dpll_state(struct drm_device *dev, struct drm_crtc *crtc, static void intel_modeset_verify_crtc(struct drm_crtc *crtc, - struct drm_crtc_state *old_state, - struct drm_crtc_state *new_state) + struct drm_atomic_state *state, + struct drm_crtc_state *old_state, + struct drm_crtc_state *new_state) { if (!needs_modeset(new_state) && !to_intel_crtc_state(new_state)->update_pipe) return; verify_wm_state(crtc, new_state); - verify_connector_state(crtc->dev, crtc); + verify_connector_state(crtc->dev, state, crtc); verify_crtc_state(crtc, old_state, new_state); verify_shared_dpll_state(crtc->dev, crtc, old_state, new_state); } @@ -13793,10 +13793,11 @@ verify_disabled_dpll_state(struct drm_device *dev) } static void -intel_modeset_verify_disabled(struct drm_device *dev) +intel_modeset_verify_disabled(struct drm_device *dev, + struct drm_atomic_state *state) { verify_encoder_state(dev); - verify_connector_state(dev, NULL); + verify_connector_state(dev, state, NULL); verify_disabled_dpll_state(dev); } @@ -14294,6 +14295,14 @@ static void skl_update_crtcs(struct drm_atomic_state *state, unsigned int updated = 0; bool progress; enum pipe pipe; + int i; + + const struct skl_ddb_entry *entries[I915_MAX_PIPES] = {}; + + for_each_crtc_in_state(state, crtc, old_crtc_state, i) + /* ignore allocations for crtc's that have been turned off. */ + if (crtc->state->active) + entries[i] = &to_intel_crtc_state(old_crtc_state)->wm.skl.ddb; /* * Whenever the number of active pipes changes, we need to make sure we @@ -14302,7 +14311,6 @@ static void skl_update_crtcs(struct drm_atomic_state *state, * cause pipe underruns and other bad stuff. */ do { - int i; progress = false; for_each_crtc_in_state(state, crtc, old_crtc_state, i) { @@ -14313,12 +14321,14 @@ static void skl_update_crtcs(struct drm_atomic_state *state, cstate = to_intel_crtc_state(crtc->state); pipe = intel_crtc->pipe; - if (updated & cmask || !crtc->state->active) + if (updated & cmask || !cstate->base.active) continue; - if (skl_ddb_allocation_overlaps(state, intel_crtc)) + + if (skl_ddb_allocation_overlaps(entries, &cstate->wm.skl.ddb, i)) continue; updated |= cmask; + entries[i] = &cstate->wm.skl.ddb; /* * If this is an already active pipe, it's DDB changed, @@ -14327,7 +14337,7 @@ static void skl_update_crtcs(struct drm_atomic_state *state, * new ddb allocation to take effect. */ if (!skl_ddb_entry_equal(&cstate->wm.skl.ddb, - &intel_crtc->hw_ddb) && + &to_intel_crtc_state(old_crtc_state)->wm.skl.ddb) && !crtc->state->active_changed && intel_state->wm_results.dirty_pipes != updated) vbl_wait = true; @@ -14358,14 +14368,8 @@ static void intel_atomic_commit_tail(struct drm_atomic_state *state) drm_atomic_helper_wait_for_dependencies(state); - if (intel_state->modeset) { - memcpy(dev_priv->min_pixclk, intel_state->min_pixclk, - sizeof(intel_state->min_pixclk)); - dev_priv->active_crtcs = intel_state->active_crtcs; - dev_priv->atomic_cdclk_freq = intel_state->cdclk; - + if (intel_state->modeset) intel_display_power_get(dev_priv, POWER_DOMAIN_MODESET); - } for_each_crtc_in_state(state, crtc, old_crtc_state, i) { struct intel_crtc *intel_crtc = to_intel_crtc(crtc); @@ -14398,8 +14402,17 @@ static void intel_atomic_commit_tail(struct drm_atomic_state *state) intel_check_cpu_fifo_underruns(dev_priv); intel_check_pch_fifo_underruns(dev_priv); - if (!crtc->state->active) - intel_update_watermarks(intel_crtc); + if (!crtc->state->active) { + /* + * Make sure we don't call initial_watermarks + * for ILK-style watermark updates. + */ + if (dev_priv->display.atomic_update_watermarks) + dev_priv->display.initial_watermarks(intel_state, + to_intel_crtc_state(crtc->state)); + else + intel_update_watermarks(intel_crtc); + } } } @@ -14422,7 +14435,7 @@ static void intel_atomic_commit_tail(struct drm_atomic_state *state) if (!intel_can_enable_sagv(state)) intel_disable_sagv(dev_priv); - intel_modeset_verify_disabled(dev); + intel_modeset_verify_disabled(dev, state); } /* Complete the events for pipes that have now been disabled */ @@ -14465,7 +14478,8 @@ static void intel_atomic_commit_tail(struct drm_atomic_state *state) intel_cstate = to_intel_crtc_state(crtc->state); if (dev_priv->display.optimize_watermarks) - dev_priv->display.optimize_watermarks(intel_cstate); + dev_priv->display.optimize_watermarks(intel_state, + intel_cstate); } for_each_crtc_in_state(state, crtc, old_crtc_state, i) { @@ -14474,7 +14488,7 @@ static void intel_atomic_commit_tail(struct drm_atomic_state *state) if (put_domains[i]) modeset_put_power_domains(dev_priv, put_domains[i]); - intel_modeset_verify_crtc(crtc, old_crtc_state, crtc->state); + intel_modeset_verify_crtc(crtc, state, old_crtc_state, crtc->state); } if (intel_state->modeset && intel_can_enable_sagv(state)) @@ -14594,10 +14608,16 @@ static int intel_atomic_commit(struct drm_device *dev, drm_atomic_helper_swap_state(state, true); dev_priv->wm.distrust_bios_wm = false; - dev_priv->wm.skl_results = intel_state->wm_results; intel_shared_dpll_commit(state); intel_atomic_track_fbs(state); + if (intel_state->modeset) { + memcpy(dev_priv->min_pixclk, intel_state->min_pixclk, + sizeof(intel_state->min_pixclk)); + dev_priv->active_crtcs = intel_state->active_crtcs; + dev_priv->atomic_cdclk_freq = intel_state->cdclk; + } + drm_atomic_state_get(state); INIT_WORK(&state->commit_work, nonblock ? intel_atomic_commit_work : NULL); @@ -14720,8 +14740,7 @@ intel_prepare_plane_fb(struct drm_plane *plane, { struct intel_atomic_state *intel_state = to_intel_atomic_state(new_state->state); - struct drm_device *dev = plane->dev; - struct drm_i915_private *dev_priv = to_i915(dev); + struct drm_i915_private *dev_priv = to_i915(plane->dev); struct drm_framebuffer *fb = new_state->fb; struct drm_i915_gem_object *obj = intel_fb_obj(fb); struct drm_i915_gem_object *old_obj = intel_fb_obj(plane->state->fb); @@ -14775,10 +14794,12 @@ intel_prepare_plane_fb(struct drm_plane *plane, GFP_KERNEL); if (ret < 0) return ret; + + i915_gem_object_wait_priority(obj, 0, I915_PRIORITY_DISPLAY); } if (plane->type == DRM_PLANE_TYPE_CURSOR && - INTEL_INFO(dev)->cursor_needs_physical) { + INTEL_INFO(dev_priv)->cursor_needs_physical) { int align = IS_I830(dev_priv) ? 16 * 1024 : 256; ret = i915_gem_object_attach_phys(obj, align); if (ret) { @@ -14811,7 +14832,7 @@ void intel_cleanup_plane_fb(struct drm_plane *plane, struct drm_plane_state *old_state) { - struct drm_device *dev = plane->dev; + struct drm_i915_private *dev_priv = to_i915(plane->dev); struct intel_plane_state *old_intel_state; struct drm_i915_gem_object *old_obj = intel_fb_obj(old_state->fb); struct drm_i915_gem_object *obj = intel_fb_obj(plane->state->fb); @@ -14822,7 +14843,7 @@ intel_cleanup_plane_fb(struct drm_plane *plane, return; if (old_obj && (plane->type != DRM_PLANE_TYPE_CURSOR || - !INTEL_INFO(dev)->cursor_needs_physical)) + !INTEL_INFO(dev_priv)->cursor_needs_physical)) intel_unpin_fb_obj(old_state->fb, old_state->rotation); } @@ -14900,30 +14921,32 @@ static void intel_begin_crtc_commit(struct drm_crtc *crtc, struct intel_crtc *intel_crtc = to_intel_crtc(crtc); struct intel_crtc_state *intel_cstate = to_intel_crtc_state(crtc->state); - struct intel_crtc_state *old_intel_state = + struct intel_crtc_state *old_intel_cstate = to_intel_crtc_state(old_crtc_state); + struct intel_atomic_state *old_intel_state = + to_intel_atomic_state(old_crtc_state->state); bool modeset = needs_modeset(crtc->state); - enum pipe pipe = intel_crtc->pipe; /* Perform vblank evasion around commit operation */ intel_pipe_update_start(intel_crtc); if (modeset) - return; + goto out; if (crtc->state->color_mgmt_changed || to_intel_crtc_state(crtc->state)->update_pipe) { intel_color_set_csc(crtc->state); intel_color_load_luts(crtc->state); } - if (intel_cstate->update_pipe) { - intel_update_pipe_config(intel_crtc, old_intel_state); - } else if (INTEL_GEN(dev_priv) >= 9) { + if (intel_cstate->update_pipe) + intel_update_pipe_config(intel_crtc, old_intel_cstate); + else if (INTEL_GEN(dev_priv) >= 9) skl_detach_scalers(intel_crtc); - I915_WRITE(PIPE_WM_LINETIME(pipe), - intel_cstate->wm.skl.optimal.linetime); - } +out: + if (dev_priv->display.atomic_update_watermarks) + dev_priv->display.atomic_update_watermarks(old_intel_state, + intel_cstate); } static void intel_finish_crtc_commit(struct drm_crtc *crtc, @@ -14989,11 +15012,16 @@ intel_primary_plane_create(struct drm_i915_private *dev_priv, enum pipe pipe) state->scaler_id = -1; } primary->pipe = pipe; - primary->plane = pipe; + /* + * On gen2/3 only plane A can do FBC, but the panel fitter and LVDS + * port is hooked to pipe B. Hence we want plane A feeding pipe B. + */ + if (HAS_FBC(dev_priv) && INTEL_GEN(dev_priv) < 4) + primary->plane = (enum plane) !pipe; + else + primary->plane = (enum plane) pipe; primary->frontbuffer_bit = INTEL_FRONTBUFFER_PRIMARY(pipe); primary->check_plane = intel_check_primary_plane; - if (HAS_FBC(dev_priv) && INTEL_GEN(dev_priv) < 4) - primary->plane = !pipe; if (INTEL_GEN(dev_priv) >= 9) { intel_primary_formats = skl_primary_formats; @@ -15046,6 +15074,10 @@ intel_primary_plane_create(struct drm_i915_private *dev_priv, enum pipe pipe) supported_rotations = DRM_ROTATE_0 | DRM_ROTATE_90 | DRM_ROTATE_180 | DRM_ROTATE_270; + } else if (IS_CHERRYVIEW(dev_priv) && pipe == PIPE_B) { + supported_rotations = + DRM_ROTATE_0 | DRM_ROTATE_180 | + DRM_REFLECT_X; } else if (INTEL_GEN(dev_priv) >= 4) { supported_rotations = DRM_ROTATE_0 | DRM_ROTATE_180; @@ -15147,13 +15179,13 @@ intel_update_cursor_plane(struct drm_plane *plane, { struct drm_crtc *crtc = crtc_state->base.crtc; struct intel_crtc *intel_crtc = to_intel_crtc(crtc); - struct drm_device *dev = plane->dev; + struct drm_i915_private *dev_priv = to_i915(plane->dev); struct drm_i915_gem_object *obj = intel_fb_obj(state->base.fb); uint32_t addr; if (!obj) addr = 0; - else if (!INTEL_INFO(dev)->cursor_needs_physical) + else if (!INTEL_INFO(dev_priv)->cursor_needs_physical) addr = i915_gem_object_ggtt_offset(obj, NULL); else addr = obj->phys_handle->busaddr; @@ -15280,14 +15312,14 @@ static int intel_crtc_init(struct drm_i915_private *dev_priv, enum pipe pipe) struct intel_plane *plane; plane = intel_sprite_plane_create(dev_priv, pipe, sprite); - if (!plane) { + if (IS_ERR(plane)) { ret = PTR_ERR(plane); goto fail; } } cursor = intel_cursor_plane_create(dev_priv, pipe); - if (!cursor) { + if (IS_ERR(cursor)) { ret = PTR_ERR(cursor); goto fail; } @@ -15299,16 +15331,8 @@ static int intel_crtc_init(struct drm_i915_private *dev_priv, enum pipe pipe) if (ret) goto fail; - /* - * On gen2/3 only plane A can do fbc, but the panel fitter and lvds port - * is hooked to pipe B. Hence we want plane A feeding pipe B. - */ intel_crtc->pipe = pipe; - intel_crtc->plane = (enum plane) pipe; - if (HAS_FBC(dev_priv) && INTEL_GEN(dev_priv) < 4) { - DRM_DEBUG_KMS("swapping pipes & planes for FBC\n"); - intel_crtc->plane = !pipe; - } + intel_crtc->plane = primary->plane; intel_crtc->cursor_base = ~0; intel_crtc->cursor_cntl = ~0; @@ -15634,7 +15658,7 @@ static void intel_setup_outputs(struct drm_device *dev) } else if (IS_GEN2(dev_priv)) intel_dvo_init(dev); - if (SUPPORTS_TV(dev)) + if (SUPPORTS_TV(dev_priv)) intel_tv_init(dev); intel_psr_init(dev); @@ -16339,6 +16363,7 @@ static void sanitize_watermarks(struct drm_device *dev) { struct drm_i915_private *dev_priv = to_i915(dev); struct drm_atomic_state *state; + struct intel_atomic_state *intel_state; struct drm_crtc *crtc; struct drm_crtc_state *cstate; struct drm_modeset_acquire_ctx ctx; @@ -16367,12 +16392,14 @@ retry: if (WARN_ON(IS_ERR(state))) goto fail; + intel_state = to_intel_atomic_state(state); + /* * Hardware readout is the only time we don't want to calculate * intermediate watermarks (since we don't trust the current * watermarks). */ - to_intel_atomic_state(state)->skip_intermediate_wm = true; + intel_state->skip_intermediate_wm = true; ret = intel_atomic_check(dev, state); if (ret) { @@ -16396,7 +16423,7 @@ retry: struct intel_crtc_state *cs = to_intel_crtc_state(cstate); cs->wm.need_postvbl_update = true; - dev_priv->display.optimize_watermarks(cs); + dev_priv->display.optimize_watermarks(intel_state, cs); } put_state: @@ -16429,7 +16456,7 @@ int intel_modeset_init(struct drm_device *dev) intel_init_pm(dev_priv); - if (INTEL_INFO(dev)->num_pipes == 0) + if (INTEL_INFO(dev_priv)->num_pipes == 0) return 0; /* @@ -16475,8 +16502,8 @@ int intel_modeset_init(struct drm_device *dev) dev->mode_config.fb_base = ggtt->mappable_base; DRM_DEBUG_KMS("%d display pipe%s available.\n", - INTEL_INFO(dev)->num_pipes, - INTEL_INFO(dev)->num_pipes > 1 ? "s" : ""); + INTEL_INFO(dev_priv)->num_pipes, + INTEL_INFO(dev_priv)->num_pipes > 1 ? "s" : ""); for_each_pipe(dev_priv, pipe) { int ret; @@ -16564,11 +16591,10 @@ static void intel_enable_pipe_a(struct drm_device *dev) static bool intel_check_plane_mapping(struct intel_crtc *crtc) { - struct drm_device *dev = crtc->base.dev; - struct drm_i915_private *dev_priv = to_i915(dev); + struct drm_i915_private *dev_priv = to_i915(crtc->base.dev); u32 val; - if (INTEL_INFO(dev)->num_pipes == 1) + if (INTEL_INFO(dev_priv)->num_pipes == 1) return true; val = I915_READ(DSPCNTR(!crtc->plane)); @@ -17321,7 +17347,7 @@ intel_display_print_error_state(struct drm_i915_error_state_buf *m, if (!error) return; - err_printf(m, "Num Pipes: %d\n", INTEL_INFO(dev)->num_pipes); + err_printf(m, "Num Pipes: %d\n", INTEL_INFO(dev_priv)->num_pipes); if (IS_HASWELL(dev_priv) || IS_BROADWELL(dev_priv)) err_printf(m, "PWR_WELL_CTL2: %08x\n", error->power_well_driver); diff --git a/drivers/gpu/drm/i915/intel_dp.c b/drivers/gpu/drm/i915/intel_dp.c index 9df331b3305b..a1b0181f42c4 100644 --- a/drivers/gpu/drm/i915/intel_dp.c +++ b/drivers/gpu/drm/i915/intel_dp.c @@ -942,14 +942,14 @@ intel_dp_aux_ch(struct intel_dp *intel_dp, uint8_t *recv, int recv_size) { struct intel_digital_port *intel_dig_port = dp_to_dig_port(intel_dp); - struct drm_device *dev = intel_dig_port->base.base.dev; - struct drm_i915_private *dev_priv = to_i915(dev); + struct drm_i915_private *dev_priv = + to_i915(intel_dig_port->base.base.dev); i915_reg_t ch_ctl = intel_dp->aux_ch_ctl_reg; uint32_t aux_clock_divider; int i, ret, recv_bytes; uint32_t status; int try, clock = 0; - bool has_aux_irq = HAS_AUX_IRQ(dev); + bool has_aux_irq = HAS_AUX_IRQ(dev_priv); bool vdd; pps_lock(intel_dp); @@ -1791,9 +1791,7 @@ static void intel_dp_prepare(struct intel_encoder *encoder, trans_dp &= ~TRANS_DP_ENH_FRAMING; I915_WRITE(TRANS_DP_CTL(crtc->pipe), trans_dp); } else { - if (!HAS_PCH_SPLIT(dev_priv) && !IS_VALLEYVIEW(dev_priv) && - !IS_CHERRYVIEW(dev_priv) && - pipe_config->limited_color_range) + if (IS_G4X(dev_priv) && pipe_config->limited_color_range) intel_dp->DP |= DP_COLOR_RANGE_16_235; if (adjusted_mode->flags & DRM_MODE_FLAG_PHSYNC) @@ -2515,8 +2513,7 @@ static void intel_dp_get_config(struct intel_encoder *encoder, pipe_config->base.adjusted_mode.flags |= flags; - if (!HAS_PCH_SPLIT(dev_priv) && !IS_VALLEYVIEW(dev_priv) && - !IS_CHERRYVIEW(dev_priv) && tmp & DP_COLOR_RANGE_16_235) + if (IS_G4X(dev_priv) && tmp & DP_COLOR_RANGE_16_235) pipe_config->limited_color_range = true; pipe_config->lane_count = @@ -2735,7 +2732,8 @@ static void intel_dp_enable_port(struct intel_dp *intel_dp, } static void intel_enable_dp(struct intel_encoder *encoder, - struct intel_crtc_state *pipe_config) + struct intel_crtc_state *pipe_config, + struct drm_connector_state *conn_state) { struct intel_dp *intel_dp = enc_to_intel_dp(&encoder->base); struct drm_device *dev = encoder->base.dev; @@ -2777,7 +2775,7 @@ static void intel_enable_dp(struct intel_encoder *encoder, if (pipe_config->has_audio) { DRM_DEBUG_DRIVER("Enabling DP audio on pipe %c\n", pipe_name(pipe)); - intel_audio_codec_enable(encoder); + intel_audio_codec_enable(encoder, pipe_config, conn_state); } } @@ -2787,7 +2785,7 @@ static void g4x_enable_dp(struct intel_encoder *encoder, { struct intel_dp *intel_dp = enc_to_intel_dp(&encoder->base); - intel_enable_dp(encoder, pipe_config); + intel_enable_dp(encoder, pipe_config, conn_state); intel_edp_backlight_on(intel_dp); } @@ -2924,7 +2922,7 @@ static void vlv_pre_enable_dp(struct intel_encoder *encoder, { vlv_phy_pre_encoder_enable(encoder); - intel_enable_dp(encoder, pipe_config); + intel_enable_dp(encoder, pipe_config, conn_state); } static void vlv_dp_pre_pll_enable(struct intel_encoder *encoder, @@ -2942,7 +2940,7 @@ static void chv_pre_enable_dp(struct intel_encoder *encoder, { chv_phy_pre_encoder_enable(encoder); - intel_enable_dp(encoder, pipe_config); + intel_enable_dp(encoder, pipe_config, conn_state); /* Second common lane will stay alive on its own now */ chv_phy_release_cl2_override(encoder); @@ -5742,7 +5740,7 @@ intel_dp_init_connector(struct intel_digital_port *intel_dig_port, } /* init MST on ports that can support it */ - if (HAS_DP_MST(dev) && !is_edp(intel_dp) && + if (HAS_DP_MST(dev_priv) && !is_edp(intel_dp) && (port == PORT_B || port == PORT_C || port == PORT_D)) intel_dp_mst_encoder_init(intel_dig_port, intel_connector->base.base.id); diff --git a/drivers/gpu/drm/i915/intel_dp_mst.c b/drivers/gpu/drm/i915/intel_dp_mst.c index 3ffbd69e4551..b029d1026a28 100644 --- a/drivers/gpu/drm/i915/intel_dp_mst.c +++ b/drivers/gpu/drm/i915/intel_dp_mst.c @@ -43,7 +43,6 @@ static bool intel_dp_mst_compute_config(struct intel_encoder *encoder, const struct drm_display_mode *adjusted_mode = &pipe_config->base.adjusted_mode; int mst_pbn; - pipe_config->dp_encoder_is_mst = true; pipe_config->has_pch_encoder = false; bpp = 24; /* diff --git a/drivers/gpu/drm/i915/intel_drv.h b/drivers/gpu/drm/i915/intel_drv.h index 398195bf6dd1..5b93455b98b7 100644 --- a/drivers/gpu/drm/i915/intel_drv.h +++ b/drivers/gpu/drm/i915/intel_drv.h @@ -294,6 +294,9 @@ struct intel_connector { */ struct intel_encoder *encoder; + /* ACPI device id for ACPI and driver cooperation */ + u32 acpi_device_id; + /* Reads out the current hw, returning true if the connector is enabled * and active (i.e. dpms ON state). */ bool (*get_hw_state)(struct intel_connector *); @@ -652,7 +655,6 @@ struct intel_crtc_state { bool double_wide; - bool dp_encoder_is_mst; int pbn; struct intel_crtc_scaler_state scaler_state; @@ -728,9 +730,6 @@ struct intel_crtc { bool cxsr_allowed; } wm; - /* gen9+: ddb allocation currently being used */ - struct skl_ddb_entry hw_ddb; - int scanline_offset; struct { @@ -1187,7 +1186,9 @@ u32 intel_fb_stride_alignment(const struct drm_i915_private *dev_priv, /* intel_audio.c */ void intel_init_audio_hooks(struct drm_i915_private *dev_priv); -void intel_audio_codec_enable(struct intel_encoder *encoder); +void intel_audio_codec_enable(struct intel_encoder *encoder, + const struct intel_crtc_state *crtc_state, + const struct drm_connector_state *conn_state); void intel_audio_codec_disable(struct intel_encoder *encoder); void i915_audio_component_init(struct drm_i915_private *dev_priv); void i915_audio_component_cleanup(struct drm_i915_private *dev_priv); @@ -1738,18 +1739,9 @@ int intel_enable_sagv(struct drm_i915_private *dev_priv); int intel_disable_sagv(struct drm_i915_private *dev_priv); bool skl_wm_level_equals(const struct skl_wm_level *l1, const struct skl_wm_level *l2); -bool skl_ddb_allocation_equals(const struct skl_ddb_allocation *old, - const struct skl_ddb_allocation *new, - enum pipe pipe); -bool skl_ddb_allocation_overlaps(struct drm_atomic_state *state, - struct intel_crtc *intel_crtc); -void skl_write_cursor_wm(struct intel_crtc *intel_crtc, - const struct skl_plane_wm *wm, - const struct skl_ddb_allocation *ddb); -void skl_write_plane_wm(struct intel_crtc *intel_crtc, - const struct skl_plane_wm *wm, - const struct skl_ddb_allocation *ddb, - int plane); +bool skl_ddb_allocation_overlaps(const struct skl_ddb_entry **entries, + const struct skl_ddb_entry *ddb, + int ignore); uint32_t ilk_pipe_pixel_rate(const struct intel_crtc_state *pipe_config); bool ilk_disable_lp_wm(struct drm_device *dev); int sanitize_rc6_option(struct drm_i915_private *dev_priv, int enable_rc6); diff --git a/drivers/gpu/drm/i915/intel_engine_cs.c b/drivers/gpu/drm/i915/intel_engine_cs.c index 841f8d1e1410..3da4d466e332 100644 --- a/drivers/gpu/drm/i915/intel_engine_cs.c +++ b/drivers/gpu/drm/i915/intel_engine_cs.c @@ -102,6 +102,9 @@ intel_engine_setup(struct drm_i915_private *dev_priv, engine->mmio_base = info->mmio_base; engine->irq_shift = info->irq_shift; + /* Nothing to do here, execute in order of dependencies */ + engine->schedule = NULL; + dev_priv->engine[id] = engine; return 0; } @@ -236,8 +239,8 @@ static void intel_engine_init_timeline(struct intel_engine_cs *engine) */ void intel_engine_setup_common(struct intel_engine_cs *engine) { - INIT_LIST_HEAD(&engine->execlist_queue); - spin_lock_init(&engine->execlist_lock); + engine->execlist_queue = RB_ROOT; + engine->execlist_first = NULL; intel_engine_init_timeline(engine); intel_engine_init_hangcheck(engine); diff --git a/drivers/gpu/drm/i915/intel_fbc.c b/drivers/gpu/drm/i915/intel_fbc.c index e230d480c5e6..62f215b12eb5 100644 --- a/drivers/gpu/drm/i915/intel_fbc.c +++ b/drivers/gpu/drm/i915/intel_fbc.c @@ -48,17 +48,17 @@ static inline bool fbc_supported(struct drm_i915_private *dev_priv) static inline bool fbc_on_pipe_a_only(struct drm_i915_private *dev_priv) { - return IS_HASWELL(dev_priv) || INTEL_INFO(dev_priv)->gen >= 8; + return IS_HASWELL(dev_priv) || INTEL_GEN(dev_priv) >= 8; } static inline bool fbc_on_plane_a_only(struct drm_i915_private *dev_priv) { - return INTEL_INFO(dev_priv)->gen < 4; + return INTEL_GEN(dev_priv) < 4; } static inline bool no_fbc_on_multiple_pipes(struct drm_i915_private *dev_priv) { - return INTEL_INFO(dev_priv)->gen <= 3; + return INTEL_GEN(dev_priv) <= 3; } /* @@ -351,7 +351,7 @@ static void gen7_fbc_activate(struct drm_i915_private *dev_priv) static bool intel_fbc_hw_is_active(struct drm_i915_private *dev_priv) { - if (INTEL_INFO(dev_priv)->gen >= 5) + if (INTEL_GEN(dev_priv) >= 5) return ilk_fbc_is_active(dev_priv); else if (IS_GM45(dev_priv)) return g4x_fbc_is_active(dev_priv); @@ -365,9 +365,9 @@ static void intel_fbc_hw_activate(struct drm_i915_private *dev_priv) fbc->active = true; - if (INTEL_INFO(dev_priv)->gen >= 7) + if (INTEL_GEN(dev_priv) >= 7) gen7_fbc_activate(dev_priv); - else if (INTEL_INFO(dev_priv)->gen >= 5) + else if (INTEL_GEN(dev_priv) >= 5) ilk_fbc_activate(dev_priv); else if (IS_GM45(dev_priv)) g4x_fbc_activate(dev_priv); @@ -381,7 +381,7 @@ static void intel_fbc_hw_deactivate(struct drm_i915_private *dev_priv) fbc->active = false; - if (INTEL_INFO(dev_priv)->gen >= 5) + if (INTEL_GEN(dev_priv) >= 5) ilk_fbc_deactivate(dev_priv); else if (IS_GM45(dev_priv)) g4x_fbc_deactivate(dev_priv); @@ -561,7 +561,7 @@ again: ret = i915_gem_stolen_insert_node_in_range(dev_priv, node, size >>= 1, 4096, 0, end); - if (ret && INTEL_INFO(dev_priv)->gen <= 4) { + if (ret && INTEL_GEN(dev_priv) <= 4) { return 0; } else if (ret) { compression_threshold <<= 1; @@ -594,7 +594,7 @@ static int intel_fbc_alloc_cfb(struct intel_crtc *crtc) fbc->threshold = ret; - if (INTEL_INFO(dev_priv)->gen >= 5) + if (INTEL_GEN(dev_priv) >= 5) I915_WRITE(ILK_DPFC_CB_BASE, fbc->compressed_fb.start); else if (IS_GM45(dev_priv)) { I915_WRITE(DPFC_CB_BASE, fbc->compressed_fb.start); @@ -708,10 +708,10 @@ static bool intel_fbc_hw_tracking_covers_screen(struct intel_crtc *crtc) struct intel_fbc *fbc = &dev_priv->fbc; unsigned int effective_w, effective_h, max_w, max_h; - if (INTEL_INFO(dev_priv)->gen >= 8 || IS_HASWELL(dev_priv)) { + if (INTEL_GEN(dev_priv) >= 8 || IS_HASWELL(dev_priv)) { max_w = 4096; max_h = 4096; - } else if (IS_G4X(dev_priv) || INTEL_INFO(dev_priv)->gen >= 5) { + } else if (IS_G4X(dev_priv) || INTEL_GEN(dev_priv) >= 5) { max_w = 4096; max_h = 2048; } else { @@ -812,7 +812,7 @@ static bool intel_fbc_can_activate(struct intel_crtc *crtc) fbc->no_fbc_reason = "framebuffer not tiled or fenced"; return false; } - if (INTEL_INFO(dev_priv)->gen <= 4 && !IS_G4X(dev_priv) && + if (INTEL_GEN(dev_priv) <= 4 && !IS_G4X(dev_priv) && cache->plane.rotation != DRM_ROTATE_0) { fbc->no_fbc_reason = "rotation unsupported"; return false; @@ -854,9 +854,8 @@ static bool intel_fbc_can_activate(struct intel_crtc *crtc) return true; } -static bool intel_fbc_can_choose(struct intel_crtc *crtc) +static bool intel_fbc_can_enable(struct drm_i915_private *dev_priv) { - struct drm_i915_private *dev_priv = to_i915(crtc->base.dev); struct intel_fbc *fbc = &dev_priv->fbc; if (intel_vgpu_active(dev_priv)) { @@ -874,16 +873,6 @@ static bool intel_fbc_can_choose(struct intel_crtc *crtc) return false; } - if (fbc_on_pipe_a_only(dev_priv) && crtc->pipe != PIPE_A) { - fbc->no_fbc_reason = "no enabled pipes can have FBC"; - return false; - } - - if (fbc_on_plane_a_only(dev_priv) && crtc->plane != PLANE_A) { - fbc->no_fbc_reason = "no enabled planes can have FBC"; - return false; - } - return true; } @@ -1066,23 +1055,19 @@ void intel_fbc_choose_crtc(struct drm_i915_private *dev_priv, struct drm_atomic_state *state) { struct intel_fbc *fbc = &dev_priv->fbc; - struct drm_crtc *crtc; - struct drm_crtc_state *crtc_state; struct drm_plane *plane; struct drm_plane_state *plane_state; - bool fbc_crtc_present = false; - int i, j; + bool crtc_chosen = false; + int i; mutex_lock(&fbc->lock); - for_each_crtc_in_state(state, crtc, crtc_state, i) { - if (fbc->crtc == to_intel_crtc(crtc)) { - fbc_crtc_present = true; - break; - } - } - /* This atomic commit doesn't involve the CRTC currently tied to FBC. */ - if (!fbc_crtc_present && fbc->crtc != NULL) + /* Does this atomic commit involve the CRTC currently tied to FBC? */ + if (fbc->crtc && + !drm_atomic_get_existing_crtc_state(state, &fbc->crtc->base)) + goto out; + + if (!intel_fbc_can_enable(dev_priv)) goto out; /* Simply choose the first CRTC that is compatible and has a visible @@ -1092,25 +1077,29 @@ void intel_fbc_choose_crtc(struct drm_i915_private *dev_priv, for_each_plane_in_state(state, plane, plane_state, i) { struct intel_plane_state *intel_plane_state = to_intel_plane_state(plane_state); + struct intel_crtc_state *intel_crtc_state; + struct intel_crtc *crtc = to_intel_crtc(plane_state->crtc); if (!intel_plane_state->base.visible) continue; - for_each_crtc_in_state(state, crtc, crtc_state, j) { - struct intel_crtc_state *intel_crtc_state = - to_intel_crtc_state(crtc_state); + if (fbc_on_pipe_a_only(dev_priv) && crtc->pipe != PIPE_A) + continue; - if (plane_state->crtc != crtc) - continue; + if (fbc_on_plane_a_only(dev_priv) && crtc->plane != PLANE_A) + continue; - if (!intel_fbc_can_choose(to_intel_crtc(crtc))) - break; + intel_crtc_state = to_intel_crtc_state( + drm_atomic_get_existing_crtc_state(state, &crtc->base)); - intel_crtc_state->enable_fbc = true; - goto out; - } + intel_crtc_state->enable_fbc = true; + crtc_chosen = true; + break; } + if (!crtc_chosen) + fbc->no_fbc_reason = "no suitable CRTC for FBC"; + out: mutex_unlock(&fbc->lock); } @@ -1386,7 +1375,7 @@ void intel_fbc_init(struct drm_i915_private *dev_priv) } /* This value was pulled out of someone's hat */ - if (INTEL_INFO(dev_priv)->gen <= 4 && !IS_GM45(dev_priv)) + if (INTEL_GEN(dev_priv) <= 4 && !IS_GM45(dev_priv)) I915_WRITE(FBC_CONTROL, 500 << FBC_CTL_INTERVAL_SHIFT); /* We still don't have any sort of hardware state readout for FBC, so diff --git a/drivers/gpu/drm/i915/intel_fbdev.c b/drivers/gpu/drm/i915/intel_fbdev.c index e87221bba475..fc958d5ed0dc 100644 --- a/drivers/gpu/drm/i915/intel_fbdev.c +++ b/drivers/gpu/drm/i915/intel_fbdev.c @@ -356,7 +356,7 @@ static bool intel_fb_initial_config(struct drm_fb_helper *fb_helper, struct drm_fb_offset *offsets, bool *enabled, int width, int height) { - struct drm_device *dev = fb_helper->dev; + struct drm_i915_private *dev_priv = to_i915(fb_helper->dev); unsigned long conn_configured, mask; unsigned int count = min(fb_helper->connector_count, BITS_PER_LONG); int i, j; @@ -509,7 +509,7 @@ retry: * fbdev helper library. */ if (num_connectors_enabled != num_connectors_detected && - num_connectors_enabled < INTEL_INFO(dev)->num_pipes) { + num_connectors_enabled < INTEL_INFO(dev_priv)->num_pipes) { DRM_DEBUG_KMS("fallback: Not all outputs enabled\n"); DRM_DEBUG_KMS("Enabled: %i, detected: %i\n", num_connectors_enabled, num_connectors_detected); @@ -697,11 +697,11 @@ static void intel_fbdev_suspend_worker(struct work_struct *work) int intel_fbdev_init(struct drm_device *dev) { - struct intel_fbdev *ifbdev; struct drm_i915_private *dev_priv = to_i915(dev); + struct intel_fbdev *ifbdev; int ret; - if (WARN_ON(INTEL_INFO(dev)->num_pipes == 0)) + if (WARN_ON(INTEL_INFO(dev_priv)->num_pipes == 0)) return -ENODEV; ifbdev = kzalloc(sizeof(struct intel_fbdev), GFP_KERNEL); @@ -714,7 +714,7 @@ int intel_fbdev_init(struct drm_device *dev) ifbdev->preferred_bpp = 32; ret = drm_fb_helper_init(dev, &ifbdev->helper, - INTEL_INFO(dev)->num_pipes, 4); + INTEL_INFO(dev_priv)->num_pipes, 4); if (ret) { kfree(ifbdev); return ret; diff --git a/drivers/gpu/drm/i915/intel_guc_loader.c b/drivers/gpu/drm/i915/intel_guc_loader.c index 1aa85236b788..34d6ad2cf7c1 100644 --- a/drivers/gpu/drm/i915/intel_guc_loader.c +++ b/drivers/gpu/drm/i915/intel_guc_loader.c @@ -566,7 +566,7 @@ fail: ret = 0; } - if (err == 0 && !HAS_GUC_UCODE(dev)) + if (err == 0 && !HAS_GUC_UCODE(dev_priv)) ; /* Don't mention the GuC! */ else if (err == 0) DRM_INFO("GuC firmware load skipped\n"); @@ -725,18 +725,18 @@ void intel_guc_init(struct drm_device *dev) struct intel_guc_fw *guc_fw = &dev_priv->guc.guc_fw; const char *fw_path; - if (!HAS_GUC(dev)) { + if (!HAS_GUC(dev_priv)) { i915.enable_guc_loading = 0; i915.enable_guc_submission = 0; } else { /* A negative value means "use platform default" */ if (i915.enable_guc_loading < 0) - i915.enable_guc_loading = HAS_GUC_UCODE(dev); + i915.enable_guc_loading = HAS_GUC_UCODE(dev_priv); if (i915.enable_guc_submission < 0) - i915.enable_guc_submission = HAS_GUC_SCHED(dev); + i915.enable_guc_submission = HAS_GUC_SCHED(dev_priv); } - if (!HAS_GUC_UCODE(dev)) { + if (!HAS_GUC_UCODE(dev_priv)) { fw_path = NULL; } else if (IS_SKYLAKE(dev_priv)) { fw_path = I915_SKL_GUC_UCODE; diff --git a/drivers/gpu/drm/i915/intel_hdmi.c b/drivers/gpu/drm/i915/intel_hdmi.c index 35ada4e1c6cf..fb88e32e25a3 100644 --- a/drivers/gpu/drm/i915/intel_hdmi.c +++ b/drivers/gpu/drm/i915/intel_hdmi.c @@ -975,14 +975,16 @@ static void intel_hdmi_get_config(struct intel_encoder *encoder, pipe_config->lane_count = 4; } -static void intel_enable_hdmi_audio(struct intel_encoder *encoder) +static void intel_enable_hdmi_audio(struct intel_encoder *encoder, + struct intel_crtc_state *pipe_config, + struct drm_connector_state *conn_state) { struct intel_crtc *crtc = to_intel_crtc(encoder->base.crtc); WARN_ON(!crtc->config->has_hdmi_sink); DRM_DEBUG_DRIVER("Enabling HDMI audio on pipe %c\n", pipe_name(crtc->pipe)); - intel_audio_codec_enable(encoder); + intel_audio_codec_enable(encoder, pipe_config, conn_state); } static void g4x_enable_hdmi(struct intel_encoder *encoder, @@ -991,21 +993,20 @@ static void g4x_enable_hdmi(struct intel_encoder *encoder, { struct drm_device *dev = encoder->base.dev; struct drm_i915_private *dev_priv = to_i915(dev); - struct intel_crtc *crtc = to_intel_crtc(encoder->base.crtc); struct intel_hdmi *intel_hdmi = enc_to_intel_hdmi(&encoder->base); u32 temp; temp = I915_READ(intel_hdmi->hdmi_reg); temp |= SDVO_ENABLE; - if (crtc->config->has_audio) + if (pipe_config->has_audio) temp |= SDVO_AUDIO_ENABLE; I915_WRITE(intel_hdmi->hdmi_reg, temp); POSTING_READ(intel_hdmi->hdmi_reg); - if (crtc->config->has_audio) - intel_enable_hdmi_audio(encoder); + if (pipe_config->has_audio) + intel_enable_hdmi_audio(encoder, pipe_config, conn_state); } static void ibx_enable_hdmi(struct intel_encoder *encoder, @@ -1040,8 +1041,8 @@ static void ibx_enable_hdmi(struct intel_encoder *encoder, * FIXME: BSpec says this should be done at the end of * of the modeset sequence, so not sure if this isn't too soon. */ - if (crtc->config->pipe_bpp > 24 && - crtc->config->pixel_multiplier > 1) { + if (pipe_config->pipe_bpp > 24 && + pipe_config->pixel_multiplier > 1) { I915_WRITE(intel_hdmi->hdmi_reg, temp & ~SDVO_ENABLE); POSTING_READ(intel_hdmi->hdmi_reg); @@ -1055,8 +1056,8 @@ static void ibx_enable_hdmi(struct intel_encoder *encoder, POSTING_READ(intel_hdmi->hdmi_reg); } - if (crtc->config->has_audio) - intel_enable_hdmi_audio(encoder); + if (pipe_config->has_audio) + intel_enable_hdmi_audio(encoder, pipe_config, conn_state); } static void cpt_enable_hdmi(struct intel_encoder *encoder, @@ -1073,7 +1074,7 @@ static void cpt_enable_hdmi(struct intel_encoder *encoder, temp = I915_READ(intel_hdmi->hdmi_reg); temp |= SDVO_ENABLE; - if (crtc->config->has_audio) + if (pipe_config->has_audio) temp |= SDVO_AUDIO_ENABLE; /* @@ -1086,7 +1087,7 @@ static void cpt_enable_hdmi(struct intel_encoder *encoder, * 4. enable HDMI clock gating */ - if (crtc->config->pipe_bpp > 24) { + if (pipe_config->pipe_bpp > 24) { I915_WRITE(TRANS_CHICKEN1(pipe), I915_READ(TRANS_CHICKEN1(pipe)) | TRANS_CHICKEN1_HDMIUNIT_GC_DISABLE); @@ -1098,7 +1099,7 @@ static void cpt_enable_hdmi(struct intel_encoder *encoder, I915_WRITE(intel_hdmi->hdmi_reg, temp); POSTING_READ(intel_hdmi->hdmi_reg); - if (crtc->config->pipe_bpp > 24) { + if (pipe_config->pipe_bpp > 24) { temp &= ~SDVO_COLOR_FORMAT_MASK; temp |= HDMI_COLOR_FORMAT_12bpc; @@ -1110,8 +1111,8 @@ static void cpt_enable_hdmi(struct intel_encoder *encoder, ~TRANS_CHICKEN1_HDMIUNIT_GC_DISABLE); } - if (crtc->config->has_audio) - intel_enable_hdmi_audio(encoder); + if (pipe_config->has_audio) + intel_enable_hdmi_audio(encoder, pipe_config, conn_state); } static void vlv_enable_hdmi(struct intel_encoder *encoder, @@ -1178,9 +1179,7 @@ static void g4x_disable_hdmi(struct intel_encoder *encoder, struct intel_crtc_state *old_crtc_state, struct drm_connector_state *old_conn_state) { - struct intel_crtc *crtc = to_intel_crtc(encoder->base.crtc); - - if (crtc->config->has_audio) + if (old_crtc_state->has_audio) intel_audio_codec_disable(encoder); intel_disable_hdmi(encoder, old_crtc_state, old_conn_state); @@ -1190,9 +1189,7 @@ static void pch_disable_hdmi(struct intel_encoder *encoder, struct intel_crtc_state *old_crtc_state, struct drm_connector_state *old_conn_state) { - struct intel_crtc *crtc = to_intel_crtc(encoder->base.crtc); - - if (crtc->config->has_audio) + if (old_crtc_state->has_audio) intel_audio_codec_disable(encoder); } @@ -1645,13 +1642,12 @@ static void intel_hdmi_pre_enable(struct intel_encoder *encoder, struct drm_connector_state *conn_state) { struct intel_hdmi *intel_hdmi = enc_to_intel_hdmi(&encoder->base); - struct intel_crtc *intel_crtc = to_intel_crtc(encoder->base.crtc); - const struct drm_display_mode *adjusted_mode = &intel_crtc->config->base.adjusted_mode; + const struct drm_display_mode *adjusted_mode = &pipe_config->base.adjusted_mode; intel_hdmi_prepare(encoder); intel_hdmi->set_infoframes(&encoder->base, - intel_crtc->config->has_hdmi_sink, + pipe_config->has_hdmi_sink, adjusted_mode); } @@ -1663,9 +1659,7 @@ static void vlv_hdmi_pre_enable(struct intel_encoder *encoder, struct intel_hdmi *intel_hdmi = &dport->hdmi; struct drm_device *dev = encoder->base.dev; struct drm_i915_private *dev_priv = to_i915(dev); - struct intel_crtc *intel_crtc = - to_intel_crtc(encoder->base.crtc); - const struct drm_display_mode *adjusted_mode = &intel_crtc->config->base.adjusted_mode; + const struct drm_display_mode *adjusted_mode = &pipe_config->base.adjusted_mode; vlv_phy_pre_encoder_enable(encoder); @@ -1674,7 +1668,7 @@ static void vlv_hdmi_pre_enable(struct intel_encoder *encoder, 0x2b247878); intel_hdmi->set_infoframes(&encoder->base, - intel_crtc->config->has_hdmi_sink, + pipe_config->has_hdmi_sink, adjusted_mode); g4x_enable_hdmi(encoder, pipe_config, conn_state); diff --git a/drivers/gpu/drm/i915/intel_hotplug.c b/drivers/gpu/drm/i915/intel_hotplug.c index 334d47b5811a..3d546c019de0 100644 --- a/drivers/gpu/drm/i915/intel_hotplug.c +++ b/drivers/gpu/drm/i915/intel_hotplug.c @@ -501,7 +501,7 @@ static void i915_hpd_poll_init_work(struct work_struct *work) if (intel_connector->mst_port) continue; - if (!connector->polled && I915_HAS_HOTPLUG(dev) && + if (!connector->polled && I915_HAS_HOTPLUG(dev_priv) && intel_connector->encoder->hpd_pin > HPD_NONE) { connector->polled = enabled ? DRM_CONNECTOR_POLL_CONNECT | diff --git a/drivers/gpu/drm/i915/intel_lrc.c b/drivers/gpu/drm/i915/intel_lrc.c index dde04b7643b1..0a09024d6ca3 100644 --- a/drivers/gpu/drm/i915/intel_lrc.c +++ b/drivers/gpu/drm/i915/intel_lrc.c @@ -432,8 +432,10 @@ static bool can_merge_ctx(const struct i915_gem_context *prev, static void execlists_dequeue(struct intel_engine_cs *engine) { - struct drm_i915_gem_request *cursor, *last; + struct drm_i915_gem_request *last; struct execlist_port *port = engine->execlist_port; + unsigned long flags; + struct rb_node *rb; bool submit = false; last = port->request; @@ -469,8 +471,12 @@ static void execlists_dequeue(struct intel_engine_cs *engine) * and context switches) submission. */ - spin_lock(&engine->execlist_lock); - list_for_each_entry(cursor, &engine->execlist_queue, execlist_link) { + spin_lock_irqsave(&engine->timeline->lock, flags); + rb = engine->execlist_first; + while (rb) { + struct drm_i915_gem_request *cursor = + rb_entry(rb, typeof(*cursor), priotree.node); + /* Can we combine this request with the current port? It has to * be the same context/ringbuffer and not have any exceptions * (e.g. GVT saying never to combine contexts). @@ -493,7 +499,8 @@ static void execlists_dequeue(struct intel_engine_cs *engine) * context (even though a different request) to * the second port. */ - if (ctx_single_port_submission(cursor->ctx)) + if (ctx_single_port_submission(last->ctx) || + ctx_single_port_submission(cursor->ctx)) break; GEM_BUG_ON(last->ctx == cursor->ctx); @@ -501,17 +508,30 @@ static void execlists_dequeue(struct intel_engine_cs *engine) i915_gem_request_assign(&port->request, last); port++; } + + rb = rb_next(rb); + rb_erase(&cursor->priotree.node, &engine->execlist_queue); + RB_CLEAR_NODE(&cursor->priotree.node); + cursor->priotree.priority = INT_MAX; + + /* We keep the previous context alive until we retire the + * following request. This ensures that any the context object + * is still pinned for any residual writes the HW makes into it + * on the context switch into the next object following the + * breadcrumb. Otherwise, we may retire the context too early. + */ + cursor->previous_context = engine->last_context; + engine->last_context = cursor->ctx; + + __i915_gem_request_submit(cursor); last = cursor; submit = true; } if (submit) { - /* Decouple all the requests submitted from the queue */ - engine->execlist_queue.next = &cursor->execlist_link; - cursor->execlist_link.prev = &engine->execlist_queue; - i915_gem_request_assign(&port->request, last); + engine->execlist_first = rb; } - spin_unlock(&engine->execlist_lock); + spin_unlock_irqrestore(&engine->timeline->lock, flags); if (submit) execlists_submit_ports(engine); @@ -614,27 +634,147 @@ static void intel_lrc_irq_handler(unsigned long data) intel_uncore_forcewake_put(dev_priv, engine->fw_domains); } +static bool insert_request(struct i915_priotree *pt, struct rb_root *root) +{ + struct rb_node **p, *rb; + bool first = true; + + /* most positive priority is scheduled first, equal priorities fifo */ + rb = NULL; + p = &root->rb_node; + while (*p) { + struct i915_priotree *pos; + + rb = *p; + pos = rb_entry(rb, typeof(*pos), node); + if (pt->priority > pos->priority) { + p = &rb->rb_left; + } else { + p = &rb->rb_right; + first = false; + } + } + rb_link_node(&pt->node, rb, p); + rb_insert_color(&pt->node, root); + + return first; +} + static void execlists_submit_request(struct drm_i915_gem_request *request) { struct intel_engine_cs *engine = request->engine; unsigned long flags; - spin_lock_irqsave(&engine->execlist_lock, flags); + /* Will be called from irq-context when using foreign fences. */ + spin_lock_irqsave(&engine->timeline->lock, flags); - /* We keep the previous context alive until we retire the following - * request. This ensures that any the context object is still pinned - * for any residual writes the HW makes into it on the context switch - * into the next object following the breadcrumb. Otherwise, we may - * retire the context too early. - */ - request->previous_context = engine->last_context; - engine->last_context = request->ctx; - - list_add_tail(&request->execlist_link, &engine->execlist_queue); + if (insert_request(&request->priotree, &engine->execlist_queue)) + engine->execlist_first = &request->priotree.node; if (execlists_elsp_idle(engine)) tasklet_hi_schedule(&engine->irq_tasklet); - spin_unlock_irqrestore(&engine->execlist_lock, flags); + spin_unlock_irqrestore(&engine->timeline->lock, flags); +} + +static struct intel_engine_cs * +pt_lock_engine(struct i915_priotree *pt, struct intel_engine_cs *locked) +{ + struct intel_engine_cs *engine; + + engine = container_of(pt, + struct drm_i915_gem_request, + priotree)->engine; + if (engine != locked) { + if (locked) + spin_unlock_irq(&locked->timeline->lock); + spin_lock_irq(&engine->timeline->lock); + } + + return engine; +} + +static void execlists_schedule(struct drm_i915_gem_request *request, int prio) +{ + static DEFINE_MUTEX(lock); + struct intel_engine_cs *engine = NULL; + struct i915_dependency *dep, *p; + struct i915_dependency stack; + LIST_HEAD(dfs); + + if (prio <= READ_ONCE(request->priotree.priority)) + return; + + /* Need global lock to use the temporary link inside i915_dependency */ + mutex_lock(&lock); + + stack.signaler = &request->priotree; + list_add(&stack.dfs_link, &dfs); + + /* Recursively bump all dependent priorities to match the new request. + * + * A naive approach would be to use recursion: + * static void update_priorities(struct i915_priotree *pt, prio) { + * list_for_each_entry(dep, &pt->signalers_list, signal_link) + * update_priorities(dep->signal, prio) + * insert_request(pt); + * } + * but that may have unlimited recursion depth and so runs a very + * real risk of overunning the kernel stack. Instead, we build + * a flat list of all dependencies starting with the current request. + * As we walk the list of dependencies, we add all of its dependencies + * to the end of the list (this may include an already visited + * request) and continue to walk onwards onto the new dependencies. The + * end result is a topological list of requests in reverse order, the + * last element in the list is the request we must execute first. + */ + list_for_each_entry_safe(dep, p, &dfs, dfs_link) { + struct i915_priotree *pt = dep->signaler; + + list_for_each_entry(p, &pt->signalers_list, signal_link) + if (prio > READ_ONCE(p->signaler->priority)) + list_move_tail(&p->dfs_link, &dfs); + + p = list_next_entry(dep, dfs_link); + if (!RB_EMPTY_NODE(&pt->node)) + continue; + + engine = pt_lock_engine(pt, engine); + + /* If it is not already in the rbtree, we can update the + * priority inplace and skip over it (and its dependencies) + * if it is referenced *again* as we descend the dfs. + */ + if (prio > pt->priority && RB_EMPTY_NODE(&pt->node)) { + pt->priority = prio; + list_del_init(&dep->dfs_link); + } + } + + /* Fifo and depth-first replacement ensure our deps execute before us */ + list_for_each_entry_safe_reverse(dep, p, &dfs, dfs_link) { + struct i915_priotree *pt = dep->signaler; + + INIT_LIST_HEAD(&dep->dfs_link); + + engine = pt_lock_engine(pt, engine); + + if (prio <= pt->priority) + continue; + + GEM_BUG_ON(RB_EMPTY_NODE(&pt->node)); + + pt->priority = prio; + rb_erase(&pt->node, &engine->execlist_queue); + if (insert_request(pt, &engine->execlist_queue)) + engine->execlist_first = &pt->node; + } + + if (engine) + spin_unlock_irq(&engine->timeline->lock); + + mutex_unlock(&lock); + + /* XXX Do we need to preempt to make room for us and our deps? */ } int intel_logical_ring_alloc_request_extras(struct drm_i915_gem_request *request) @@ -1673,8 +1813,10 @@ void intel_execlists_enable_submission(struct drm_i915_private *dev_priv) struct intel_engine_cs *engine; enum intel_engine_id id; - for_each_engine(engine, dev_priv, id) + for_each_engine(engine, dev_priv, id) { engine->submit_request = execlists_submit_request; + engine->schedule = execlists_schedule; + } } static void @@ -1687,6 +1829,7 @@ logical_ring_default_vfuncs(struct intel_engine_cs *engine) engine->emit_breadcrumb = gen8_emit_breadcrumb; engine->emit_breadcrumb_sz = gen8_emit_breadcrumb_sz; engine->submit_request = execlists_submit_request; + engine->schedule = execlists_schedule; engine->irq_enable = gen8_logical_ring_enable_irq; engine->irq_disable = gen8_logical_ring_disable_irq; diff --git a/drivers/gpu/drm/i915/intel_opregion.c b/drivers/gpu/drm/i915/intel_opregion.c index 7acbbbf97833..f4429f67a4e3 100644 --- a/drivers/gpu/drm/i915/intel_opregion.c +++ b/drivers/gpu/drm/i915/intel_opregion.c @@ -642,24 +642,6 @@ static struct notifier_block intel_opregion_notifier = { * (version 3) */ -static u32 get_did(struct intel_opregion *opregion, int i) -{ - u32 did; - - if (i < ARRAY_SIZE(opregion->acpi->didl)) { - did = opregion->acpi->didl[i]; - } else { - i -= ARRAY_SIZE(opregion->acpi->didl); - - if (WARN_ON(i >= ARRAY_SIZE(opregion->acpi->did2))) - return 0; - - did = opregion->acpi->did2[i]; - } - - return did; -} - static void set_did(struct intel_opregion *opregion, int i, u32 val) { if (i < ARRAY_SIZE(opregion->acpi->didl)) { @@ -674,11 +656,11 @@ static void set_did(struct intel_opregion *opregion, int i, u32 val) } } -static u32 acpi_display_type(struct drm_connector *connector) +static u32 acpi_display_type(struct intel_connector *connector) { u32 display_type; - switch (connector->connector_type) { + switch (connector->base.connector_type) { case DRM_MODE_CONNECTOR_VGA: case DRM_MODE_CONNECTOR_DVIA: display_type = ACPI_DISPLAY_TYPE_VGA; @@ -707,7 +689,7 @@ static u32 acpi_display_type(struct drm_connector *connector) display_type = ACPI_DISPLAY_TYPE_OTHER; break; default: - MISSING_CASE(connector->connector_type); + MISSING_CASE(connector->base.connector_type); display_type = ACPI_DISPLAY_TYPE_OTHER; break; } @@ -718,34 +700,9 @@ static u32 acpi_display_type(struct drm_connector *connector) static void intel_didl_outputs(struct drm_i915_private *dev_priv) { struct intel_opregion *opregion = &dev_priv->opregion; - struct pci_dev *pdev = dev_priv->drm.pdev; - struct drm_connector *connector; - acpi_handle handle; - struct acpi_device *acpi_dev, *acpi_cdev, *acpi_video_bus = NULL; - unsigned long long device_id; - acpi_status status; - u32 temp, max_outputs; - int i = 0; - - handle = ACPI_HANDLE(&pdev->dev); - if (!handle || acpi_bus_get_device(handle, &acpi_dev)) - return; - - if (acpi_is_video_device(handle)) - acpi_video_bus = acpi_dev; - else { - list_for_each_entry(acpi_cdev, &acpi_dev->children, node) { - if (acpi_is_video_device(acpi_cdev->handle)) { - acpi_video_bus = acpi_cdev; - break; - } - } - } - - if (!acpi_video_bus) { - DRM_DEBUG_KMS("No ACPI video bus found\n"); - return; - } + struct intel_connector *connector; + int i = 0, max_outputs; + int display_index[16] = {}; /* * In theory, did2, the extended didl, gets added at opregion version @@ -757,64 +714,58 @@ static void intel_didl_outputs(struct drm_i915_private *dev_priv) max_outputs = ARRAY_SIZE(opregion->acpi->didl) + ARRAY_SIZE(opregion->acpi->did2); - list_for_each_entry(acpi_cdev, &acpi_video_bus->children, node) { - if (i >= max_outputs) { - DRM_DEBUG_KMS("More than %u outputs detected via ACPI\n", - max_outputs); - return; - } - status = acpi_evaluate_integer(acpi_cdev->handle, "_ADR", - NULL, &device_id); - if (ACPI_SUCCESS(status)) { - if (!device_id) - goto blind_set; - set_did(opregion, i++, (u32)(device_id & 0x0f0f)); - } + for_each_intel_connector(&dev_priv->drm, connector) { + u32 device_id, type; + + device_id = acpi_display_type(connector); + + /* Use display type specific display index. */ + type = (device_id & ACPI_DISPLAY_TYPE_MASK) + >> ACPI_DISPLAY_TYPE_SHIFT; + device_id |= display_index[type]++ << ACPI_DISPLAY_INDEX_SHIFT; + + connector->acpi_device_id = device_id; + if (i < max_outputs) + set_did(opregion, i, device_id); + i++; } -end: DRM_DEBUG_KMS("%d outputs detected\n", i); + if (i > max_outputs) + DRM_ERROR("More than %d outputs in connector list\n", + max_outputs); + /* If fewer than max outputs, the list must be null terminated */ if (i < max_outputs) set_did(opregion, i, 0); - return; - -blind_set: - i = 0; - list_for_each_entry(connector, - &dev_priv->drm.mode_config.connector_list, head) { - int display_type = acpi_display_type(connector); - - if (i >= max_outputs) { - DRM_DEBUG_KMS("More than %u outputs in connector list\n", - max_outputs); - return; - } - - temp = get_did(opregion, i); - set_did(opregion, i, temp | (1 << 31) | display_type | i); - i++; - } - goto end; } static void intel_setup_cadls(struct drm_i915_private *dev_priv) { struct intel_opregion *opregion = &dev_priv->opregion; + struct intel_connector *connector; int i = 0; - u32 disp_id; - - /* Initialize the CADL field by duplicating the DIDL values. - * Technically, this is not always correct as display outputs may exist, - * but not active. This initialization is necessary for some Clevo - * laptops that check this field before processing the brightness and - * display switching hotkeys. Just like DIDL, CADL is NULL-terminated if - * there are less than eight devices. */ - do { - disp_id = get_did(opregion, i); - opregion->acpi->cadl[i] = disp_id; - } while (++i < 8 && disp_id != 0); + + /* + * Initialize the CADL field from the connector device ids. This is + * essentially the same as copying from the DIDL. Technically, this is + * not always correct as display outputs may exist, but not active. This + * initialization is necessary for some Clevo laptops that check this + * field before processing the brightness and display switching hotkeys. + * + * Note that internal panels should be at the front of the connector + * list already, ensuring they're not left out. + */ + for_each_intel_connector(&dev_priv->drm, connector) { + if (i >= ARRAY_SIZE(opregion->acpi->cadl)) + break; + opregion->acpi->cadl[i++] = connector->acpi_device_id; + } + + /* If fewer than 8 active devices, the list must be null terminated */ + if (i < ARRAY_SIZE(opregion->acpi->cadl)) + opregion->acpi->cadl[i] = 0; } void intel_opregion_register(struct drm_i915_private *dev_priv) diff --git a/drivers/gpu/drm/i915/intel_pm.c b/drivers/gpu/drm/i915/intel_pm.c index cc9e0c0f445f..1331bcc41868 100644 --- a/drivers/gpu/drm/i915/intel_pm.c +++ b/drivers/gpu/drm/i915/intel_pm.c @@ -1061,7 +1061,8 @@ static void vlv_invert_wms(struct intel_crtc *crtc) for (level = 0; level < wm_state->num_levels; level++) { struct drm_device *dev = crtc->base.dev; - const int sr_fifo_size = INTEL_INFO(dev)->num_pipes * 512 - 1; + const int sr_fifo_size = + INTEL_INFO(to_i915(dev))->num_pipes * 512 - 1; struct intel_plane *plane; wm_state->sr[level].plane = sr_fifo_size - wm_state->sr[level].plane; @@ -1091,15 +1092,16 @@ static void vlv_invert_wms(struct intel_crtc *crtc) static void vlv_compute_wm(struct intel_crtc *crtc) { struct drm_device *dev = crtc->base.dev; + struct drm_i915_private *dev_priv = to_i915(dev); struct vlv_wm_state *wm_state = &crtc->wm_state; struct intel_plane *plane; - int sr_fifo_size = INTEL_INFO(dev)->num_pipes * 512 - 1; + int sr_fifo_size = INTEL_INFO(dev_priv)->num_pipes * 512 - 1; int level; memset(wm_state, 0, sizeof(*wm_state)); wm_state->cxsr = crtc->pipe != PIPE_C && crtc->wm.cxsr_allowed; - wm_state->num_levels = to_i915(dev)->wm.max_level + 1; + wm_state->num_levels = dev_priv->wm.max_level + 1; wm_state->num_active_planes = 0; @@ -1179,7 +1181,7 @@ static void vlv_compute_wm(struct intel_crtc *crtc) } /* clear any (partially) filled invalid levels */ - for (level = wm_state->num_levels; level < to_i915(dev)->wm.max_level + 1; level++) { + for (level = wm_state->num_levels; level < dev_priv->wm.max_level + 1; level++) { memset(&wm_state->wm[level], 0, sizeof(wm_state->wm[level])); memset(&wm_state->sr[level], 0, sizeof(wm_state->sr[level])); } @@ -1920,7 +1922,7 @@ static unsigned int ilk_plane_wm_max(const struct drm_device *dev, /* HSW allows LP1+ watermarks even with multiple pipes */ if (level == 0 || config->num_pipes_active > 1) { - fifo_size /= INTEL_INFO(dev)->num_pipes; + fifo_size /= INTEL_INFO(to_i915(dev))->num_pipes; /* * For some reason the non self refresh @@ -3118,7 +3120,11 @@ skl_ddb_get_pipe_allocation_limits(struct drm_device *dev, * we currently hold. */ if (!intel_state->active_pipe_changes) { - *alloc = to_intel_crtc(for_crtc)->hw_ddb; + /* + * alloc may be cleared by clear_intel_crtc_state, + * copy from old state to be sure + */ + *alloc = to_intel_crtc_state(for_crtc->state)->wm.skl.ddb; return; } @@ -3624,6 +3630,9 @@ static int skl_compute_plane_wm(const struct drm_i915_private *dev_priv, y_min_scanlines = 4; } + if (apply_memory_bw_wa) + y_min_scanlines *= 2; + plane_bytes_per_line = width * cpp; if (fb->modifier[0] == I915_FORMAT_MOD_Y_TILED || fb->modifier[0] == I915_FORMAT_MOD_Yf_TILED) { @@ -3644,8 +3653,6 @@ static int skl_compute_plane_wm(const struct drm_i915_private *dev_priv, plane_blocks_per_line); y_tile_minimum = plane_blocks_per_line * y_min_scanlines; - if (apply_memory_bw_wa) - y_tile_minimum *= 2; if (fb->modifier[0] == I915_FORMAT_MOD_Y_TILED || fb->modifier[0] == I915_FORMAT_MOD_Yf_TILED) { @@ -3906,25 +3913,16 @@ static inline bool skl_ddb_entries_overlap(const struct skl_ddb_entry *a, return a->start < b->end && b->start < a->end; } -bool skl_ddb_allocation_overlaps(struct drm_atomic_state *state, - struct intel_crtc *intel_crtc) +bool skl_ddb_allocation_overlaps(const struct skl_ddb_entry **entries, + const struct skl_ddb_entry *ddb, + int ignore) { - struct drm_crtc *other_crtc; - struct drm_crtc_state *other_cstate; - struct intel_crtc *other_intel_crtc; - const struct skl_ddb_entry *ddb = - &to_intel_crtc_state(intel_crtc->base.state)->wm.skl.ddb; int i; - for_each_crtc_in_state(state, other_crtc, other_cstate, i) { - other_intel_crtc = to_intel_crtc(other_crtc); - - if (other_intel_crtc == intel_crtc) - continue; - - if (skl_ddb_entries_overlap(ddb, &other_intel_crtc->hw_ddb)) + for (i = 0; i < I915_MAX_PIPES; i++) + if (i != ignore && entries[i] && + skl_ddb_entries_overlap(ddb, entries[i])) return true; - } return false; } @@ -4193,14 +4191,35 @@ skl_compute_wm(struct drm_atomic_state *state) return 0; } -static void skl_update_wm(struct intel_crtc *intel_crtc) +static void skl_atomic_update_crtc_wm(struct intel_atomic_state *state, + struct intel_crtc_state *cstate) +{ + struct intel_crtc *crtc = to_intel_crtc(cstate->base.crtc); + struct drm_i915_private *dev_priv = to_i915(state->base.dev); + struct skl_pipe_wm *pipe_wm = &cstate->wm.skl.optimal; + const struct skl_ddb_allocation *ddb = &state->wm_results.ddb; + enum pipe pipe = crtc->pipe; + int plane; + + if (!(state->wm_results.dirty_pipes & drm_crtc_mask(&crtc->base))) + return; + + I915_WRITE(PIPE_WM_LINETIME(pipe), pipe_wm->linetime); + + for_each_universal_plane(dev_priv, pipe, plane) + skl_write_plane_wm(crtc, &pipe_wm->planes[plane], ddb, plane); + + skl_write_cursor_wm(crtc, &pipe_wm->planes[PLANE_CURSOR], ddb); +} + +static void skl_initial_wm(struct intel_atomic_state *state, + struct intel_crtc_state *cstate) { + struct intel_crtc *intel_crtc = to_intel_crtc(cstate->base.crtc); struct drm_device *dev = intel_crtc->base.dev; struct drm_i915_private *dev_priv = to_i915(dev); - struct skl_wm_values *results = &dev_priv->wm.skl_results; + struct skl_wm_values *results = &state->wm_results; struct skl_wm_values *hw_vals = &dev_priv->wm.skl_hw; - struct intel_crtc_state *cstate = to_intel_crtc_state(intel_crtc->base.state); - struct skl_pipe_wm *pipe_wm = &cstate->wm.skl.optimal; enum pipe pipe = intel_crtc->pipe; if ((results->dirty_pipes & drm_crtc_mask(&intel_crtc->base)) == 0) @@ -4208,27 +4227,11 @@ static void skl_update_wm(struct intel_crtc *intel_crtc) mutex_lock(&dev_priv->wm.wm_mutex); - /* - * If this pipe isn't active already, we're going to be enabling it - * very soon. Since it's safe to update a pipe's ddb allocation while - * the pipe's shut off, just do so here. Already active pipes will have - * their watermarks updated once we update their planes. - */ - if (intel_crtc->base.state->active_changed) { - int plane; - - for_each_universal_plane(dev_priv, pipe, plane) - skl_write_plane_wm(intel_crtc, &pipe_wm->planes[plane], - &results->ddb, plane); - - skl_write_cursor_wm(intel_crtc, &pipe_wm->planes[PLANE_CURSOR], - &results->ddb); - } + if (cstate->base.active_changed) + skl_atomic_update_crtc_wm(state, cstate); skl_copy_wm_for_pipe(hw_vals, results, pipe); - intel_crtc->hw_ddb = cstate->wm.skl.ddb; - mutex_unlock(&dev_priv->wm.wm_mutex); } @@ -4283,7 +4286,8 @@ static void ilk_program_watermarks(struct drm_i915_private *dev_priv) ilk_write_wm_values(dev_priv, &results); } -static void ilk_initial_watermarks(struct intel_crtc_state *cstate) +static void ilk_initial_watermarks(struct intel_atomic_state *state, + struct intel_crtc_state *cstate) { struct drm_i915_private *dev_priv = to_i915(cstate->base.crtc->dev); struct intel_crtc *intel_crtc = to_intel_crtc(cstate->base.crtc); @@ -4294,7 +4298,8 @@ static void ilk_initial_watermarks(struct intel_crtc_state *cstate) mutex_unlock(&dev_priv->wm.wm_mutex); } -static void ilk_optimize_watermarks(struct intel_crtc_state *cstate) +static void ilk_optimize_watermarks(struct intel_atomic_state *state, + struct intel_crtc_state *cstate) { struct drm_i915_private *dev_priv = to_i915(cstate->base.crtc->dev); struct intel_crtc *intel_crtc = to_intel_crtc(cstate->base.crtc); @@ -7690,7 +7695,8 @@ void intel_init_pm(struct drm_i915_private *dev_priv) /* For FIFO watermark updates */ if (INTEL_GEN(dev_priv) >= 9) { skl_setup_wm_latency(dev_priv); - dev_priv->display.update_wm = skl_update_wm; + dev_priv->display.initial_watermarks = skl_initial_wm; + dev_priv->display.atomic_update_watermarks = skl_atomic_update_crtc_wm; dev_priv->display.compute_global_watermarks = skl_compute_wm; } else if (HAS_PCH_SPLIT(dev_priv)) { ilk_setup_wm_latency(dev_priv); diff --git a/drivers/gpu/drm/i915/intel_psr.c b/drivers/gpu/drm/i915/intel_psr.c index 271a3e29ff23..41e6e920d9d7 100644 --- a/drivers/gpu/drm/i915/intel_psr.c +++ b/drivers/gpu/drm/i915/intel_psr.c @@ -427,7 +427,7 @@ void intel_psr_enable(struct intel_dp *intel_dp) struct drm_i915_private *dev_priv = to_i915(dev); struct intel_crtc *crtc = to_intel_crtc(intel_dig_port->base.base.crtc); - if (!HAS_PSR(dev)) { + if (!HAS_PSR(dev_priv)) { DRM_DEBUG_KMS("PSR not supported on this platform\n"); return; } diff --git a/drivers/gpu/drm/i915/intel_ringbuffer.c b/drivers/gpu/drm/i915/intel_ringbuffer.c index 700e93d80616..aeb637dc1fdf 100644 --- a/drivers/gpu/drm/i915/intel_ringbuffer.c +++ b/drivers/gpu/drm/i915/intel_ringbuffer.c @@ -1294,6 +1294,8 @@ static void i9xx_submit_request(struct drm_i915_gem_request *request) { struct drm_i915_private *dev_priv = request->i915; + i915_gem_request_submit(request); + I915_WRITE_TAIL(request->engine, request->tail); } diff --git a/drivers/gpu/drm/i915/intel_ringbuffer.h b/drivers/gpu/drm/i915/intel_ringbuffer.h index 642b54692d0d..3466b4e77e7c 100644 --- a/drivers/gpu/drm/i915/intel_ringbuffer.h +++ b/drivers/gpu/drm/i915/intel_ringbuffer.h @@ -267,6 +267,15 @@ struct intel_engine_cs { */ void (*submit_request)(struct drm_i915_gem_request *req); + /* Call when the priority on a request has changed and it and its + * dependencies may need rescheduling. Note the request itself may + * not be ready to run! + * + * Called under the struct_mutex. + */ + void (*schedule)(struct drm_i915_gem_request *request, + int priority); + /* Some chipsets are not quite as coherent as advertised and need * an expensive kick to force a true read of the up-to-date seqno. * However, the up-to-date seqno is not always required and the last @@ -335,12 +344,12 @@ struct intel_engine_cs { /* Execlists */ struct tasklet_struct irq_tasklet; - spinlock_t execlist_lock; /* used inside tasklet, use spin_lock_bh */ struct execlist_port { struct drm_i915_gem_request *request; unsigned int count; } execlist_port[2]; - struct list_head execlist_queue; + struct rb_root execlist_queue; + struct rb_node *execlist_first; unsigned int fw_domains; bool disable_lite_restore_wa; bool preempt_wa; @@ -578,7 +587,6 @@ static inline bool intel_engine_wakeup(const struct intel_engine_cs *engine) void intel_engine_reset_breadcrumbs(struct intel_engine_cs *engine); void intel_engine_fini_breadcrumbs(struct intel_engine_cs *engine); -unsigned int intel_kick_waiters(struct drm_i915_private *i915); -unsigned int intel_kick_signalers(struct drm_i915_private *i915); +unsigned int intel_breadcrumbs_busy(struct drm_i915_private *i915); #endif /* _INTEL_RINGBUFFER_H_ */ diff --git a/drivers/gpu/drm/i915/intel_sprite.c b/drivers/gpu/drm/i915/intel_sprite.c index 0e9aac2e3eae..c2b629c922e7 100644 --- a/drivers/gpu/drm/i915/intel_sprite.c +++ b/drivers/gpu/drm/i915/intel_sprite.c @@ -203,13 +203,8 @@ skl_update_plane(struct drm_plane *drm_plane, struct drm_i915_private *dev_priv = to_i915(dev); struct intel_plane *intel_plane = to_intel_plane(drm_plane); struct drm_framebuffer *fb = plane_state->base.fb; - const struct skl_wm_values *wm = &dev_priv->wm.skl_results; - struct drm_crtc *crtc = crtc_state->base.crtc; - struct intel_crtc *intel_crtc = to_intel_crtc(crtc); const int pipe = intel_plane->pipe; const int plane = intel_plane->plane + 1; - const struct skl_plane_wm *p_wm = - &crtc_state->wm.skl.optimal.planes[plane]; u32 plane_ctl; const struct drm_intel_sprite_colorkey *key = &plane_state->ckey; u32 surf_addr = plane_state->main.offset; @@ -233,9 +228,6 @@ skl_update_plane(struct drm_plane *drm_plane, plane_ctl |= skl_plane_ctl_rotation(rotation); - if (wm->dirty_pipes & drm_crtc_mask(crtc)) - skl_write_plane_wm(intel_crtc, p_wm, &wm->ddb, plane); - if (key->flags) { I915_WRITE(PLANE_KEYVAL(pipe, plane), key->min_value); I915_WRITE(PLANE_KEYMAX(pipe, plane), key->max_value); @@ -291,19 +283,9 @@ skl_disable_plane(struct drm_plane *dplane, struct drm_crtc *crtc) struct drm_device *dev = dplane->dev; struct drm_i915_private *dev_priv = to_i915(dev); struct intel_plane *intel_plane = to_intel_plane(dplane); - struct intel_crtc_state *cstate = to_intel_crtc_state(crtc->state); const int pipe = intel_plane->pipe; const int plane = intel_plane->plane + 1; - /* - * We only populate skl_results on watermark updates, and if the - * plane's visiblity isn't actually changing neither is its watermarks. - */ - if (!dplane->state->visible) - skl_write_plane_wm(to_intel_crtc(crtc), - &cstate->wm.skl.optimal.planes[plane], - &dev_priv->wm.skl_results.ddb, plane); - I915_WRITE(PLANE_CTL(pipe, plane), 0); I915_WRITE(PLANE_SURF(pipe, plane), 0); @@ -362,7 +344,7 @@ vlv_update_plane(struct drm_plane *dplane, int plane = intel_plane->plane; u32 sprctl; u32 sprsurf_offset, linear_offset; - unsigned int rotation = dplane->state->rotation; + unsigned int rotation = plane_state->base.rotation; const struct drm_intel_sprite_colorkey *key = &plane_state->ckey; int crtc_x = plane_state->base.dst.x1; int crtc_y = plane_state->base.dst.y1; @@ -427,6 +409,12 @@ vlv_update_plane(struct drm_plane *dplane, if (fb->modifier[0] == I915_FORMAT_MOD_X_TILED) sprctl |= SP_TILED; + if (rotation & DRM_ROTATE_180) + sprctl |= SP_ROTATE_180; + + if (rotation & DRM_REFLECT_X) + sprctl |= SP_MIRROR; + /* Sizes are 0 based */ src_w--; src_h--; @@ -436,11 +424,11 @@ vlv_update_plane(struct drm_plane *dplane, intel_add_fb_offsets(&x, &y, plane_state, 0); sprsurf_offset = intel_compute_tile_offset(&x, &y, plane_state, 0); - if (rotation == DRM_ROTATE_180) { - sprctl |= SP_ROTATE_180; - + if (rotation & DRM_ROTATE_180) { x += src_w; y += src_h; + } else if (rotation & DRM_REFLECT_X) { + x += src_w; } linear_offset = intel_fb_xy_to_linear(x, y, plane_state, 0); @@ -546,6 +534,9 @@ ivb_update_plane(struct drm_plane *plane, if (fb->modifier[0] == I915_FORMAT_MOD_X_TILED) sprctl |= SPRITE_TILED; + if (rotation & DRM_ROTATE_180) + sprctl |= SPRITE_ROTATE_180; + if (IS_HASWELL(dev_priv) || IS_BROADWELL(dev_priv)) sprctl &= ~SPRITE_TRICKLE_FEED_DISABLE; else @@ -566,14 +557,11 @@ ivb_update_plane(struct drm_plane *plane, intel_add_fb_offsets(&x, &y, plane_state, 0); sprsurf_offset = intel_compute_tile_offset(&x, &y, plane_state, 0); - if (rotation == DRM_ROTATE_180) { - sprctl |= SPRITE_ROTATE_180; - - /* HSW and BDW does this automagically in hardware */ - if (!IS_HASWELL(dev_priv) && !IS_BROADWELL(dev_priv)) { - x += src_w; - y += src_h; - } + /* HSW+ does this automagically in hardware */ + if (!IS_HASWELL(dev_priv) && !IS_BROADWELL(dev_priv) && + rotation & DRM_ROTATE_180) { + x += src_w; + y += src_h; } linear_offset = intel_fb_xy_to_linear(x, y, plane_state, 0); @@ -684,6 +672,9 @@ ilk_update_plane(struct drm_plane *plane, if (fb->modifier[0] == I915_FORMAT_MOD_X_TILED) dvscntr |= DVS_TILED; + if (rotation & DRM_ROTATE_180) + dvscntr |= DVS_ROTATE_180; + if (IS_GEN6(dev_priv)) dvscntr |= DVS_TRICKLE_FEED_DISABLE; /* must disable */ @@ -700,9 +691,7 @@ ilk_update_plane(struct drm_plane *plane, intel_add_fb_offsets(&x, &y, plane_state, 0); dvssurf_offset = intel_compute_tile_offset(&x, &y, plane_state, 0); - if (rotation == DRM_ROTATE_180) { - dvscntr |= DVS_ROTATE_180; - + if (rotation & DRM_ROTATE_180) { x += src_w; y += src_h; } @@ -1112,6 +1101,10 @@ intel_sprite_plane_create(struct drm_i915_private *dev_priv, supported_rotations = DRM_ROTATE_0 | DRM_ROTATE_90 | DRM_ROTATE_180 | DRM_ROTATE_270; + } else if (IS_CHERRYVIEW(dev_priv) && pipe == PIPE_B) { + supported_rotations = + DRM_ROTATE_0 | DRM_ROTATE_180 | + DRM_REFLECT_X; } else { supported_rotations = DRM_ROTATE_0 | DRM_ROTATE_180; diff --git a/drivers/gpu/drm/i915/intel_uncore.c b/drivers/gpu/drm/i915/intel_uncore.c index e2b188dcf908..a0944dde7c41 100644 --- a/drivers/gpu/drm/i915/intel_uncore.c +++ b/drivers/gpu/drm/i915/intel_uncore.c @@ -402,6 +402,8 @@ check_for_unclaimed_mmio(struct drm_i915_private *dev_priv) static void __intel_uncore_early_sanitize(struct drm_i915_private *dev_priv, bool restore_forcewake) { + struct intel_device_info *info = mkwrite_device_info(dev_priv); + /* clear out unclaimed reg detection bit */ if (check_for_unclaimed_mmio(dev_priv)) DRM_DEBUG("unclaimed mmio detected on uncore init, clearing\n"); @@ -419,6 +421,10 @@ static void __intel_uncore_early_sanitize(struct drm_i915_private *dev_priv, GT_FIFO_CTL_RC6_POLICY_STALL); } + /* Enable Decoupled MMIO only on BXT C stepping onwards */ + if (!IS_BXT_REVID(dev_priv, BXT_REVID_C0, REVID_FOREVER)) + info->has_decoupled_mmio = false; + intel_uncore_forcewake_reset(dev_priv, restore_forcewake); } @@ -783,7 +789,7 @@ static const struct intel_forcewake_range __gen9_fw_ranges[] = { GEN_FW_RANGE(0x9400, 0x97ff, FORCEWAKE_RENDER | FORCEWAKE_MEDIA), GEN_FW_RANGE(0x9800, 0xafff, FORCEWAKE_BLITTER), GEN_FW_RANGE(0xb000, 0xb47f, FORCEWAKE_RENDER), - GEN_FW_RANGE(0xb480, 0xbfff, FORCEWAKE_BLITTER), + GEN_FW_RANGE(0xb480, 0xcfff, FORCEWAKE_BLITTER), GEN_FW_RANGE(0xd000, 0xd7ff, FORCEWAKE_MEDIA), GEN_FW_RANGE(0xd800, 0xdfff, FORCEWAKE_BLITTER), GEN_FW_RANGE(0xe000, 0xe8ff, FORCEWAKE_RENDER), @@ -831,6 +837,66 @@ unclaimed_reg_debug(struct drm_i915_private *dev_priv, __unclaimed_reg_debug(dev_priv, reg, read, before); } +static const enum decoupled_power_domain fw2dpd_domain[] = { + GEN9_DECOUPLED_PD_RENDER, + GEN9_DECOUPLED_PD_BLITTER, + GEN9_DECOUPLED_PD_ALL, + GEN9_DECOUPLED_PD_MEDIA, + GEN9_DECOUPLED_PD_ALL, + GEN9_DECOUPLED_PD_ALL, + GEN9_DECOUPLED_PD_ALL +}; + +/* + * Decoupled MMIO access for only 1 DWORD + */ +static void __gen9_decoupled_mmio_access(struct drm_i915_private *dev_priv, + u32 reg, + enum forcewake_domains fw_domain, + enum decoupled_ops operation) +{ + enum decoupled_power_domain dp_domain; + u32 ctrl_reg_data = 0; + + dp_domain = fw2dpd_domain[fw_domain - 1]; + + ctrl_reg_data |= reg; + ctrl_reg_data |= (operation << GEN9_DECOUPLED_OP_SHIFT); + ctrl_reg_data |= (dp_domain << GEN9_DECOUPLED_PD_SHIFT); + ctrl_reg_data |= GEN9_DECOUPLED_DW1_GO; + __raw_i915_write32(dev_priv, GEN9_DECOUPLED_REG0_DW1, ctrl_reg_data); + + if (wait_for_atomic((__raw_i915_read32(dev_priv, + GEN9_DECOUPLED_REG0_DW1) & + GEN9_DECOUPLED_DW1_GO) == 0, + FORCEWAKE_ACK_TIMEOUT_MS)) + DRM_ERROR("Decoupled MMIO wait timed out\n"); +} + +static inline u32 +__gen9_decoupled_mmio_read32(struct drm_i915_private *dev_priv, + u32 reg, + enum forcewake_domains fw_domain) +{ + __gen9_decoupled_mmio_access(dev_priv, reg, fw_domain, + GEN9_DECOUPLED_OP_READ); + + return __raw_i915_read32(dev_priv, GEN9_DECOUPLED_REG0_DW0); +} + +static inline void +__gen9_decoupled_mmio_write(struct drm_i915_private *dev_priv, + u32 reg, u32 data, + enum forcewake_domains fw_domain) +{ + + __raw_i915_write32(dev_priv, GEN9_DECOUPLED_REG0_DW0, data); + + __gen9_decoupled_mmio_access(dev_priv, reg, fw_domain, + GEN9_DECOUPLED_OP_WRITE); +} + + #define GEN2_READ_HEADER(x) \ u##x val = 0; \ assert_rpm_wakelock_held(dev_priv); @@ -935,6 +1001,28 @@ fwtable_read##x(struct drm_i915_private *dev_priv, i915_reg_t reg, bool trace) { GEN6_READ_FOOTER; \ } +#define __gen9_decoupled_read(x) \ +static u##x \ +gen9_decoupled_read##x(struct drm_i915_private *dev_priv, \ + i915_reg_t reg, bool trace) { \ + enum forcewake_domains fw_engine; \ + GEN6_READ_HEADER(x); \ + fw_engine = __fwtable_reg_read_fw_domains(offset); \ + if (fw_engine & ~dev_priv->uncore.fw_domains_active) { \ + unsigned i; \ + u32 *ptr_data = (u32 *) &val; \ + for (i = 0; i < x/32; i++, offset += sizeof(u32), ptr_data++) \ + *ptr_data = __gen9_decoupled_mmio_read32(dev_priv, \ + offset, \ + fw_engine); \ + } else { \ + val = __raw_i915_read##x(dev_priv, reg); \ + } \ + GEN6_READ_FOOTER; \ +} + +__gen9_decoupled_read(32) +__gen9_decoupled_read(64) __fwtable_read(8) __fwtable_read(16) __fwtable_read(32) @@ -1064,6 +1152,25 @@ fwtable_write##x(struct drm_i915_private *dev_priv, i915_reg_t reg, u##x val, bo GEN6_WRITE_FOOTER; \ } +#define __gen9_decoupled_write(x) \ +static void \ +gen9_decoupled_write##x(struct drm_i915_private *dev_priv, \ + i915_reg_t reg, u##x val, \ + bool trace) { \ + enum forcewake_domains fw_engine; \ + GEN6_WRITE_HEADER; \ + fw_engine = __fwtable_reg_write_fw_domains(offset); \ + if (fw_engine & ~dev_priv->uncore.fw_domains_active) \ + __gen9_decoupled_mmio_write(dev_priv, \ + offset, \ + val, \ + fw_engine); \ + else \ + __raw_i915_write##x(dev_priv, reg, val); \ + GEN6_WRITE_FOOTER; \ +} + +__gen9_decoupled_write(32) __fwtable_write(8) __fwtable_write(16) __fwtable_write(32) @@ -1287,6 +1394,14 @@ void intel_uncore_init(struct drm_i915_private *dev_priv) ASSIGN_FW_DOMAINS_TABLE(__gen9_fw_ranges); ASSIGN_WRITE_MMIO_VFUNCS(fwtable); ASSIGN_READ_MMIO_VFUNCS(fwtable); + if (HAS_DECOUPLED_MMIO(dev_priv)) { + dev_priv->uncore.funcs.mmio_readl = + gen9_decoupled_read32; + dev_priv->uncore.funcs.mmio_readq = + gen9_decoupled_read64; + dev_priv->uncore.funcs.mmio_writel = + gen9_decoupled_write32; + } break; case 8: if (IS_CHERRYVIEW(dev_priv)) { diff --git a/drivers/gpu/drm/i915/intel_vbt_defs.h b/drivers/gpu/drm/i915/intel_vbt_defs.h index 68db9621f1f0..8886cab19f98 100644 --- a/drivers/gpu/drm/i915/intel_vbt_defs.h +++ b/drivers/gpu/drm/i915/intel_vbt_defs.h @@ -280,7 +280,8 @@ struct common_child_dev_config { u8 dp_support:1; u8 tmds_support:1; u8 support_reserved:5; - u8 not_common3[12]; + u8 aux_channel; + u8 not_common3[11]; u8 iboost_level; } __packed; |