Diffstat (limited to 'drivers/gpu/drm/i915/intel_engine_cs.c')
 drivers/gpu/drm/i915/intel_engine_cs.c | 1323 
 1 file changed, 438 insertions(+), 885 deletions(-)
diff --git a/drivers/gpu/drm/i915/intel_engine_cs.c b/drivers/gpu/drm/i915/intel_engine_cs.c index f7c25828d3bb..76b5f94ea6cb 100644 --- a/drivers/gpu/drm/i915/intel_engine_cs.c +++ b/drivers/gpu/drm/i915/intel_engine_cs.c @@ -25,7 +25,6 @@ #include <drm/drm_print.h> #include "i915_drv.h" -#include "i915_vgpu.h" #include "intel_ringbuffer.h" #include "intel_lrc.h" @@ -81,13 +80,17 @@ static const struct engine_class_info intel_engine_classes[] = { }, }; +#define MAX_MMIO_BASES 3 struct engine_info { unsigned int hw_id; unsigned int uabi_id; u8 class; u8 instance; - u32 mmio_base; - unsigned irq_shift; + /* mmio bases table *must* be sorted in reverse gen order */ + struct engine_mmio_base { + u32 gen : 8; + u32 base : 24; + } mmio_bases[MAX_MMIO_BASES]; }; static const struct engine_info intel_engines[] = { @@ -96,64 +99,76 @@ static const struct engine_info intel_engines[] = { .uabi_id = I915_EXEC_RENDER, .class = RENDER_CLASS, .instance = 0, - .mmio_base = RENDER_RING_BASE, - .irq_shift = GEN8_RCS_IRQ_SHIFT, + .mmio_bases = { + { .gen = 1, .base = RENDER_RING_BASE } + }, }, [BCS] = { .hw_id = BCS_HW, .uabi_id = I915_EXEC_BLT, .class = COPY_ENGINE_CLASS, .instance = 0, - .mmio_base = BLT_RING_BASE, - .irq_shift = GEN8_BCS_IRQ_SHIFT, + .mmio_bases = { + { .gen = 6, .base = BLT_RING_BASE } + }, }, [VCS] = { .hw_id = VCS_HW, .uabi_id = I915_EXEC_BSD, .class = VIDEO_DECODE_CLASS, .instance = 0, - .mmio_base = GEN6_BSD_RING_BASE, - .irq_shift = GEN8_VCS1_IRQ_SHIFT, + .mmio_bases = { + { .gen = 11, .base = GEN11_BSD_RING_BASE }, + { .gen = 6, .base = GEN6_BSD_RING_BASE }, + { .gen = 4, .base = BSD_RING_BASE } + }, }, [VCS2] = { .hw_id = VCS2_HW, .uabi_id = I915_EXEC_BSD, .class = VIDEO_DECODE_CLASS, .instance = 1, - .mmio_base = GEN8_BSD2_RING_BASE, - .irq_shift = GEN8_VCS2_IRQ_SHIFT, + .mmio_bases = { + { .gen = 11, .base = GEN11_BSD2_RING_BASE }, + { .gen = 8, .base = GEN8_BSD2_RING_BASE } + }, }, [VCS3] = { .hw_id = VCS3_HW, .uabi_id = I915_EXEC_BSD, .class = VIDEO_DECODE_CLASS, .instance = 2, - .mmio_base = GEN11_BSD3_RING_BASE, - .irq_shift = 0, /* not used */ + .mmio_bases = { + { .gen = 11, .base = GEN11_BSD3_RING_BASE } + }, }, [VCS4] = { .hw_id = VCS4_HW, .uabi_id = I915_EXEC_BSD, .class = VIDEO_DECODE_CLASS, .instance = 3, - .mmio_base = GEN11_BSD4_RING_BASE, - .irq_shift = 0, /* not used */ + .mmio_bases = { + { .gen = 11, .base = GEN11_BSD4_RING_BASE } + }, }, [VECS] = { .hw_id = VECS_HW, .uabi_id = I915_EXEC_VEBOX, .class = VIDEO_ENHANCEMENT_CLASS, .instance = 0, - .mmio_base = VEBOX_RING_BASE, - .irq_shift = GEN8_VECS_IRQ_SHIFT, + .mmio_bases = { + { .gen = 11, .base = GEN11_VEBOX_RING_BASE }, + { .gen = 7, .base = VEBOX_RING_BASE } + }, }, [VECS2] = { .hw_id = VECS2_HW, .uabi_id = I915_EXEC_VEBOX, .class = VIDEO_ENHANCEMENT_CLASS, .instance = 1, - .mmio_base = GEN11_VEBOX2_RING_BASE, - .irq_shift = 0, /* not used */ + .mmio_bases = { + { .gen = 11, .base = GEN11_VEBOX2_RING_BASE } + }, }, }; @@ -214,6 +229,7 @@ __intel_engine_context_size(struct drm_i915_private *dev_priv, u8 class) break; default: MISSING_CASE(class); + /* fall through */ case VIDEO_DECODE_CLASS: case VIDEO_ENHANCEMENT_CLASS: case COPY_ENGINE_CLASS: @@ -223,16 +239,36 @@ __intel_engine_context_size(struct drm_i915_private *dev_priv, u8 class) } } +static u32 __engine_mmio_base(struct drm_i915_private *i915, + const struct engine_mmio_base *bases) +{ + int i; + + for (i = 0; i < MAX_MMIO_BASES; i++) + if (INTEL_GEN(i915) >= bases[i].gen) + break; + + GEM_BUG_ON(i == MAX_MMIO_BASES); + GEM_BUG_ON(!bases[i].base); 
+ + return bases[i].base; +} + +static void __sprint_engine_name(char *name, const struct engine_info *info) +{ + WARN_ON(snprintf(name, INTEL_ENGINE_CS_MAX_NAME, "%s%u", + intel_engine_classes[info->class].name, + info->instance) >= INTEL_ENGINE_CS_MAX_NAME); +} + static int intel_engine_setup(struct drm_i915_private *dev_priv, enum intel_engine_id id) { const struct engine_info *info = &intel_engines[id]; - const struct engine_class_info *class_info; struct intel_engine_cs *engine; GEM_BUG_ON(info->class >= ARRAY_SIZE(intel_engine_classes)); - class_info = &intel_engine_classes[info->class]; BUILD_BUG_ON(MAX_ENGINE_CLASS >= BIT(GEN11_ENGINE_CLASS_WIDTH)); BUILD_BUG_ON(MAX_ENGINE_INSTANCE >= BIT(GEN11_ENGINE_INSTANCE_WIDTH)); @@ -253,45 +289,26 @@ intel_engine_setup(struct drm_i915_private *dev_priv, engine->id = id; engine->i915 = dev_priv; - WARN_ON(snprintf(engine->name, sizeof(engine->name), "%s%u", - class_info->name, info->instance) >= - sizeof(engine->name)); + __sprint_engine_name(engine->name, info); engine->hw_id = engine->guc_id = info->hw_id; - if (INTEL_GEN(dev_priv) >= 11) { - switch (engine->id) { - case VCS: - engine->mmio_base = GEN11_BSD_RING_BASE; - break; - case VCS2: - engine->mmio_base = GEN11_BSD2_RING_BASE; - break; - case VECS: - engine->mmio_base = GEN11_VEBOX_RING_BASE; - break; - default: - /* take the original value for all other engines */ - engine->mmio_base = info->mmio_base; - break; - } - } else { - engine->mmio_base = info->mmio_base; - } - engine->irq_shift = info->irq_shift; + engine->mmio_base = __engine_mmio_base(dev_priv, info->mmio_bases); engine->class = info->class; engine->instance = info->instance; engine->uabi_id = info->uabi_id; - engine->uabi_class = class_info->uabi_class; + engine->uabi_class = intel_engine_classes[info->class].uabi_class; engine->context_size = __intel_engine_context_size(dev_priv, engine->class); if (WARN_ON(engine->context_size > BIT(20))) engine->context_size = 0; + if (engine->context_size) + DRIVER_CAPS(dev_priv)->has_logical_contexts = true; /* Nothing to do here, execute in order of dependencies */ engine->schedule = NULL; - spin_lock_init(&engine->stats.lock); + seqlock_init(&engine->stats.lock); ATOMIC_INIT_NOTIFIER_HEAD(&engine->context_status_notifier); @@ -436,41 +453,21 @@ void intel_engine_init_global_seqno(struct intel_engine_cs *engine, u32 seqno) GEM_BUG_ON(intel_engine_get_seqno(engine) != seqno); } -static void intel_engine_init_timeline(struct intel_engine_cs *engine) +static void intel_engine_init_batch_pool(struct intel_engine_cs *engine) { - engine->timeline = &engine->i915->gt.global_timeline.engine[engine->id]; -} - -static bool csb_force_mmio(struct drm_i915_private *i915) -{ - /* - * IOMMU adds unpredictable latency causing the CSB write (from the - * GPU into the HWSP) to only be visible some time after the interrupt - * (missed breadcrumb syndrome). 
- */ - if (intel_vtd_active()) - return true; - - /* Older GVT emulation depends upon intercepting CSB mmio */ - if (intel_vgpu_active(i915) && !intel_vgpu_has_hwsp_emulation(i915)) - return true; - - return false; + i915_gem_batch_pool_init(&engine->batch_pool, engine); } static void intel_engine_init_execlist(struct intel_engine_cs *engine) { struct intel_engine_execlists * const execlists = &engine->execlists; - execlists->csb_use_mmio = csb_force_mmio(engine->i915); - execlists->port_mask = 1; BUILD_BUG_ON_NOT_POWER_OF_2(execlists_num_ports(execlists)); GEM_BUG_ON(execlists_num_ports(execlists) > EXECLIST_MAX_PORTS); execlists->queue_priority = INT_MIN; - execlists->queue = RB_ROOT; - execlists->first = NULL; + execlists->queue = RB_ROOT_CACHED; } /** @@ -484,83 +481,25 @@ static void intel_engine_init_execlist(struct intel_engine_cs *engine) */ void intel_engine_setup_common(struct intel_engine_cs *engine) { - intel_engine_init_execlist(engine); + i915_timeline_init(engine->i915, &engine->timeline, engine->name); + lockdep_set_subclass(&engine->timeline.lock, TIMELINE_ENGINE); - intel_engine_init_timeline(engine); + intel_engine_init_execlist(engine); intel_engine_init_hangcheck(engine); - i915_gem_batch_pool_init(engine, &engine->batch_pool); - + intel_engine_init_batch_pool(engine); intel_engine_init_cmd_parser(engine); } -int intel_engine_create_scratch(struct intel_engine_cs *engine, int size) -{ - struct drm_i915_gem_object *obj; - struct i915_vma *vma; - int ret; - - WARN_ON(engine->scratch); - - obj = i915_gem_object_create_stolen(engine->i915, size); - if (!obj) - obj = i915_gem_object_create_internal(engine->i915, size); - if (IS_ERR(obj)) { - DRM_ERROR("Failed to allocate scratch page\n"); - return PTR_ERR(obj); - } - - vma = i915_vma_instance(obj, &engine->i915->ggtt.base, NULL); - if (IS_ERR(vma)) { - ret = PTR_ERR(vma); - goto err_unref; - } - - ret = i915_vma_pin(vma, 0, 4096, PIN_GLOBAL | PIN_HIGH); - if (ret) - goto err_unref; - - engine->scratch = vma; - DRM_DEBUG_DRIVER("%s pipe control offset: 0x%08x\n", - engine->name, i915_ggtt_offset(vma)); - return 0; - -err_unref: - i915_gem_object_put(obj); - return ret; -} - -static void intel_engine_cleanup_scratch(struct intel_engine_cs *engine) -{ - i915_vma_unpin_and_release(&engine->scratch); -} - -static void cleanup_phys_status_page(struct intel_engine_cs *engine) -{ - struct drm_i915_private *dev_priv = engine->i915; - - if (!dev_priv->status_page_dmah) - return; - - drm_pci_free(&dev_priv->drm, dev_priv->status_page_dmah); - engine->status_page.page_addr = NULL; -} - static void cleanup_status_page(struct intel_engine_cs *engine) { - struct i915_vma *vma; - struct drm_i915_gem_object *obj; - - vma = fetch_and_zero(&engine->status_page.vma); - if (!vma) - return; + if (HWS_NEEDS_PHYSICAL(engine->i915)) { + void *addr = fetch_and_zero(&engine->status_page.page_addr); - obj = vma->obj; - - i915_vma_unpin(vma); - i915_vma_close(vma); + __free_page(virt_to_page(addr)); + } - i915_gem_object_unpin_map(obj); - __i915_gem_object_release_unless_active(obj); + i915_vma_unpin_and_release(&engine->status_page.vma, + I915_VMA_RELEASE_MAP); } static int init_status_page(struct intel_engine_cs *engine) @@ -581,7 +520,7 @@ static int init_status_page(struct intel_engine_cs *engine) if (ret) goto err; - vma = i915_vma_instance(obj, &engine->i915->ggtt.base, NULL); + vma = i915_vma_instance(obj, &engine->i915->ggtt.vm, NULL); if (IS_ERR(vma)) { ret = PTR_ERR(vma); goto err; @@ -602,7 +541,7 @@ static int init_status_page(struct 
intel_engine_cs *engine) flags |= PIN_MAPPABLE; else flags |= PIN_HIGH; - ret = i915_vma_pin(vma, 0, 4096, flags); + ret = i915_vma_pin(vma, 0, 0, flags); if (ret) goto err; @@ -615,9 +554,6 @@ static int init_status_page(struct intel_engine_cs *engine) engine->status_page.vma = vma; engine->status_page.ggtt_offset = i915_ggtt_offset(vma); engine->status_page.page_addr = memset(vaddr, 0, PAGE_SIZE); - - DRM_DEBUG_DRIVER("%s hws offset: 0x%08x\n", - engine->name, i915_ggtt_offset(vma)); return 0; err_unpin: @@ -629,21 +565,28 @@ err: static int init_phys_status_page(struct intel_engine_cs *engine) { - struct drm_i915_private *dev_priv = engine->i915; - - GEM_BUG_ON(engine->id != RCS); + struct page *page; - dev_priv->status_page_dmah = - drm_pci_alloc(&dev_priv->drm, PAGE_SIZE, PAGE_SIZE); - if (!dev_priv->status_page_dmah) + /* + * Though the HWS register does support 36bit addresses, historically + * we have had hangs and corruption reported due to wild writes if + * the HWS is placed above 4G. + */ + page = alloc_page(GFP_KERNEL | __GFP_DMA32 | __GFP_ZERO); + if (!page) return -ENOMEM; - engine->status_page.page_addr = dev_priv->status_page_dmah->vaddr; - memset(engine->status_page.page_addr, 0, PAGE_SIZE); + engine->status_page.page_addr = page_address(page); return 0; } +static void __intel_context_unpin(struct i915_gem_context *ctx, + struct intel_engine_cs *engine) +{ + intel_context_unpin(to_intel_context(ctx, engine)); +} + /** * intel_engines_init_common - initialize cengine state which might require hw access * @engine: Engine to initialize. @@ -657,7 +600,8 @@ static int init_phys_status_page(struct intel_engine_cs *engine) */ int intel_engine_init_common(struct intel_engine_cs *engine) { - struct intel_ring *ring; + struct drm_i915_private *i915 = engine->i915; + struct intel_context *ce; int ret; engine->set_default_submission(engine); @@ -669,19 +613,18 @@ int intel_engine_init_common(struct intel_engine_cs *engine) * be available. To avoid this we always pin the default * context. */ - ring = engine->context_pin(engine, engine->i915->kernel_context); - if (IS_ERR(ring)) - return PTR_ERR(ring); + ce = intel_context_pin(i915->kernel_context, engine); + if (IS_ERR(ce)) + return PTR_ERR(ce); /* * Similarly the preempt context must always be available so that * we can interrupt the engine at any time. 
*/ - if (engine->i915->preempt_context) { - ring = engine->context_pin(engine, - engine->i915->preempt_context); - if (IS_ERR(ring)) { - ret = PTR_ERR(ring); + if (i915->preempt_context) { + ce = intel_context_pin(i915->preempt_context, engine); + if (IS_ERR(ce)) { + ret = PTR_ERR(ce); goto err_unpin_kernel; } } @@ -690,7 +633,7 @@ int intel_engine_init_common(struct intel_engine_cs *engine) if (ret) goto err_unpin_preempt; - if (HWS_NEEDS_PHYSICAL(engine->i915)) + if (HWS_NEEDS_PHYSICAL(i915)) ret = init_phys_status_page(engine); else ret = init_status_page(engine); @@ -702,10 +645,11 @@ int intel_engine_init_common(struct intel_engine_cs *engine) err_breadcrumbs: intel_engine_fini_breadcrumbs(engine); err_unpin_preempt: - if (engine->i915->preempt_context) - engine->context_unpin(engine, engine->i915->preempt_context); + if (i915->preempt_context) + __intel_context_unpin(i915->preempt_context, engine); + err_unpin_kernel: - engine->context_unpin(engine, engine->i915->kernel_context); + __intel_context_unpin(i915->kernel_context, engine); return ret; } @@ -718,12 +662,9 @@ err_unpin_kernel: */ void intel_engine_cleanup_common(struct intel_engine_cs *engine) { - intel_engine_cleanup_scratch(engine); + struct drm_i915_private *i915 = engine->i915; - if (HWS_NEEDS_PHYSICAL(engine->i915)) - cleanup_phys_status_page(engine); - else - cleanup_status_page(engine); + cleanup_status_page(engine); intel_engine_fini_breadcrumbs(engine); intel_engine_cleanup_cmd_parser(engine); @@ -732,9 +673,13 @@ void intel_engine_cleanup_common(struct intel_engine_cs *engine) if (engine->default_state) i915_gem_object_put(engine->default_state); - if (engine->i915->preempt_context) - engine->context_unpin(engine, engine->i915->preempt_context); - engine->context_unpin(engine, engine->i915->kernel_context); + if (i915->preempt_context) + __intel_context_unpin(i915->preempt_context, engine); + __intel_context_unpin(i915->kernel_context, engine); + + i915_timeline_fini(&engine->timeline); + + intel_wa_list_free(&engine->wa_list); } u64 intel_engine_get_active_head(const struct intel_engine_cs *engine) @@ -767,6 +712,45 @@ u64 intel_engine_get_last_batch_head(const struct intel_engine_cs *engine) return bbaddr; } +int intel_engine_stop_cs(struct intel_engine_cs *engine) +{ + struct drm_i915_private *dev_priv = engine->i915; + const u32 base = engine->mmio_base; + const i915_reg_t mode = RING_MI_MODE(base); + int err; + + if (INTEL_GEN(dev_priv) < 3) + return -ENODEV; + + GEM_TRACE("%s\n", engine->name); + + I915_WRITE_FW(mode, _MASKED_BIT_ENABLE(STOP_RING)); + + err = 0; + if (__intel_wait_for_register_fw(dev_priv, + mode, MODE_IDLE, MODE_IDLE, + 1000, 0, + NULL)) { + GEM_TRACE("%s: timed out on STOP_RING -> IDLE\n", engine->name); + err = -ETIMEDOUT; + } + + /* A final mmio read to let GPU writes be hopefully flushed to memory */ + POSTING_READ_FW(mode); + + return err; +} + +void intel_engine_cancel_stop_cs(struct intel_engine_cs *engine) +{ + struct drm_i915_private *dev_priv = engine->i915; + + GEM_TRACE("%s\n", engine->name); + + I915_WRITE_FW(RING_MI_MODE(engine->mmio_base), + _MASKED_BIT_DISABLE(STOP_RING)); +} + const char *i915_cache_level_str(struct drm_i915_private *i915, int type) { switch (type) { @@ -778,14 +762,50 @@ const char *i915_cache_level_str(struct drm_i915_private *i915, int type) } } +u32 intel_calculate_mcr_s_ss_select(struct drm_i915_private *dev_priv) +{ + const struct sseu_dev_info *sseu = &(INTEL_INFO(dev_priv)->sseu); + u32 mcr_s_ss_select; + u32 slice = fls(sseu->slice_mask); + u32 
subslice = fls(sseu->subslice_mask[slice]); + + if (INTEL_GEN(dev_priv) == 10) + mcr_s_ss_select = GEN8_MCR_SLICE(slice) | + GEN8_MCR_SUBSLICE(subslice); + else if (INTEL_GEN(dev_priv) >= 11) + mcr_s_ss_select = GEN11_MCR_SLICE(slice) | + GEN11_MCR_SUBSLICE(subslice); + else + mcr_s_ss_select = 0; + + return mcr_s_ss_select; +} + static inline uint32_t read_subslice_reg(struct drm_i915_private *dev_priv, int slice, int subslice, i915_reg_t reg) { + uint32_t mcr_slice_subslice_mask; + uint32_t mcr_slice_subslice_select; + uint32_t default_mcr_s_ss_select; uint32_t mcr; uint32_t ret; enum forcewake_domains fw_domains; + if (INTEL_GEN(dev_priv) >= 11) { + mcr_slice_subslice_mask = GEN11_MCR_SLICE_MASK | + GEN11_MCR_SUBSLICE_MASK; + mcr_slice_subslice_select = GEN11_MCR_SLICE(slice) | + GEN11_MCR_SUBSLICE(subslice); + } else { + mcr_slice_subslice_mask = GEN8_MCR_SLICE_MASK | + GEN8_MCR_SUBSLICE_MASK; + mcr_slice_subslice_select = GEN8_MCR_SLICE(slice) | + GEN8_MCR_SUBSLICE(subslice); + } + + default_mcr_s_ss_select = intel_calculate_mcr_s_ss_select(dev_priv); + fw_domains = intel_uncore_forcewake_for_reg(dev_priv, reg, FW_REG_READ); fw_domains |= intel_uncore_forcewake_for_reg(dev_priv, @@ -796,18 +816,19 @@ read_subslice_reg(struct drm_i915_private *dev_priv, int slice, intel_uncore_forcewake_get__locked(dev_priv, fw_domains); mcr = I915_READ_FW(GEN8_MCR_SELECTOR); - /* - * The HW expects the slice and sublice selectors to be reset to 0 - * after reading out the registers. - */ - WARN_ON_ONCE(mcr & (GEN8_MCR_SLICE_MASK | GEN8_MCR_SUBSLICE_MASK)); - mcr &= ~(GEN8_MCR_SLICE_MASK | GEN8_MCR_SUBSLICE_MASK); - mcr |= GEN8_MCR_SLICE(slice) | GEN8_MCR_SUBSLICE(subslice); + + WARN_ON_ONCE((mcr & mcr_slice_subslice_mask) != + default_mcr_s_ss_select); + + mcr &= ~mcr_slice_subslice_mask; + mcr |= mcr_slice_subslice_select; I915_WRITE_FW(GEN8_MCR_SELECTOR, mcr); ret = I915_READ_FW(reg); - mcr &= ~(GEN8_MCR_SLICE_MASK | GEN8_MCR_SUBSLICE_MASK); + mcr &= ~mcr_slice_subslice_mask; + mcr |= default_mcr_s_ss_select; + I915_WRITE_FW(GEN8_MCR_SELECTOR, mcr); intel_uncore_forcewake_put__locked(dev_priv, fw_domains); @@ -871,644 +892,6 @@ void intel_engine_get_instdone(struct intel_engine_cs *engine, } } -static int wa_add(struct drm_i915_private *dev_priv, - i915_reg_t addr, - const u32 mask, const u32 val) -{ - const u32 idx = dev_priv->workarounds.count; - - if (WARN_ON(idx >= I915_MAX_WA_REGS)) - return -ENOSPC; - - dev_priv->workarounds.reg[idx].addr = addr; - dev_priv->workarounds.reg[idx].value = val; - dev_priv->workarounds.reg[idx].mask = mask; - - dev_priv->workarounds.count++; - - return 0; -} - -#define WA_REG(addr, mask, val) do { \ - const int r = wa_add(dev_priv, (addr), (mask), (val)); \ - if (r) \ - return r; \ - } while (0) - -#define WA_SET_BIT_MASKED(addr, mask) \ - WA_REG(addr, (mask), _MASKED_BIT_ENABLE(mask)) - -#define WA_CLR_BIT_MASKED(addr, mask) \ - WA_REG(addr, (mask), _MASKED_BIT_DISABLE(mask)) - -#define WA_SET_FIELD_MASKED(addr, mask, value) \ - WA_REG(addr, mask, _MASKED_FIELD(mask, value)) - -static int wa_ring_whitelist_reg(struct intel_engine_cs *engine, - i915_reg_t reg) -{ - struct drm_i915_private *dev_priv = engine->i915; - struct i915_workarounds *wa = &dev_priv->workarounds; - const uint32_t index = wa->hw_whitelist_count[engine->id]; - - if (WARN_ON(index >= RING_MAX_NONPRIV_SLOTS)) - return -EINVAL; - - I915_WRITE(RING_FORCE_TO_NONPRIV(engine->mmio_base, index), - i915_mmio_reg_offset(reg)); - wa->hw_whitelist_count[engine->id]++; - - return 0; -} - -static int 
gen8_init_workarounds(struct intel_engine_cs *engine) -{ - struct drm_i915_private *dev_priv = engine->i915; - - WA_SET_BIT_MASKED(INSTPM, INSTPM_FORCE_ORDERING); - - /* WaDisableAsyncFlipPerfMode:bdw,chv */ - WA_SET_BIT_MASKED(MI_MODE, ASYNC_FLIP_PERF_DISABLE); - - /* WaDisablePartialInstShootdown:bdw,chv */ - WA_SET_BIT_MASKED(GEN8_ROW_CHICKEN, - PARTIAL_INSTRUCTION_SHOOTDOWN_DISABLE); - - /* Use Force Non-Coherent whenever executing a 3D context. This is a - * workaround for for a possible hang in the unlikely event a TLB - * invalidation occurs during a PSD flush. - */ - /* WaForceEnableNonCoherent:bdw,chv */ - /* WaHdcDisableFetchWhenMasked:bdw,chv */ - WA_SET_BIT_MASKED(HDC_CHICKEN0, - HDC_DONOT_FETCH_MEM_WHEN_MASKED | - HDC_FORCE_NON_COHERENT); - - /* From the Haswell PRM, Command Reference: Registers, CACHE_MODE_0: - * "The Hierarchical Z RAW Stall Optimization allows non-overlapping - * polygons in the same 8x4 pixel/sample area to be processed without - * stalling waiting for the earlier ones to write to Hierarchical Z - * buffer." - * - * This optimization is off by default for BDW and CHV; turn it on. - */ - WA_CLR_BIT_MASKED(CACHE_MODE_0_GEN7, HIZ_RAW_STALL_OPT_DISABLE); - - /* Wa4x4STCOptimizationDisable:bdw,chv */ - WA_SET_BIT_MASKED(CACHE_MODE_1, GEN8_4x4_STC_OPTIMIZATION_DISABLE); - - /* - * BSpec recommends 8x4 when MSAA is used, - * however in practice 16x4 seems fastest. - * - * Note that PS/WM thread counts depend on the WIZ hashing - * disable bit, which we don't touch here, but it's good - * to keep in mind (see 3DSTATE_PS and 3DSTATE_WM). - */ - WA_SET_FIELD_MASKED(GEN7_GT_MODE, - GEN6_WIZ_HASHING_MASK, - GEN6_WIZ_HASHING_16x4); - - return 0; -} - -static int bdw_init_workarounds(struct intel_engine_cs *engine) -{ - struct drm_i915_private *dev_priv = engine->i915; - int ret; - - ret = gen8_init_workarounds(engine); - if (ret) - return ret; - - /* WaDisableThreadStallDopClockGating:bdw (pre-production) */ - WA_SET_BIT_MASKED(GEN8_ROW_CHICKEN, STALL_DOP_GATING_DISABLE); - - /* WaDisableDopClockGating:bdw - * - * Also see the related UCGTCL1 write in broadwell_init_clock_gating() - * to disable EUTC clock gating. - */ - WA_SET_BIT_MASKED(GEN7_ROW_CHICKEN2, - DOP_CLOCK_GATING_DISABLE); - - WA_SET_BIT_MASKED(HALF_SLICE_CHICKEN3, - GEN8_SAMPLER_POWER_BYPASS_DIS); - - WA_SET_BIT_MASKED(HDC_CHICKEN0, - /* WaForceContextSaveRestoreNonCoherent:bdw */ - HDC_FORCE_CONTEXT_SAVE_RESTORE_NON_COHERENT | - /* WaDisableFenceDestinationToSLM:bdw (pre-prod) */ - (IS_BDW_GT3(dev_priv) ? HDC_FENCE_DEST_SLM_DISABLE : 0)); - - return 0; -} - -static int chv_init_workarounds(struct intel_engine_cs *engine) -{ - struct drm_i915_private *dev_priv = engine->i915; - int ret; - - ret = gen8_init_workarounds(engine); - if (ret) - return ret; - - /* WaDisableThreadStallDopClockGating:chv */ - WA_SET_BIT_MASKED(GEN8_ROW_CHICKEN, STALL_DOP_GATING_DISABLE); - - /* Improve HiZ throughput on CHV. 
*/ - WA_SET_BIT_MASKED(HIZ_CHICKEN, CHV_HZ_8X8_MODE_IN_1X); - - return 0; -} - -static int gen9_init_workarounds(struct intel_engine_cs *engine) -{ - struct drm_i915_private *dev_priv = engine->i915; - int ret; - - /* WaConextSwitchWithConcurrentTLBInvalidate:skl,bxt,kbl,glk,cfl */ - I915_WRITE(GEN9_CSFE_CHICKEN1_RCS, _MASKED_BIT_ENABLE(GEN9_PREEMPT_GPGPU_SYNC_SWITCH_DISABLE)); - - /* WaEnableLbsSlaRetryTimerDecrement:skl,bxt,kbl,glk,cfl */ - I915_WRITE(BDW_SCRATCH1, I915_READ(BDW_SCRATCH1) | - GEN9_LBS_SLA_RETRY_TIMER_DECREMENT_ENABLE); - - /* WaDisableKillLogic:bxt,skl,kbl */ - if (!IS_COFFEELAKE(dev_priv)) - I915_WRITE(GAM_ECOCHK, I915_READ(GAM_ECOCHK) | - ECOCHK_DIS_TLB); - - if (HAS_LLC(dev_priv)) { - /* WaCompressedResourceSamplerPbeMediaNewHashMode:skl,kbl - * - * Must match Display Engine. See - * WaCompressedResourceDisplayNewHashMode. - */ - WA_SET_BIT_MASKED(COMMON_SLICE_CHICKEN2, - GEN9_PBE_COMPRESSED_HASH_SELECTION); - WA_SET_BIT_MASKED(GEN9_HALF_SLICE_CHICKEN7, - GEN9_SAMPLER_HASH_COMPRESSED_READ_ADDR); - - I915_WRITE(MMCD_MISC_CTRL, - I915_READ(MMCD_MISC_CTRL) | - MMCD_PCLA | - MMCD_HOTSPOT_EN); - } - - /* WaClearFlowControlGpgpuContextSave:skl,bxt,kbl,glk,cfl */ - /* WaDisablePartialInstShootdown:skl,bxt,kbl,glk,cfl */ - WA_SET_BIT_MASKED(GEN8_ROW_CHICKEN, - FLOW_CONTROL_ENABLE | - PARTIAL_INSTRUCTION_SHOOTDOWN_DISABLE); - - /* Syncing dependencies between camera and graphics:skl,bxt,kbl */ - if (!IS_COFFEELAKE(dev_priv)) - WA_SET_BIT_MASKED(HALF_SLICE_CHICKEN3, - GEN9_DISABLE_OCL_OOB_SUPPRESS_LOGIC); - - /* WaEnableYV12BugFixInHalfSliceChicken7:skl,bxt,kbl,glk,cfl */ - /* WaEnableSamplerGPGPUPreemptionSupport:skl,bxt,kbl,cfl */ - WA_SET_BIT_MASKED(GEN9_HALF_SLICE_CHICKEN7, - GEN9_ENABLE_YV12_BUGFIX | - GEN9_ENABLE_GPGPU_PREEMPTION); - - /* Wa4x4STCOptimizationDisable:skl,bxt,kbl,glk,cfl */ - /* WaDisablePartialResolveInVc:skl,bxt,kbl,cfl */ - WA_SET_BIT_MASKED(CACHE_MODE_1, (GEN8_4x4_STC_OPTIMIZATION_DISABLE | - GEN9_PARTIAL_RESOLVE_IN_VC_DISABLE)); - - /* WaCcsTlbPrefetchDisable:skl,bxt,kbl,glk,cfl */ - WA_CLR_BIT_MASKED(GEN9_HALF_SLICE_CHICKEN5, - GEN9_CCS_TLB_PREFETCH_ENABLE); - - /* WaForceContextSaveRestoreNonCoherent:skl,bxt,kbl,cfl */ - WA_SET_BIT_MASKED(HDC_CHICKEN0, - HDC_FORCE_CONTEXT_SAVE_RESTORE_NON_COHERENT | - HDC_FORCE_CSR_NON_COHERENT_OVR_DISABLE); - - /* WaForceEnableNonCoherent and WaDisableHDCInvalidation are - * both tied to WaForceContextSaveRestoreNonCoherent - * in some hsds for skl. We keep the tie for all gen9. The - * documentation is a bit hazy and so we want to get common behaviour, - * even though there is no clear evidence we would need both on kbl/bxt. - * This area has been source of system hangs so we play it safe - * and mimic the skl regardless of what bspec says. - * - * Use Force Non-Coherent whenever executing a 3D context. This - * is a workaround for a possible hang in the unlikely event - * a TLB invalidation occurs during a PSD flush. 
- */ - - /* WaForceEnableNonCoherent:skl,bxt,kbl,cfl */ - WA_SET_BIT_MASKED(HDC_CHICKEN0, - HDC_FORCE_NON_COHERENT); - - /* WaDisableHDCInvalidation:skl,bxt,kbl,cfl */ - I915_WRITE(GAM_ECOCHK, I915_READ(GAM_ECOCHK) | - BDW_DISABLE_HDC_INVALIDATION); - - /* WaDisableSamplerPowerBypassForSOPingPong:skl,bxt,kbl,cfl */ - if (IS_SKYLAKE(dev_priv) || - IS_KABYLAKE(dev_priv) || - IS_COFFEELAKE(dev_priv)) - WA_SET_BIT_MASKED(HALF_SLICE_CHICKEN3, - GEN8_SAMPLER_POWER_BYPASS_DIS); - - /* WaDisableSTUnitPowerOptimization:skl,bxt,kbl,glk,cfl */ - WA_SET_BIT_MASKED(HALF_SLICE_CHICKEN2, GEN8_ST_PO_DISABLE); - - /* WaProgramL3SqcReg1DefaultForPerf:bxt,glk */ - if (IS_GEN9_LP(dev_priv)) { - u32 val = I915_READ(GEN8_L3SQCREG1); - - val &= ~L3_PRIO_CREDITS_MASK; - val |= L3_GENERAL_PRIO_CREDITS(62) | L3_HIGH_PRIO_CREDITS(2); - I915_WRITE(GEN8_L3SQCREG1, val); - } - - /* WaOCLCoherentLineFlush:skl,bxt,kbl,cfl */ - I915_WRITE(GEN8_L3SQCREG4, (I915_READ(GEN8_L3SQCREG4) | - GEN8_LQSC_FLUSH_COHERENT_LINES)); - - /* - * Supporting preemption with fine-granularity requires changes in the - * batch buffer programming. Since we can't break old userspace, we - * need to set our default preemption level to safe value. Userspace is - * still able to use more fine-grained preemption levels, since in - * WaEnablePreemptionGranularityControlByUMD we're whitelisting the - * per-ctx register. As such, WaDisable{3D,GPGPU}MidCmdPreemption are - * not real HW workarounds, but merely a way to start using preemption - * while maintaining old contract with userspace. - */ - - /* WaDisable3DMidCmdPreemption:skl,bxt,glk,cfl,[cnl] */ - WA_CLR_BIT_MASKED(GEN8_CS_CHICKEN1, GEN9_PREEMPT_3D_OBJECT_LEVEL); - - /* WaDisableGPGPUMidCmdPreemption:skl,bxt,blk,cfl,[cnl] */ - WA_SET_FIELD_MASKED(GEN8_CS_CHICKEN1, GEN9_PREEMPT_GPGPU_LEVEL_MASK, - GEN9_PREEMPT_GPGPU_COMMAND_LEVEL); - - /* WaClearHIZ_WM_CHICKEN3:bxt,glk */ - if (IS_GEN9_LP(dev_priv)) - WA_SET_BIT_MASKED(GEN9_WM_CHICKEN3, GEN9_FACTOR_IN_CLR_VAL_HIZ); - - /* WaVFEStateAfterPipeControlwithMediaStateClear:skl,bxt,glk,cfl */ - ret = wa_ring_whitelist_reg(engine, GEN9_CTX_PREEMPT_REG); - if (ret) - return ret; - - /* WaEnablePreemptionGranularityControlByUMD:skl,bxt,kbl,cfl,[cnl] */ - I915_WRITE(GEN7_FF_SLICE_CS_CHICKEN1, - _MASKED_BIT_ENABLE(GEN9_FFSC_PERCTX_PREEMPT_CTRL)); - ret = wa_ring_whitelist_reg(engine, GEN8_CS_CHICKEN1); - if (ret) - return ret; - - /* WaAllowUMDToModifyHDCChicken1:skl,bxt,kbl,glk,cfl */ - ret = wa_ring_whitelist_reg(engine, GEN8_HDC_CHICKEN1); - if (ret) - return ret; - - return 0; -} - -static int skl_tune_iz_hashing(struct intel_engine_cs *engine) -{ - struct drm_i915_private *dev_priv = engine->i915; - u8 vals[3] = { 0, 0, 0 }; - unsigned int i; - - for (i = 0; i < 3; i++) { - u8 ss; - - /* - * Only consider slices where one, and only one, subslice has 7 - * EUs - */ - if (!is_power_of_2(INTEL_INFO(dev_priv)->sseu.subslice_7eu[i])) - continue; - - /* - * subslice_7eu[i] != 0 (because of the check above) and - * ss_max == 4 (maximum number of subslices possible per slice) - * - * -> 0 <= ss <= 3; - */ - ss = ffs(INTEL_INFO(dev_priv)->sseu.subslice_7eu[i]) - 1; - vals[i] = 3 - ss; - } - - if (vals[0] == 0 && vals[1] == 0 && vals[2] == 0) - return 0; - - /* Tune IZ hashing. 
See intel_device_info_runtime_init() */ - WA_SET_FIELD_MASKED(GEN7_GT_MODE, - GEN9_IZ_HASHING_MASK(2) | - GEN9_IZ_HASHING_MASK(1) | - GEN9_IZ_HASHING_MASK(0), - GEN9_IZ_HASHING(2, vals[2]) | - GEN9_IZ_HASHING(1, vals[1]) | - GEN9_IZ_HASHING(0, vals[0])); - - return 0; -} - -static int skl_init_workarounds(struct intel_engine_cs *engine) -{ - struct drm_i915_private *dev_priv = engine->i915; - int ret; - - ret = gen9_init_workarounds(engine); - if (ret) - return ret; - - /* WaEnableGapsTsvCreditFix:skl */ - I915_WRITE(GEN8_GARBCNTL, (I915_READ(GEN8_GARBCNTL) | - GEN9_GAPS_TSV_CREDIT_DISABLE)); - - /* WaDisableGafsUnitClkGating:skl */ - I915_WRITE(GEN7_UCGCTL4, (I915_READ(GEN7_UCGCTL4) | - GEN8_EU_GAUNIT_CLOCK_GATE_DISABLE)); - - /* WaInPlaceDecompressionHang:skl */ - if (IS_SKL_REVID(dev_priv, SKL_REVID_H0, REVID_FOREVER)) - I915_WRITE(GEN9_GAMT_ECO_REG_RW_IA, - (I915_READ(GEN9_GAMT_ECO_REG_RW_IA) | - GAMT_ECO_ENABLE_IN_PLACE_DECOMPRESS)); - - /* WaDisableLSQCROPERFforOCL:skl */ - ret = wa_ring_whitelist_reg(engine, GEN8_L3SQCREG4); - if (ret) - return ret; - - return skl_tune_iz_hashing(engine); -} - -static int bxt_init_workarounds(struct intel_engine_cs *engine) -{ - struct drm_i915_private *dev_priv = engine->i915; - int ret; - - ret = gen9_init_workarounds(engine); - if (ret) - return ret; - - /* WaDisableThreadStallDopClockGating:bxt */ - WA_SET_BIT_MASKED(GEN8_ROW_CHICKEN, - STALL_DOP_GATING_DISABLE); - - /* WaDisablePooledEuLoadBalancingFix:bxt */ - I915_WRITE(FF_SLICE_CS_CHICKEN2, - _MASKED_BIT_ENABLE(GEN9_POOLED_EU_LOAD_BALANCING_FIX_DISABLE)); - - /* WaToEnableHwFixForPushConstHWBug:bxt */ - WA_SET_BIT_MASKED(COMMON_SLICE_CHICKEN2, - GEN8_SBE_DISABLE_REPLAY_BUF_OPTIMIZATION); - - /* WaInPlaceDecompressionHang:bxt */ - I915_WRITE(GEN9_GAMT_ECO_REG_RW_IA, - (I915_READ(GEN9_GAMT_ECO_REG_RW_IA) | - GAMT_ECO_ENABLE_IN_PLACE_DECOMPRESS)); - - return 0; -} - -static int cnl_init_workarounds(struct intel_engine_cs *engine) -{ - struct drm_i915_private *dev_priv = engine->i915; - int ret; - - /* WaDisableI2mCycleOnWRPort:cnl (pre-prod) */ - if (IS_CNL_REVID(dev_priv, CNL_REVID_B0, CNL_REVID_B0)) - I915_WRITE(GAMT_CHKN_BIT_REG, - (I915_READ(GAMT_CHKN_BIT_REG) | - GAMT_CHKN_DISABLE_I2M_CYCLE_ON_WR_PORT)); - - /* WaForceContextSaveRestoreNonCoherent:cnl */ - WA_SET_BIT_MASKED(CNL_HDC_CHICKEN0, - HDC_FORCE_CONTEXT_SAVE_RESTORE_NON_COHERENT); - - /* WaThrottleEUPerfToAvoidTDBackPressure:cnl(pre-prod) */ - if (IS_CNL_REVID(dev_priv, CNL_REVID_B0, CNL_REVID_B0)) - WA_SET_BIT_MASKED(GEN8_ROW_CHICKEN, THROTTLE_12_5); - - /* WaDisableReplayBufferBankArbitrationOptimization:cnl */ - WA_SET_BIT_MASKED(COMMON_SLICE_CHICKEN2, - GEN8_SBE_DISABLE_REPLAY_BUF_OPTIMIZATION); - - /* WaDisableEnhancedSBEVertexCaching:cnl (pre-prod) */ - if (IS_CNL_REVID(dev_priv, 0, CNL_REVID_B0)) - WA_SET_BIT_MASKED(COMMON_SLICE_CHICKEN2, - GEN8_CSC2_SBE_VUE_CACHE_CONSERVATIVE); - - /* WaInPlaceDecompressionHang:cnl */ - I915_WRITE(GEN9_GAMT_ECO_REG_RW_IA, - (I915_READ(GEN9_GAMT_ECO_REG_RW_IA) | - GAMT_ECO_ENABLE_IN_PLACE_DECOMPRESS)); - - /* WaPushConstantDereferenceHoldDisable:cnl */ - WA_SET_BIT_MASKED(GEN7_ROW_CHICKEN2, PUSH_CONSTANT_DEREF_DISABLE); - - /* FtrEnableFastAnisoL1BankingFix: cnl */ - WA_SET_BIT_MASKED(HALF_SLICE_CHICKEN3, CNL_FAST_ANISO_L1_BANKING_FIX); - - /* WaDisable3DMidCmdPreemption:cnl */ - WA_CLR_BIT_MASKED(GEN8_CS_CHICKEN1, GEN9_PREEMPT_3D_OBJECT_LEVEL); - - /* WaDisableGPGPUMidCmdPreemption:cnl */ - WA_SET_FIELD_MASKED(GEN8_CS_CHICKEN1, GEN9_PREEMPT_GPGPU_LEVEL_MASK, - 
GEN9_PREEMPT_GPGPU_COMMAND_LEVEL); - - /* WaEnablePreemptionGranularityControlByUMD:cnl */ - I915_WRITE(GEN7_FF_SLICE_CS_CHICKEN1, - _MASKED_BIT_ENABLE(GEN9_FFSC_PERCTX_PREEMPT_CTRL)); - ret= wa_ring_whitelist_reg(engine, GEN8_CS_CHICKEN1); - if (ret) - return ret; - - /* WaDisableEarlyEOT:cnl */ - WA_SET_BIT_MASKED(GEN8_ROW_CHICKEN, DISABLE_EARLY_EOT); - - return 0; -} - -static int kbl_init_workarounds(struct intel_engine_cs *engine) -{ - struct drm_i915_private *dev_priv = engine->i915; - int ret; - - ret = gen9_init_workarounds(engine); - if (ret) - return ret; - - /* WaEnableGapsTsvCreditFix:kbl */ - I915_WRITE(GEN8_GARBCNTL, (I915_READ(GEN8_GARBCNTL) | - GEN9_GAPS_TSV_CREDIT_DISABLE)); - - /* WaDisableDynamicCreditSharing:kbl */ - if (IS_KBL_REVID(dev_priv, 0, KBL_REVID_B0)) - I915_WRITE(GAMT_CHKN_BIT_REG, - (I915_READ(GAMT_CHKN_BIT_REG) | - GAMT_CHKN_DISABLE_DYNAMIC_CREDIT_SHARING)); - - /* WaDisableFenceDestinationToSLM:kbl (pre-prod) */ - if (IS_KBL_REVID(dev_priv, KBL_REVID_A0, KBL_REVID_A0)) - WA_SET_BIT_MASKED(HDC_CHICKEN0, - HDC_FENCE_DEST_SLM_DISABLE); - - /* WaToEnableHwFixForPushConstHWBug:kbl */ - if (IS_KBL_REVID(dev_priv, KBL_REVID_C0, REVID_FOREVER)) - WA_SET_BIT_MASKED(COMMON_SLICE_CHICKEN2, - GEN8_SBE_DISABLE_REPLAY_BUF_OPTIMIZATION); - - /* WaDisableGafsUnitClkGating:kbl */ - I915_WRITE(GEN7_UCGCTL4, (I915_READ(GEN7_UCGCTL4) | - GEN8_EU_GAUNIT_CLOCK_GATE_DISABLE)); - - /* WaDisableSbeCacheDispatchPortSharing:kbl */ - WA_SET_BIT_MASKED( - GEN7_HALF_SLICE_CHICKEN1, - GEN7_SBE_SS_CACHE_DISPATCH_PORT_SHARING_DISABLE); - - /* WaInPlaceDecompressionHang:kbl */ - I915_WRITE(GEN9_GAMT_ECO_REG_RW_IA, - (I915_READ(GEN9_GAMT_ECO_REG_RW_IA) | - GAMT_ECO_ENABLE_IN_PLACE_DECOMPRESS)); - - /* WaDisableLSQCROPERFforOCL:kbl */ - ret = wa_ring_whitelist_reg(engine, GEN8_L3SQCREG4); - if (ret) - return ret; - - return 0; -} - -static int glk_init_workarounds(struct intel_engine_cs *engine) -{ - struct drm_i915_private *dev_priv = engine->i915; - int ret; - - ret = gen9_init_workarounds(engine); - if (ret) - return ret; - - /* WA #0862: Userspace has to set "Barrier Mode" to avoid hangs. 
*/ - ret = wa_ring_whitelist_reg(engine, GEN9_SLICE_COMMON_ECO_CHICKEN1); - if (ret) - return ret; - - /* WaToEnableHwFixForPushConstHWBug:glk */ - WA_SET_BIT_MASKED(COMMON_SLICE_CHICKEN2, - GEN8_SBE_DISABLE_REPLAY_BUF_OPTIMIZATION); - - return 0; -} - -static int cfl_init_workarounds(struct intel_engine_cs *engine) -{ - struct drm_i915_private *dev_priv = engine->i915; - int ret; - - ret = gen9_init_workarounds(engine); - if (ret) - return ret; - - /* WaEnableGapsTsvCreditFix:cfl */ - I915_WRITE(GEN8_GARBCNTL, (I915_READ(GEN8_GARBCNTL) | - GEN9_GAPS_TSV_CREDIT_DISABLE)); - - /* WaToEnableHwFixForPushConstHWBug:cfl */ - WA_SET_BIT_MASKED(COMMON_SLICE_CHICKEN2, - GEN8_SBE_DISABLE_REPLAY_BUF_OPTIMIZATION); - - /* WaDisableGafsUnitClkGating:cfl */ - I915_WRITE(GEN7_UCGCTL4, (I915_READ(GEN7_UCGCTL4) | - GEN8_EU_GAUNIT_CLOCK_GATE_DISABLE)); - - /* WaDisableSbeCacheDispatchPortSharing:cfl */ - WA_SET_BIT_MASKED( - GEN7_HALF_SLICE_CHICKEN1, - GEN7_SBE_SS_CACHE_DISPATCH_PORT_SHARING_DISABLE); - - /* WaInPlaceDecompressionHang:cfl */ - I915_WRITE(GEN9_GAMT_ECO_REG_RW_IA, - (I915_READ(GEN9_GAMT_ECO_REG_RW_IA) | - GAMT_ECO_ENABLE_IN_PLACE_DECOMPRESS)); - - return 0; -} - -int init_workarounds_ring(struct intel_engine_cs *engine) -{ - struct drm_i915_private *dev_priv = engine->i915; - int err; - - if (GEM_WARN_ON(engine->id != RCS)) - return -EINVAL; - - dev_priv->workarounds.count = 0; - dev_priv->workarounds.hw_whitelist_count[engine->id] = 0; - - if (IS_BROADWELL(dev_priv)) - err = bdw_init_workarounds(engine); - else if (IS_CHERRYVIEW(dev_priv)) - err = chv_init_workarounds(engine); - else if (IS_SKYLAKE(dev_priv)) - err = skl_init_workarounds(engine); - else if (IS_BROXTON(dev_priv)) - err = bxt_init_workarounds(engine); - else if (IS_KABYLAKE(dev_priv)) - err = kbl_init_workarounds(engine); - else if (IS_GEMINILAKE(dev_priv)) - err = glk_init_workarounds(engine); - else if (IS_COFFEELAKE(dev_priv)) - err = cfl_init_workarounds(engine); - else if (IS_CANNONLAKE(dev_priv)) - err = cnl_init_workarounds(engine); - else - err = 0; - if (err) - return err; - - DRM_DEBUG_DRIVER("%s: Number of context specific w/a: %d\n", - engine->name, dev_priv->workarounds.count); - return 0; -} - -int intel_ring_workarounds_emit(struct i915_request *rq) -{ - struct i915_workarounds *w = &rq->i915->workarounds; - u32 *cs; - int ret, i; - - if (w->count == 0) - return 0; - - ret = rq->engine->emit_flush(rq, EMIT_BARRIER); - if (ret) - return ret; - - cs = intel_ring_begin(rq, w->count * 2 + 2); - if (IS_ERR(cs)) - return PTR_ERR(cs); - - *cs++ = MI_LOAD_REGISTER_IMM(w->count); - for (i = 0; i < w->count; i++) { - *cs++ = i915_mmio_reg_offset(w->reg[i].addr); - *cs++ = w->reg[i].value; - } - *cs++ = MI_NOOP; - - intel_ring_advance(rq, cs); - - ret = rq->engine->emit_flush(rq, EMIT_BARRIER); - if (ret) - return ret; - - return 0; -} - static bool ring_is_idle(struct intel_engine_cs *engine) { struct drm_i915_private *dev_priv = engine->i915; @@ -1548,19 +931,34 @@ bool intel_engine_is_idle(struct intel_engine_cs *engine) return true; /* Any inflight/incomplete requests? */ - if (!i915_seqno_passed(intel_engine_get_seqno(engine), - intel_engine_last_submit(engine))) + if (!intel_engine_signaled(engine, intel_engine_last_submit(engine))) return false; if (I915_SELFTEST_ONLY(engine->breadcrumbs.mock)) return true; /* Waiting to drain ELSP? 
*/ - if (READ_ONCE(engine->execlists.active)) - return false; + if (READ_ONCE(engine->execlists.active)) { + struct tasklet_struct *t = &engine->execlists.tasklet; + + local_bh_disable(); + if (tasklet_trylock(t)) { + /* Must wait for any GPU reset in progress. */ + if (__tasklet_is_enabled(t)) + t->func(t->data); + tasklet_unlock(t); + } + local_bh_enable(); - /* ELSP is empty, but there are ready requests? */ - if (READ_ONCE(engine->execlists.first)) + /* Otherwise flush the tasklet if it was on another cpu */ + tasklet_unlock_wait(t); + + if (READ_ONCE(engine->execlists.active)) + return false; + } + + /* ELSP is empty, but there are ready requests? E.g. after reset */ + if (!RB_EMPTY_ROOT(&engine->execlists.queue.rb_root)) return false; /* Ring stopped? */ @@ -1600,8 +998,8 @@ bool intel_engines_are_idle(struct drm_i915_private *dev_priv) */ bool intel_engine_has_kernel_context(const struct intel_engine_cs *engine) { - const struct i915_gem_context * const kernel_context = - engine->i915->kernel_context; + const struct intel_context *kernel_context = + to_intel_context(engine->i915->kernel_context, engine); struct i915_request *rq; lockdep_assert_held(&engine->i915->drm.struct_mutex); @@ -1611,9 +1009,9 @@ bool intel_engine_has_kernel_context(const struct intel_engine_cs *engine) * the last request that remains in the timeline. When idle, it is * the last executed context as tracked by retirement. */ - rq = __i915_gem_active_peek(&engine->timeline->last_request); + rq = __i915_gem_active_peek(&engine->timeline.last_request); if (rq) - return rq->ctx == kernel_context; + return rq->hw_context == kernel_context; else return engine->last_retired_context == kernel_context; } @@ -1628,6 +1026,28 @@ void intel_engines_reset_default_submission(struct drm_i915_private *i915) } /** + * intel_engines_sanitize: called after the GPU has lost power + * @i915: the i915 device + * + * Anytime we reset the GPU, either with an explicit GPU reset or through a + * PCI power cycle, the GPU loses state and we must reset our state tracking + * to match. Note that calling intel_engines_sanitize() if the GPU has not + * been reset results in much confusion! + */ +void intel_engines_sanitize(struct drm_i915_private *i915) +{ + struct intel_engine_cs *engine; + enum intel_engine_id id; + + GEM_TRACE("\n"); + + for_each_engine(engine, i915, id) { + if (engine->reset.reset) + engine->reset.reset(engine, NULL); + } +} + +/** * intel_engines_park: called when the GT is transitioning from busy->idle * @i915: the i915 device * @@ -1659,9 +1079,17 @@ void intel_engines_park(struct drm_i915_private *i915) intel_engine_dump(engine, &p, NULL); } + /* Must be reset upon idling, or we may miss the busy wakeup. */ + GEM_BUG_ON(engine->execlists.queue_priority != INT_MIN); + if (engine->park) engine->park(engine); + if (engine->pinned_default_state) { + i915_gem_object_unpin_map(engine->default_state); + engine->pinned_default_state = NULL; + } + i915_gem_batch_pool_fini(&engine->batch_pool); engine->execlists.no_priolist = false; } @@ -1679,11 +1107,43 @@ void intel_engines_unpark(struct drm_i915_private *i915) enum intel_engine_id id; for_each_engine(engine, i915, id) { + void *map; + + /* Pin the default state for fast resets from atomic context. 
*/ + map = NULL; + if (engine->default_state) + map = i915_gem_object_pin_map(engine->default_state, + I915_MAP_WB); + if (!IS_ERR_OR_NULL(map)) + engine->pinned_default_state = map; + if (engine->unpark) engine->unpark(engine); + + intel_engine_init_hangcheck(engine); } } +/** + * intel_engine_lost_context: called when the GPU is reset into unknown state + * @engine: the engine + * + * We have either reset the GPU or otherwise about to lose state tracking of + * the current GPU logical state (e.g. suspend). On next use, it is therefore + * imperative that we make no presumptions about the current state and load + * from scratch. + */ +void intel_engine_lost_context(struct intel_engine_cs *engine) +{ + struct intel_context *ce; + + lockdep_assert_held(&engine->i915->drm.struct_mutex); + + ce = fetch_and_zero(&engine->last_retired_context); + if (ce) + intel_context_unpin(ce); +} + bool intel_engine_can_store_dword(struct intel_engine_cs *engine) { switch (INTEL_GEN(engine->i915)) { @@ -1713,17 +1173,37 @@ unsigned int intel_engines_has_context_isolation(struct drm_i915_private *i915) return which; } +static int print_sched_attr(struct drm_i915_private *i915, + const struct i915_sched_attr *attr, + char *buf, int x, int len) +{ + if (attr->priority == I915_PRIORITY_INVALID) + return x; + + x += snprintf(buf + x, len - x, + " prio=%d", attr->priority); + + return x; +} + static void print_request(struct drm_printer *m, struct i915_request *rq, const char *prefix) { - drm_printf(m, "%s%x%s [%llx:%x] prio=%d @ %dms: %s\n", prefix, + const char *name = rq->fence.ops->get_timeline_name(&rq->fence); + char buf[80] = ""; + int x = 0; + + x = print_sched_attr(rq->i915, &rq->sched.attr, buf, x, sizeof(buf)); + + drm_printf(m, "%s%x%s [%llx:%x]%s @ %dms: %s\n", + prefix, rq->global_seqno, i915_request_completed(rq) ? "!" : "", rq->fence.context, rq->fence.seqno, - rq->priotree.priority, + buf, jiffies_to_msecs(jiffies - rq->emitted_jiffies), - rq->timeline->common->name); + name); } static void hexdump(struct drm_printer *m, const void *buf, size_t len) @@ -1748,7 +1228,7 @@ static void hexdump(struct drm_printer *m, const void *buf, size_t len) rowsize, sizeof(u32), line, sizeof(line), false) >= sizeof(line)); - drm_printf(m, "%08zx %s\n", pos, line); + drm_printf(m, "[%04zx] %s\n", pos, line); prev = buf + pos; skip = false; @@ -1763,6 +1243,8 @@ static void intel_engine_print_registers(const struct intel_engine_cs *engine, &engine->execlists; u64 addr; + if (engine->id == RCS && IS_GEN(dev_priv, 4, 7)) + drm_printf(m, "\tCCID: 0x%08x\n", I915_READ(CCID)); drm_printf(m, "\tRING_START: 0x%08x\n", I915_READ(RING_START(engine->mmio_base))); drm_printf(m, "\tRING_HEAD: 0x%08x\n", @@ -1819,22 +1301,22 @@ static void intel_engine_print_registers(const struct intel_engine_cs *engine, if (HAS_EXECLISTS(dev_priv)) { const u32 *hws = &engine->status_page.page_addr[I915_HWS_CSB_BUF0_INDEX]; - u32 ptr, read, write; unsigned int idx; + u8 read, write; drm_printf(m, "\tExeclist status: 0x%08x %08x\n", I915_READ(RING_EXECLIST_STATUS_LO(engine)), I915_READ(RING_EXECLIST_STATUS_HI(engine))); - ptr = I915_READ(RING_CONTEXT_STATUS_PTR(engine)); - read = GEN8_CSB_READ_PTR(ptr); - write = GEN8_CSB_WRITE_PTR(ptr); - drm_printf(m, "\tExeclist CSB read %d [%d cached], write %d [%d from hws], interrupt posted? 
%s\n", - read, execlists->csb_head, - write, - intel_read_status_page(engine, intel_hws_csb_write_index(engine->i915)), - yesno(test_bit(ENGINE_IRQ_EXECLIST, - &engine->irq_posted))); + read = execlists->csb_head; + write = READ_ONCE(*execlists->csb_write); + + drm_printf(m, "\tExeclist CSB read %d, write %d [mmio:%d], tasklet queued? %s (%s)\n", + read, write, + GEN8_CSB_WRITE_PTR(I915_READ(RING_CONTEXT_STATUS_PTR(engine))), + yesno(test_bit(TASKLET_STATE_SCHED, + &engine->execlists.tasklet.state)), + enableddisabled(!atomic_read(&engine->execlists.tasklet.count))); if (read >= GEN8_CSB_ENTRIES) read = 0; if (write >= GEN8_CSB_ENTRIES) @@ -1843,12 +1325,12 @@ static void intel_engine_print_registers(const struct intel_engine_cs *engine, write += GEN8_CSB_ENTRIES; while (read < write) { idx = ++read % GEN8_CSB_ENTRIES; - drm_printf(m, "\tExeclist CSB[%d]: 0x%08x [0x%08x in hwsp], context: %d [%d in hwsp]\n", + drm_printf(m, "\tExeclist CSB[%d]: 0x%08x [mmio:0x%08x], context: %d [mmio:%d]\n", idx, - I915_READ(RING_CONTEXT_STATUS_BUF_LO(engine, idx)), hws[idx * 2], - I915_READ(RING_CONTEXT_STATUS_BUF_HI(engine, idx)), - hws[idx * 2 + 1]); + I915_READ(RING_CONTEXT_STATUS_BUF_LO(engine, idx)), + hws[idx * 2 + 1], + I915_READ(RING_CONTEXT_STATUS_BUF_HI(engine, idx))); } rcu_read_lock(); @@ -1861,8 +1343,9 @@ static void intel_engine_print_registers(const struct intel_engine_cs *engine, char hdr[80]; snprintf(hdr, sizeof(hdr), - "\t\tELSP[%d] count=%d, rq: ", - idx, count); + "\t\tELSP[%d] count=%d, ring->start=%08x, rq: ", + idx, count, + i915_ggtt_offset(rq->ring->vma)); print_request(m, rq, hdr); } else { drm_printf(m, "\t\tELSP[%d] idle\n", idx); @@ -1880,15 +1363,51 @@ static void intel_engine_print_registers(const struct intel_engine_cs *engine, } } +static void print_request_ring(struct drm_printer *m, struct i915_request *rq) +{ + void *ring; + int size; + + drm_printf(m, + "[head %04x, postfix %04x, tail %04x, batch 0x%08x_%08x]:\n", + rq->head, rq->postfix, rq->tail, + rq->batch ? upper_32_bits(rq->batch->node.start) : ~0u, + rq->batch ? lower_32_bits(rq->batch->node.start) : ~0u); + + size = rq->tail - rq->head; + if (rq->tail < rq->head) + size += rq->ring->size; + + ring = kmalloc(size, GFP_ATOMIC); + if (ring) { + const void *vaddr = rq->ring->vaddr; + unsigned int head = rq->head; + unsigned int len = 0; + + if (rq->tail < head) { + len = rq->ring->size - head; + memcpy(ring, vaddr + head, len); + head = 0; + } + memcpy(ring + len, vaddr + head, size - len); + + hexdump(m, ring, size); + kfree(ring); + } +} + void intel_engine_dump(struct intel_engine_cs *engine, struct drm_printer *m, const char *header, ...) 
{ + const int MAX_REQUESTS_TO_SHOW = 8; struct intel_breadcrumbs * const b = &engine->breadcrumbs; const struct intel_engine_execlists * const execlists = &engine->execlists; struct i915_gpu_error * const error = &engine->i915->gpu_error; - struct i915_request *rq; + struct i915_request *rq, *last; + unsigned long flags; struct rb_node *rb; + int count; if (header) { va_list ap; @@ -1901,12 +1420,11 @@ void intel_engine_dump(struct intel_engine_cs *engine, if (i915_terminally_wedged(&engine->i915->gpu_error)) drm_printf(m, "*** WEDGED ***\n"); - drm_printf(m, "\tcurrent seqno %x, last %x, hangcheck %x [%d ms], inflight %d\n", + drm_printf(m, "\tcurrent seqno %x, last %x, hangcheck %x [%d ms]\n", intel_engine_get_seqno(engine), intel_engine_last_submit(engine), engine->hangcheck.seqno, - jiffies_to_msecs(jiffies - engine->hangcheck.action_timestamp), - engine->timeline->inflight_seqnos); + jiffies_to_msecs(jiffies - engine->hangcheck.action_timestamp)); drm_printf(m, "\tReset count: %d (global %d)\n", i915_reset_engine_count(error, engine), i915_reset_count(error)); @@ -1915,30 +1433,32 @@ void intel_engine_dump(struct intel_engine_cs *engine, drm_printf(m, "\tRequests:\n"); - rq = list_first_entry(&engine->timeline->requests, + rq = list_first_entry(&engine->timeline.requests, struct i915_request, link); - if (&rq->link != &engine->timeline->requests) + if (&rq->link != &engine->timeline.requests) print_request(m, rq, "\t\tfirst "); - rq = list_last_entry(&engine->timeline->requests, + rq = list_last_entry(&engine->timeline.requests, struct i915_request, link); - if (&rq->link != &engine->timeline->requests) + if (&rq->link != &engine->timeline.requests) print_request(m, rq, "\t\tlast "); rq = i915_gem_find_active_request(engine); if (rq) { print_request(m, rq, "\t\tactive "); - drm_printf(m, - "\t\t[head %04x, postfix %04x, tail %04x, batch 0x%08x_%08x]\n", - rq->head, rq->postfix, rq->tail, - rq->batch ? upper_32_bits(rq->batch->node.start) : ~0u, - rq->batch ? 
lower_32_bits(rq->batch->node.start) : ~0u); - drm_printf(m, "\t\tring->start: 0x%08x\n", + + drm_printf(m, "\t\tring->start: 0x%08x\n", i915_ggtt_offset(rq->ring->vma)); - drm_printf(m, "\t\tring->head: 0x%08x\n", + drm_printf(m, "\t\tring->head: 0x%08x\n", rq->ring->head); - drm_printf(m, "\t\tring->tail: 0x%08x\n", + drm_printf(m, "\t\tring->tail: 0x%08x\n", rq->ring->tail); + drm_printf(m, "\t\tring->emit: 0x%08x\n", + rq->ring->emit); + drm_printf(m, "\t\tring->space: 0x%08x\n", + rq->ring->space); + + print_request_ring(m, rq); } rcu_read_unlock(); @@ -1950,33 +1470,64 @@ void intel_engine_dump(struct intel_engine_cs *engine, drm_printf(m, "\tDevice is asleep; skipping register dump\n"); } - spin_lock_irq(&engine->timeline->lock); - list_for_each_entry(rq, &engine->timeline->requests, link) - print_request(m, rq, "\t\tE "); + local_irq_save(flags); + spin_lock(&engine->timeline.lock); + + last = NULL; + count = 0; + list_for_each_entry(rq, &engine->timeline.requests, link) { + if (count++ < MAX_REQUESTS_TO_SHOW - 1) + print_request(m, rq, "\t\tE "); + else + last = rq; + } + if (last) { + if (count > MAX_REQUESTS_TO_SHOW) { + drm_printf(m, + "\t\t...skipping %d executing requests...\n", + count - MAX_REQUESTS_TO_SHOW); + } + print_request(m, last, "\t\tE "); + } + + last = NULL; + count = 0; drm_printf(m, "\t\tQueue priority: %d\n", execlists->queue_priority); - for (rb = execlists->first; rb; rb = rb_next(rb)) { + for (rb = rb_first_cached(&execlists->queue); rb; rb = rb_next(rb)) { struct i915_priolist *p = rb_entry(rb, typeof(*p), node); - list_for_each_entry(rq, &p->requests, priotree.link) - print_request(m, rq, "\t\tQ "); + list_for_each_entry(rq, &p->requests, sched.link) { + if (count++ < MAX_REQUESTS_TO_SHOW - 1) + print_request(m, rq, "\t\tQ "); + else + last = rq; + } + } + if (last) { + if (count > MAX_REQUESTS_TO_SHOW) { + drm_printf(m, + "\t\t...skipping %d queued requests...\n", + count - MAX_REQUESTS_TO_SHOW); + } + print_request(m, last, "\t\tQ "); } - spin_unlock_irq(&engine->timeline->lock); - spin_lock_irq(&b->rb_lock); + spin_unlock(&engine->timeline.lock); + + spin_lock(&b->rb_lock); for (rb = rb_first(&b->waiters); rb; rb = rb_next(rb)) { struct intel_wait *w = rb_entry(rb, typeof(*w), node); drm_printf(m, "\t%s [%d] waiting for %x\n", w->tsk->comm, w->tsk->pid, w->seqno); } - spin_unlock_irq(&b->rb_lock); + spin_unlock(&b->rb_lock); + local_irq_restore(flags); - drm_printf(m, "IRQ? 0x%lx (breadcrumbs? %s) (execlists? %s)\n", + drm_printf(m, "IRQ? 0x%lx (breadcrumbs? 
%s)\n", engine->irq_posted, yesno(test_bit(ENGINE_IRQ_BREADCRUMB, - &engine->irq_posted)), - yesno(test_bit(ENGINE_IRQ_EXECLIST, &engine->irq_posted))); drm_printf(m, "HWSP:\n"); @@ -2025,8 +1576,8 @@ int intel_enable_engine_stats(struct intel_engine_cs *engine) if (!intel_engine_supports_stats(engine)) return -ENODEV; - tasklet_disable(&execlists->tasklet); - spin_lock_irqsave(&engine->stats.lock, flags); + spin_lock_irqsave(&engine->timeline.lock, flags); + write_seqlock(&engine->stats.lock); if (unlikely(engine->stats.enabled == ~0)) { err = -EBUSY; @@ -2050,8 +1601,8 @@ int intel_enable_engine_stats(struct intel_engine_cs *engine) } unlock: - spin_unlock_irqrestore(&engine->stats.lock, flags); - tasklet_enable(&execlists->tasklet); + write_sequnlock(&engine->stats.lock); + spin_unlock_irqrestore(&engine->timeline.lock, flags); return err; } @@ -2079,12 +1630,13 @@ static ktime_t __intel_engine_get_busy_time(struct intel_engine_cs *engine) */ ktime_t intel_engine_get_busy_time(struct intel_engine_cs *engine) { + unsigned int seq; ktime_t total; - unsigned long flags; - spin_lock_irqsave(&engine->stats.lock, flags); - total = __intel_engine_get_busy_time(engine); - spin_unlock_irqrestore(&engine->stats.lock, flags); + do { + seq = read_seqbegin(&engine->stats.lock); + total = __intel_engine_get_busy_time(engine); + } while (read_seqretry(&engine->stats.lock, seq)); return total; } @@ -2102,15 +1654,16 @@ void intel_disable_engine_stats(struct intel_engine_cs *engine) if (!intel_engine_supports_stats(engine)) return; - spin_lock_irqsave(&engine->stats.lock, flags); + write_seqlock_irqsave(&engine->stats.lock, flags); WARN_ON_ONCE(engine->stats.enabled == 0); if (--engine->stats.enabled == 0) { engine->stats.total = __intel_engine_get_busy_time(engine); engine->stats.active = 0; } - spin_unlock_irqrestore(&engine->stats.lock, flags); + write_sequnlock_irqrestore(&engine->stats.lock, flags); } #if IS_ENABLED(CONFIG_DRM_I915_SELFTEST) #include "selftests/mock_engine.c" +#include "selftests/intel_engine_cs.c" #endif |

