diff options
Diffstat (limited to 'drivers/gpu/drm/i915/i915_gem.c')
-rw-r--r-- | drivers/gpu/drm/i915/i915_gem.c | 264 |
1 files changed, 164 insertions, 100 deletions
diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c index 61ba321e9970..4a7f5579a7a5 100644 --- a/drivers/gpu/drm/i915/i915_gem.c +++ b/drivers/gpu/drm/i915/i915_gem.c @@ -330,17 +330,10 @@ int i915_gem_object_unbind(struct drm_i915_gem_object *obj) * must wait for all rendering to complete to the object (as unbinding * must anyway), and retire the requests. */ - ret = i915_gem_object_wait(obj, - I915_WAIT_INTERRUPTIBLE | - I915_WAIT_LOCKED | - I915_WAIT_ALL, - MAX_SCHEDULE_TIMEOUT, - NULL); + ret = i915_gem_object_set_to_cpu_domain(obj, false); if (ret) return ret; - i915_gem_retire_requests(to_i915(obj->base.dev)); - while ((vma = list_first_entry_or_null(&obj->vma_list, struct i915_vma, obj_link))) { @@ -673,17 +666,13 @@ fb_write_origin(struct drm_i915_gem_object *obj, unsigned int domain) obj->frontbuffer_ggtt_origin : ORIGIN_CPU); } -static void -flush_write_domain(struct drm_i915_gem_object *obj, unsigned int flush_domains) +void i915_gem_flush_ggtt_writes(struct drm_i915_private *dev_priv) { - struct drm_i915_private *dev_priv = to_i915(obj->base.dev); - - if (!(obj->base.write_domain & flush_domains)) - return; - - /* No actual flushing is required for the GTT write domain. Writes - * to it "immediately" go to main memory as far as we know, so there's - * no chipset flush. It also doesn't land in render cache. + /* + * No actual flushing is required for the GTT write domain for reads + * from the GTT domain. Writes to it "immediately" go to main memory + * as far as we know, so there's no chipset flush. It also doesn't + * land in the GPU render cache. * * However, we do have to enforce the order so that all writes through * the GTT land before any writes to the device, such as updates to @@ -694,22 +683,43 @@ flush_write_domain(struct drm_i915_gem_object *obj, unsigned int flush_domains) * timing. This issue has only been observed when switching quickly * between GTT writes and CPU reads from inside the kernel on recent hw, * and it appears to only affect discrete GTT blocks (i.e. on LLC - * system agents we cannot reproduce this behaviour). + * system agents we cannot reproduce this behaviour, until Cannonlake + * that was!). */ + wmb(); + intel_runtime_pm_get(dev_priv); + spin_lock_irq(&dev_priv->uncore.lock); + + POSTING_READ_FW(RING_HEAD(RENDER_RING_BASE)); + + spin_unlock_irq(&dev_priv->uncore.lock); + intel_runtime_pm_put(dev_priv); +} + +static void +flush_write_domain(struct drm_i915_gem_object *obj, unsigned int flush_domains) +{ + struct drm_i915_private *dev_priv = to_i915(obj->base.dev); + struct i915_vma *vma; + + if (!(obj->base.write_domain & flush_domains)) + return; + switch (obj->base.write_domain) { case I915_GEM_DOMAIN_GTT: - if (!HAS_LLC(dev_priv)) { - intel_runtime_pm_get(dev_priv); - spin_lock_irq(&dev_priv->uncore.lock); - POSTING_READ_FW(RING_HEAD(dev_priv->engine[RCS]->mmio_base)); - spin_unlock_irq(&dev_priv->uncore.lock); - intel_runtime_pm_put(dev_priv); - } + i915_gem_flush_ggtt_writes(dev_priv); intel_fb_obj_flush(obj, fb_write_origin(obj, I915_GEM_DOMAIN_GTT)); + + for_each_ggtt_vma(vma, obj) { + if (vma->iomap) + continue; + + i915_vma_unset_ggtt_write(vma); + } break; case I915_GEM_DOMAIN_CPU: @@ -1106,7 +1116,7 @@ i915_gem_gtt_pread(struct drm_i915_gem_object *obj, page_base += offset & PAGE_MASK; } - if (gtt_user_read(&ggtt->mappable, page_base, page_offset, + if (gtt_user_read(&ggtt->iomap, page_base, page_offset, user_data, page_length)) { ret = -EFAULT; break; @@ -1314,7 +1324,7 @@ i915_gem_gtt_pwrite_fast(struct drm_i915_gem_object *obj, * If the object is non-shmem backed, we retry again with the * path that handles page fault. */ - if (ggtt_write(&ggtt->mappable, page_base, page_offset, + if (ggtt_write(&ggtt->iomap, page_base, page_offset, user_data, page_length)) { ret = -EFAULT; break; @@ -1556,10 +1566,7 @@ static void i915_gem_object_bump_inactive_ggtt(struct drm_i915_gem_object *obj) GEM_BUG_ON(!i915_gem_object_has_pinned_pages(obj)); - list_for_each_entry(vma, &obj->vma_list, obj_link) { - if (!i915_vma_is_ggtt(vma)) - break; - + for_each_ggtt_vma(vma, obj) { if (i915_vma_is_active(vma)) continue; @@ -1960,9 +1967,9 @@ int i915_gem_fault(struct vm_fault *vmf) /* Finally, remap it using the new GTT offset */ ret = remap_io_mapping(area, area->vm_start + (vma->ggtt_view.partial.offset << PAGE_SHIFT), - (ggtt->mappable_base + vma->node.start) >> PAGE_SHIFT, + (ggtt->gmadr.start + vma->node.start) >> PAGE_SHIFT, min_t(u64, vma->size, area->vm_end - area->vm_start), - &ggtt->mappable); + &ggtt->iomap); if (ret) goto err_fence; @@ -1972,6 +1979,8 @@ int i915_gem_fault(struct vm_fault *vmf) list_add(&obj->userfault_link, &dev_priv->mm.userfault_list); GEM_BUG_ON(!obj->userfault_count); + i915_vma_set_ggtt_write(vma); + err_fence: i915_vma_unpin_fence(vma); err_unpin: @@ -2036,12 +2045,8 @@ static void __i915_gem_object_release_mmap(struct drm_i915_gem_object *obj) drm_vma_node_unmap(&obj->base.vma_node, obj->base.dev->anon_inode->i_mapping); - list_for_each_entry(vma, &obj->vma_list, obj_link) { - if (!i915_vma_is_ggtt(vma)) - break; - + for_each_ggtt_vma(vma, obj) i915_vma_unset_userfault(vma); - } } /** @@ -3328,7 +3333,7 @@ i915_gem_idle_work_handler(struct work_struct *work) * Wait for last execlists context complete, but bail out in case a * new request is submitted. */ - end = ktime_add_ms(ktime_get(), 200); + end = ktime_add_ms(ktime_get(), I915_IDLE_ENGINES_TIMEOUT); do { if (new_requests_since_last_retire(dev_priv)) return; @@ -3371,7 +3376,9 @@ i915_gem_idle_work_handler(struct work_struct *work) synchronize_irq(dev_priv->drm.irq); intel_engines_park(dev_priv); - i915_gem_timelines_mark_idle(dev_priv); + i915_gem_timelines_park(dev_priv); + + i915_pmu_gt_parked(dev_priv); GEM_BUG_ON(!dev_priv->gt.awake); dev_priv->gt.awake = false; @@ -3379,6 +3386,9 @@ i915_gem_idle_work_handler(struct work_struct *work) if (INTEL_GEN(dev_priv) >= 6) gen6_rps_idle(dev_priv); + + intel_display_power_put(dev_priv, POWER_DOMAIN_GT_IRQ); + intel_runtime_pm_put(dev_priv); out_unlock: mutex_unlock(&dev_priv->drm.struct_mutex); @@ -3523,8 +3533,19 @@ static int wait_for_timeline(struct i915_gem_timeline *tl, unsigned int flags) static int wait_for_engines(struct drm_i915_private *i915) { - if (wait_for(intel_engines_are_idle(i915), 50)) { - DRM_ERROR("Failed to idle engines, declaring wedged!\n"); + if (wait_for(intel_engines_are_idle(i915), I915_IDLE_ENGINES_TIMEOUT)) { + dev_err(i915->drm.dev, + "Failed to idle engines, declaring wedged!\n"); + if (drm_debug & DRM_UT_DRIVER) { + struct drm_printer p = drm_debug_printer(__func__); + struct intel_engine_cs *engine; + enum intel_engine_id id; + + for_each_engine(engine, i915, id) + intel_engine_dump(engine, &p, + "%s", engine->name); + } + i915_gem_set_wedged(i915); return -EIO; } @@ -3550,9 +3571,7 @@ int i915_gem_wait_for_idle(struct drm_i915_private *i915, unsigned int flags) if (ret) return ret; } - i915_gem_retire_requests(i915); - GEM_BUG_ON(i915->gt.active_requests); ret = wait_for_engines(i915); } else { @@ -3751,7 +3770,8 @@ restart: return -EBUSY; } - if (i915_gem_valid_gtt_space(vma, cache_level)) + if (!i915_vma_is_closed(vma) && + i915_gem_valid_gtt_space(vma, cache_level)) continue; ret = i915_vma_unbind(vma); @@ -3804,7 +3824,7 @@ restart: * dropped the fence as all snoopable access is * supposed to be linear. */ - list_for_each_entry(vma, &obj->vma_list, obj_link) { + for_each_ggtt_vma(vma, obj) { ret = i915_vma_put_fence(vma); if (ret) return ret; @@ -4772,17 +4792,19 @@ int i915_gem_suspend(struct drm_i915_private *dev_priv) * state. Fortunately, the kernel_context is disposable and we do * not rely on its state. */ - ret = i915_gem_switch_to_kernel_context(dev_priv); - if (ret) - goto err_unlock; + if (!i915_terminally_wedged(&dev_priv->gpu_error)) { + ret = i915_gem_switch_to_kernel_context(dev_priv); + if (ret) + goto err_unlock; - ret = i915_gem_wait_for_idle(dev_priv, - I915_WAIT_INTERRUPTIBLE | - I915_WAIT_LOCKED); - if (ret && ret != -EIO) - goto err_unlock; + ret = i915_gem_wait_for_idle(dev_priv, + I915_WAIT_INTERRUPTIBLE | + I915_WAIT_LOCKED); + if (ret && ret != -EIO) + goto err_unlock; - assert_kernel_context_is_current(dev_priv); + assert_kernel_context_is_current(dev_priv); + } i915_gem_contexts_lost(dev_priv); mutex_unlock(&dev->struct_mutex); @@ -4843,7 +4865,8 @@ void i915_gem_resume(struct drm_i915_private *i915) i915_gem_restore_gtt_mappings(i915); i915_gem_restore_fences(i915); - /* As we didn't flush the kernel context before suspend, we cannot + /* + * As we didn't flush the kernel context before suspend, we cannot * guarantee that the context image is complete. So let's just reset * it and start again. */ @@ -4864,8 +4887,10 @@ out_unlock: return; err_wedged: - DRM_ERROR("failed to re-initialize GPU, declaring wedged!\n"); - i915_gem_set_wedged(i915); + if (!i915_terminally_wedged(&i915->gpu_error)) { + DRM_ERROR("failed to re-initialize GPU, declaring wedged!\n"); + i915_gem_set_wedged(i915); + } goto out_unlock; } @@ -4997,25 +5022,6 @@ out: return ret; } -bool intel_sanitize_semaphores(struct drm_i915_private *dev_priv, int value) -{ - if (INTEL_INFO(dev_priv)->gen < 6) - return false; - - /* TODO: make semaphores and Execlists play nicely together */ - if (i915_modparams.enable_execlists) - return false; - - if (value >= 0) - return value; - - /* Enable semaphores on SNB when IO remapping is off */ - if (IS_GEN6(dev_priv) && intel_vtd_active()) - return false; - - return true; -} - static int __intel_engines_record_defaults(struct drm_i915_private *i915) { struct i915_gem_context *ctx; @@ -5045,7 +5051,7 @@ static int __intel_engines_record_defaults(struct drm_i915_private *i915) goto out_ctx; } - err = i915_switch_context(rq); + err = 0; if (engine->init_context) err = engine->init_context(rq); @@ -5134,8 +5140,6 @@ int i915_gem_init(struct drm_i915_private *dev_priv) { int ret; - mutex_lock(&dev_priv->drm.struct_mutex); - /* * We need to fallback to 4K pages since gvt gtt handling doesn't * support huge page entries - we will need to check either hypervisor @@ -5147,43 +5151,58 @@ int i915_gem_init(struct drm_i915_private *dev_priv) dev_priv->mm.unordered_timeline = dma_fence_context_alloc(1); - if (!i915_modparams.enable_execlists) { - dev_priv->gt.resume = intel_legacy_submission_resume; - dev_priv->gt.cleanup_engine = intel_engine_cleanup; - } else { + if (HAS_LOGICAL_RING_CONTEXTS(dev_priv)) { dev_priv->gt.resume = intel_lr_context_resume; dev_priv->gt.cleanup_engine = intel_logical_ring_cleanup; + } else { + dev_priv->gt.resume = intel_legacy_submission_resume; + dev_priv->gt.cleanup_engine = intel_engine_cleanup; } + ret = i915_gem_init_userptr(dev_priv); + if (ret) + return ret; + + ret = intel_uc_init_wq(dev_priv); + if (ret) + return ret; + /* This is just a security blanket to placate dragons. * On some systems, we very sporadically observe that the first TLBs * used by the CS may be stale, despite us poking the TLB reset. If * we hold the forcewake during initialisation these problems * just magically go away. */ + mutex_lock(&dev_priv->drm.struct_mutex); intel_uncore_forcewake_get(dev_priv, FORCEWAKE_ALL); - ret = i915_gem_init_userptr(dev_priv); - if (ret) - goto out_unlock; - ret = i915_gem_init_ggtt(dev_priv); - if (ret) - goto out_unlock; + if (ret) { + GEM_BUG_ON(ret == -EIO); + goto err_unlock; + } ret = i915_gem_contexts_init(dev_priv); - if (ret) - goto out_unlock; + if (ret) { + GEM_BUG_ON(ret == -EIO); + goto err_ggtt; + } ret = intel_engines_init(dev_priv); - if (ret) - goto out_unlock; + if (ret) { + GEM_BUG_ON(ret == -EIO); + goto err_context; + } intel_init_gt_powersave(dev_priv); + ret = intel_uc_init(dev_priv); + if (ret) + goto err_pm; + ret = i915_gem_init_hw(dev_priv); if (ret) - goto out_unlock; + goto err_uc_init; /* * Despite its name intel_init_clock_gating applies both display @@ -5197,9 +5216,55 @@ int i915_gem_init(struct drm_i915_private *dev_priv) intel_init_clock_gating(dev_priv); ret = __intel_engines_record_defaults(dev_priv); -out_unlock: + if (ret) + goto err_init_hw; + + if (i915_inject_load_failure()) { + ret = -ENODEV; + goto err_init_hw; + } + + if (i915_inject_load_failure()) { + ret = -EIO; + goto err_init_hw; + } + + intel_uncore_forcewake_put(dev_priv, FORCEWAKE_ALL); + mutex_unlock(&dev_priv->drm.struct_mutex); + + return 0; + + /* + * Unwinding is complicated by that we want to handle -EIO to mean + * disable GPU submission but keep KMS alive. We want to mark the + * HW as irrevisibly wedged, but keep enough state around that the + * driver doesn't explode during runtime. + */ +err_init_hw: + i915_gem_wait_for_idle(dev_priv, I915_WAIT_LOCKED); + i915_gem_contexts_lost(dev_priv); + intel_uc_fini_hw(dev_priv); +err_uc_init: + intel_uc_fini(dev_priv); +err_pm: + if (ret != -EIO) { + intel_cleanup_gt_powersave(dev_priv); + i915_gem_cleanup_engines(dev_priv); + } +err_context: + if (ret != -EIO) + i915_gem_contexts_fini(dev_priv); +err_ggtt: +err_unlock: + intel_uncore_forcewake_put(dev_priv, FORCEWAKE_ALL); + mutex_unlock(&dev_priv->drm.struct_mutex); + + if (ret != -EIO) + i915_gem_cleanup_userptr(dev_priv); + if (ret == -EIO) { - /* Allow engine initialisation to fail by marking the GPU as + /* + * Allow engine initialisation to fail by marking the GPU as * wedged. But we only want to do this where the GPU is angry, * for all other failure, such as an allocation failure, bail. */ @@ -5209,9 +5274,8 @@ out_unlock: } ret = 0; } - intel_uncore_forcewake_put(dev_priv, FORCEWAKE_ALL); - mutex_unlock(&dev_priv->drm.struct_mutex); + i915_gem_drain_freed_objects(dev_priv); return ret; } |