Diffstat (limited to 'drivers/gpu/drm/i915/i915_gem.c'):
-rw-r--r--  drivers/gpu/drm/i915/i915_gem.c | 228
 1 file changed, 166 insertions(+), 62 deletions(-)
diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c
index e083f242b8dc..ba9f67c256f4 100644
--- a/drivers/gpu/drm/i915/i915_gem.c
+++ b/drivers/gpu/drm/i915/i915_gem.c
@@ -330,17 +330,10 @@ int i915_gem_object_unbind(struct drm_i915_gem_object *obj)
* must wait for all rendering to complete to the object (as unbinding
* must anyway), and retire the requests.
*/
- ret = i915_gem_object_wait(obj,
- I915_WAIT_INTERRUPTIBLE |
- I915_WAIT_LOCKED |
- I915_WAIT_ALL,
- MAX_SCHEDULE_TIMEOUT,
- NULL);
+ ret = i915_gem_object_set_to_cpu_domain(obj, false);
if (ret)
return ret;
- i915_gem_retire_requests(to_i915(obj->base.dev));
-
while ((vma = list_first_entry_or_null(&obj->vma_list,
struct i915_vma,
obj_link))) {
@@ -673,17 +666,13 @@ fb_write_origin(struct drm_i915_gem_object *obj, unsigned int domain)
obj->frontbuffer_ggtt_origin : ORIGIN_CPU);
}
-static void
-flush_write_domain(struct drm_i915_gem_object *obj, unsigned int flush_domains)
+void i915_gem_flush_ggtt_writes(struct drm_i915_private *dev_priv)
{
- struct drm_i915_private *dev_priv = to_i915(obj->base.dev);
-
- if (!(obj->base.write_domain & flush_domains))
- return;
-
- /* No actual flushing is required for the GTT write domain. Writes
- * to it "immediately" go to main memory as far as we know, so there's
- * no chipset flush. It also doesn't land in render cache.
+ /*
+ * No actual flushing is required for the GTT write domain for reads
+ * from the GTT domain. Writes to it "immediately" go to main memory
+ * as far as we know, so there's no chipset flush. It also doesn't
+ * land in the GPU render cache.
*
* However, we do have to enforce the order so that all writes through
* the GTT land before any writes to the device, such as updates to
@@ -694,22 +683,43 @@ flush_write_domain(struct drm_i915_gem_object *obj, unsigned int flush_domains)
* timing. This issue has only been observed when switching quickly
* between GTT writes and CPU reads from inside the kernel on recent hw,
* and it appears to only affect discrete GTT blocks (i.e. on LLC
- * system agents we cannot reproduce this behaviour).
+ * system agents we cannot reproduce this behaviour, until Cannonlake
+ * that was!).
*/
+
wmb();
+ intel_runtime_pm_get(dev_priv);
+ spin_lock_irq(&dev_priv->uncore.lock);
+
+ POSTING_READ_FW(RING_HEAD(RENDER_RING_BASE));
+
+ spin_unlock_irq(&dev_priv->uncore.lock);
+ intel_runtime_pm_put(dev_priv);
+}
+
+static void
+flush_write_domain(struct drm_i915_gem_object *obj, unsigned int flush_domains)
+{
+ struct drm_i915_private *dev_priv = to_i915(obj->base.dev);
+ struct i915_vma *vma;
+
+ if (!(obj->base.write_domain & flush_domains))
+ return;
+
switch (obj->base.write_domain) {
case I915_GEM_DOMAIN_GTT:
- if (!HAS_LLC(dev_priv)) {
- intel_runtime_pm_get(dev_priv);
- spin_lock_irq(&dev_priv->uncore.lock);
- POSTING_READ_FW(RING_HEAD(dev_priv->engine[RCS]->mmio_base));
- spin_unlock_irq(&dev_priv->uncore.lock);
- intel_runtime_pm_put(dev_priv);
- }
+ i915_gem_flush_ggtt_writes(dev_priv);
intel_fb_obj_flush(obj,
fb_write_origin(obj, I915_GEM_DOMAIN_GTT));
+
+ for_each_ggtt_vma(vma, obj) {
+ if (vma->iomap)
+ continue;
+
+ i915_vma_unset_ggtt_write(vma);
+ }
break;
case I915_GEM_DOMAIN_CPU:
@@ -1106,7 +1116,7 @@ i915_gem_gtt_pread(struct drm_i915_gem_object *obj,
page_base += offset & PAGE_MASK;
}
- if (gtt_user_read(&ggtt->mappable, page_base, page_offset,
+ if (gtt_user_read(&ggtt->iomap, page_base, page_offset,
user_data, page_length)) {
ret = -EFAULT;
break;
@@ -1314,7 +1324,7 @@ i915_gem_gtt_pwrite_fast(struct drm_i915_gem_object *obj,
* If the object is non-shmem backed, we retry again with the
* path that handles page fault.
*/
- if (ggtt_write(&ggtt->mappable, page_base, page_offset,
+ if (ggtt_write(&ggtt->iomap, page_base, page_offset,
user_data, page_length)) {
ret = -EFAULT;
break;
@@ -1556,10 +1566,7 @@ static void i915_gem_object_bump_inactive_ggtt(struct drm_i915_gem_object *obj)
GEM_BUG_ON(!i915_gem_object_has_pinned_pages(obj));
- list_for_each_entry(vma, &obj->vma_list, obj_link) {
- if (!i915_vma_is_ggtt(vma))
- break;
-
+ for_each_ggtt_vma(vma, obj) {
if (i915_vma_is_active(vma))
continue;
@@ -1960,9 +1967,9 @@ int i915_gem_fault(struct vm_fault *vmf)
/* Finally, remap it using the new GTT offset */
ret = remap_io_mapping(area,
area->vm_start + (vma->ggtt_view.partial.offset << PAGE_SHIFT),
- (ggtt->mappable_base + vma->node.start) >> PAGE_SHIFT,
+ (ggtt->gmadr.start + vma->node.start) >> PAGE_SHIFT,
min_t(u64, vma->size, area->vm_end - area->vm_start),
- &ggtt->mappable);
+ &ggtt->iomap);
if (ret)
goto err_fence;
@@ -1972,6 +1979,8 @@ int i915_gem_fault(struct vm_fault *vmf)
list_add(&obj->userfault_link, &dev_priv->mm.userfault_list);
GEM_BUG_ON(!obj->userfault_count);
+ i915_vma_set_ggtt_write(vma);
+
err_fence:
i915_vma_unpin_fence(vma);
err_unpin:
@@ -2036,12 +2045,8 @@ static void __i915_gem_object_release_mmap(struct drm_i915_gem_object *obj)
drm_vma_node_unmap(&obj->base.vma_node,
obj->base.dev->anon_inode->i_mapping);
- list_for_each_entry(vma, &obj->vma_list, obj_link) {
- if (!i915_vma_is_ggtt(vma))
- break;
-
+ for_each_ggtt_vma(vma, obj)
i915_vma_unset_userfault(vma);
- }
}
/**
@@ -2591,7 +2596,7 @@ static int ____i915_gem_object_get_pages(struct drm_i915_gem_object *obj)
}
err = obj->ops->get_pages(obj);
- GEM_BUG_ON(!err && IS_ERR_OR_NULL(obj->mm.pages));
+ GEM_BUG_ON(!err && !i915_gem_object_has_pages(obj));
return err;
}
@@ -3084,7 +3089,12 @@ i915_gem_reset_request(struct intel_engine_cs *engine,
void i915_gem_reset_engine(struct intel_engine_cs *engine,
struct drm_i915_gem_request *request)
{
- engine->irq_posted = 0;
+ /*
+ * Make sure this write is visible before we re-enable the interrupt
+ * handlers on another CPU, as tasklet_enable() resolves to just
+ * a compiler barrier which is insufficient for our purpose here.
+ */
+ smp_store_mb(engine->irq_posted, 0);
if (request)
request = i915_gem_reset_request(engine, request);
@@ -3114,6 +3124,25 @@ void i915_gem_reset(struct drm_i915_private *dev_priv)
ctx = fetch_and_zero(&engine->last_retired_context);
if (ctx)
engine->context_unpin(engine, ctx);
+
+ /*
+ * Ostensibly, we always want a context loaded for powersaving,
+ * so if the engine is idle after the reset, send a request
+ * to load our scratch kernel_context.
+ *
+ * More mysteriously, if we leave the engine idle after a reset,
+ * the next userspace batch may hang, with what appears to be
+ * an incoherent read by the CS (presumably stale TLB). An
+ * empty request appears sufficient to paper over the glitch.
+ */
+ if (list_empty(&engine->timeline->requests)) {
+ struct drm_i915_gem_request *rq;
+
+ rq = i915_gem_request_alloc(engine,
+ dev_priv->kernel_context);
+ if (!IS_ERR(rq))
+ __i915_add_request(rq, false);
+ }
}
i915_gem_restore_fences(dev_priv);
@@ -3328,7 +3357,7 @@ i915_gem_idle_work_handler(struct work_struct *work)
* Wait for last execlists context complete, but bail out in case a
* new request is submitted.
*/
- end = ktime_add_ms(ktime_get(), 200);
+ end = ktime_add_ms(ktime_get(), I915_IDLE_ENGINES_TIMEOUT);
do {
if (new_requests_since_last_retire(dev_priv))
return;
@@ -3381,6 +3410,9 @@ i915_gem_idle_work_handler(struct work_struct *work)
if (INTEL_GEN(dev_priv) >= 6)
gen6_rps_idle(dev_priv);
+
+ intel_display_power_put(dev_priv, POWER_DOMAIN_GT_IRQ);
+
intel_runtime_pm_put(dev_priv);
out_unlock:
mutex_unlock(&dev_priv->drm.struct_mutex);
@@ -3525,8 +3557,19 @@ static int wait_for_timeline(struct i915_gem_timeline *tl, unsigned int flags)
static int wait_for_engines(struct drm_i915_private *i915)
{
- if (wait_for(intel_engines_are_idle(i915), 50)) {
- DRM_ERROR("Failed to idle engines, declaring wedged!\n");
+ if (wait_for(intel_engines_are_idle(i915), I915_IDLE_ENGINES_TIMEOUT)) {
+ dev_err(i915->drm.dev,
+ "Failed to idle engines, declaring wedged!\n");
+ if (drm_debug & DRM_UT_DRIVER) {
+ struct drm_printer p = drm_debug_printer(__func__);
+ struct intel_engine_cs *engine;
+ enum intel_engine_id id;
+
+ for_each_engine(engine, i915, id)
+ intel_engine_dump(engine, &p,
+ "%s", engine->name);
+ }
+
i915_gem_set_wedged(i915);
return -EIO;
}
@@ -3552,9 +3595,7 @@ int i915_gem_wait_for_idle(struct drm_i915_private *i915, unsigned int flags)
if (ret)
return ret;
}
-
i915_gem_retire_requests(i915);
- GEM_BUG_ON(i915->gt.active_requests);
ret = wait_for_engines(i915);
} else {
@@ -3753,7 +3794,8 @@ restart:
return -EBUSY;
}
- if (i915_gem_valid_gtt_space(vma, cache_level))
+ if (!i915_vma_is_closed(vma) &&
+ i915_gem_valid_gtt_space(vma, cache_level))
continue;
ret = i915_vma_unbind(vma);
@@ -3806,7 +3848,7 @@ restart:
* dropped the fence as all snoopable access is
* supposed to be linear.
*/
- list_for_each_entry(vma, &obj->vma_list, obj_link) {
+ for_each_ggtt_vma(vma, obj) {
ret = i915_vma_put_fence(vma);
if (ret)
return ret;
@@ -4847,7 +4889,8 @@ void i915_gem_resume(struct drm_i915_private *i915)
i915_gem_restore_gtt_mappings(i915);
i915_gem_restore_fences(i915);
- /* As we didn't flush the kernel context before suspend, we cannot
+ /*
+ * As we didn't flush the kernel context before suspend, we cannot
* guarantee that the context image is complete. So let's just reset
* it and start again.
*/
@@ -4868,8 +4911,10 @@ out_unlock:
return;
err_wedged:
- DRM_ERROR("failed to re-initialize GPU, declaring wedged!\n");
- i915_gem_set_wedged(i915);
+ if (!i915_terminally_wedged(&i915->gpu_error)) {
+ DRM_ERROR("failed to re-initialize GPU, declaring wedged!\n");
+ i915_gem_set_wedged(i915);
+ }
goto out_unlock;
}
@@ -5142,6 +5187,10 @@ int i915_gem_init(struct drm_i915_private *dev_priv)
if (ret)
return ret;
+ ret = intel_uc_init_wq(dev_priv);
+ if (ret)
+ return ret;
+
/* This is just a security blanket to placate dragons.
* On some systems, we very sporadically observe that the first TLBs
* used by the CS may be stale, despite us poking the TLB reset. If
@@ -5152,22 +5201,32 @@ int i915_gem_init(struct drm_i915_private *dev_priv)
intel_uncore_forcewake_get(dev_priv, FORCEWAKE_ALL);
ret = i915_gem_init_ggtt(dev_priv);
- if (ret)
- goto out_unlock;
+ if (ret) {
+ GEM_BUG_ON(ret == -EIO);
+ goto err_unlock;
+ }
ret = i915_gem_contexts_init(dev_priv);
- if (ret)
- goto out_unlock;
+ if (ret) {
+ GEM_BUG_ON(ret == -EIO);
+ goto err_ggtt;
+ }
ret = intel_engines_init(dev_priv);
- if (ret)
- goto out_unlock;
+ if (ret) {
+ GEM_BUG_ON(ret == -EIO);
+ goto err_context;
+ }
intel_init_gt_powersave(dev_priv);
+ ret = intel_uc_init(dev_priv);
+ if (ret)
+ goto err_pm;
+
ret = i915_gem_init_hw(dev_priv);
if (ret)
- goto out_unlock;
+ goto err_uc_init;
/*
* Despite its name intel_init_clock_gating applies both display
@@ -5181,9 +5240,55 @@ int i915_gem_init(struct drm_i915_private *dev_priv)
intel_init_clock_gating(dev_priv);
ret = __intel_engines_record_defaults(dev_priv);
-out_unlock:
+ if (ret)
+ goto err_init_hw;
+
+ if (i915_inject_load_failure()) {
+ ret = -ENODEV;
+ goto err_init_hw;
+ }
+
+ if (i915_inject_load_failure()) {
+ ret = -EIO;
+ goto err_init_hw;
+ }
+
+ intel_uncore_forcewake_put(dev_priv, FORCEWAKE_ALL);
+ mutex_unlock(&dev_priv->drm.struct_mutex);
+
+ return 0;
+
+ /*
+ * Unwinding is complicated by the fact that we want to handle -EIO
+ * to mean disable GPU submission but keep KMS alive. We want to mark
+ * the HW as irreversibly wedged, but keep enough state around that the
+ * driver doesn't explode during runtime.
+ */
+err_init_hw:
+ i915_gem_wait_for_idle(dev_priv, I915_WAIT_LOCKED);
+ i915_gem_contexts_lost(dev_priv);
+ intel_uc_fini_hw(dev_priv);
+err_uc_init:
+ intel_uc_fini(dev_priv);
+err_pm:
+ if (ret != -EIO) {
+ intel_cleanup_gt_powersave(dev_priv);
+ i915_gem_cleanup_engines(dev_priv);
+ }
+err_context:
+ if (ret != -EIO)
+ i915_gem_contexts_fini(dev_priv);
+err_ggtt:
+err_unlock:
+ intel_uncore_forcewake_put(dev_priv, FORCEWAKE_ALL);
+ mutex_unlock(&dev_priv->drm.struct_mutex);
+
+ if (ret != -EIO)
+ i915_gem_cleanup_userptr(dev_priv);
+
if (ret == -EIO) {
- /* Allow engine initialisation to fail by marking the GPU as
+ /*
+ * Allow engine initialisation to fail by marking the GPU as
* wedged. But we only want to do this where the GPU is angry,
* for all other failure, such as an allocation failure, bail.
*/
@@ -5193,9 +5298,8 @@ out_unlock:
}
ret = 0;
}
- intel_uncore_forcewake_put(dev_priv, FORCEWAKE_ALL);
- mutex_unlock(&dev_priv->drm.struct_mutex);
+ i915_gem_drain_freed_objects(dev_priv);
return ret;
}