diff options
author | Daniel Vetter <daniel.vetter@ffwll.ch> | 2012-11-15 17:17:22 +0100 |
---|---|---|
committer | Daniel Vetter <daniel.vetter@ffwll.ch> | 2013-01-20 13:11:16 +0100 |
commit | 1f83fee08d625f8d0130f9fe5ef7b17c2e022f3c (patch) | |
tree | f876ef636a0e99c2afd8d20db81789e50d259439 /drivers/gpu/drm/i915/i915_gem.c | |
parent | 308887aad14c4ecc3fc10a3c58ec42641c5e4423 (diff) | |
download | talos-op-linux-1f83fee08d625f8d0130f9fe5ef7b17c2e022f3c.tar.gz talos-op-linux-1f83fee08d625f8d0130f9fe5ef7b17c2e022f3c.zip |
drm/i915: clear up wedged transitions
We have two important transitions of the wedged state in the current
code:
- 0 -> 1: This means a hang has been detected, and signals to everyone
that they please get of any locks, so that the reset work item can
do its job.
- 1 -> 0: The reset handler has completed.
Now the last transition mixes up two states: "Reset completed and
successful" and "Reset failed". To distinguish these two we do some
tricks with the reset completion, but I simply could not convince
myself that this doesn't race under odd circumstances.
Hence split this up, and add a new terminal state indicating that the
hw is gone for good.
Also add explicit #defines for both states, update comments.
v2: Split out the reset handling bugfix for the throttle ioctl.
v3: s/tmp/wedged/ sugested by Chris Wilson. Also fixup up a rebase
error which prevented this patch from actually compiling.
v4: To unify the wedged state with the reset counter, keep the
reset-in-progress state just as a flag. The terminally-wedged state is
now denoted with a big number.
v5: Add a comment to the reset_counter special values explaining that
WEDGED & RESET_IN_PROGRESS needs to be true for the code to be
correct.
v6: Fixup logic errors introduced with the wedged+reset_counter
unification. Since WEDGED implies reset-in-progress (in a way we're
terminally stuck in the dead-but-reset-not-completed state), we need
ensure that we check for this everywhere. The specific bug was in
wait_for_error, which would simply have timed out.
v7: Extract an inline i915_reset_in_progress helper to make the code
more readable. Also annote the reset-in-progress case with an
unlikely, to help the compiler optimize the fastpath. Do the same for
the terminally wedged case with i915_terminally_wedged.
Reviewed-by: Damien Lespiau <damien.lespiau@intel.com>
Signed-Off-by: Daniel Vetter <daniel.vetter@ffwll.ch>
Diffstat (limited to 'drivers/gpu/drm/i915/i915_gem.c')
-rw-r--r-- | drivers/gpu/drm/i915/i915_gem.c | 49 |
1 files changed, 18 insertions, 31 deletions
diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c index c96a501b8205..2ca901194824 100644 --- a/drivers/gpu/drm/i915/i915_gem.c +++ b/drivers/gpu/drm/i915/i915_gem.c @@ -89,36 +89,32 @@ static void i915_gem_info_remove_obj(struct drm_i915_private *dev_priv, static int i915_gem_wait_for_error(struct i915_gpu_error *error) { - struct completion *x = &error->completion; - unsigned long flags; int ret; - if (!atomic_read(&error->wedged)) +#define EXIT_COND (!i915_reset_in_progress(error)) + if (EXIT_COND) return 0; + /* GPU is already declared terminally dead, give up. */ + if (i915_terminally_wedged(error)) + return -EIO; + /* * Only wait 10 seconds for the gpu reset to complete to avoid hanging * userspace. If it takes that long something really bad is going on and * we should simply try to bail out and fail as gracefully as possible. */ - ret = wait_for_completion_interruptible_timeout(x, 10*HZ); + ret = wait_event_interruptible_timeout(error->reset_queue, + EXIT_COND, + 10*HZ); if (ret == 0) { DRM_ERROR("Timed out waiting for the gpu reset to complete\n"); return -EIO; } else if (ret < 0) { return ret; } +#undef EXIT_COND - if (atomic_read(&error->wedged)) { - /* GPU is hung, bump the completion count to account for - * the token we just consumed so that we never hit zero and - * end up waiting upon a subsequent completion event that - * will never happen. - */ - spin_lock_irqsave(&x->wait.lock, flags); - x->done++; - spin_unlock_irqrestore(&x->wait.lock, flags); - } return 0; } @@ -942,23 +938,14 @@ int i915_gem_check_wedge(struct i915_gpu_error *error, bool interruptible) { - if (atomic_read(&error->wedged)) { - struct completion *x = &error->completion; - bool recovery_complete; - unsigned long flags; - - /* Give the error handler a chance to run. */ - spin_lock_irqsave(&x->wait.lock, flags); - recovery_complete = x->done > 0; - spin_unlock_irqrestore(&x->wait.lock, flags); - + if (i915_reset_in_progress(error)) { /* Non-interruptible callers can't handle -EAGAIN, hence return * -EIO unconditionally for these. */ if (!interruptible) return -EIO; - /* Recovery complete, but still wedged means reset failure. */ - if (recovery_complete) + /* Recovery complete, but the reset failed ... */ + if (i915_terminally_wedged(error)) return -EIO; return -EAGAIN; @@ -1025,7 +1012,7 @@ static int __wait_seqno(struct intel_ring_buffer *ring, u32 seqno, #define EXIT_COND \ (i915_seqno_passed(ring->get_seqno(ring, false), seqno) || \ - atomic_read(&dev_priv->gpu_error.wedged)) + i915_reset_in_progress(&dev_priv->gpu_error)) do { if (interruptible) end = wait_event_interruptible_timeout(ring->irq_queue, @@ -1379,7 +1366,7 @@ out: /* If this -EIO is due to a gpu hang, give the reset code a * chance to clean up the mess. Otherwise return the proper * SIGBUS. */ - if (!atomic_read(&dev_priv->gpu_error.wedged)) + if (i915_terminally_wedged(&dev_priv->gpu_error)) return VM_FAULT_SIGBUS; case -EAGAIN: /* Give the error handler a chance to run and move the @@ -3983,9 +3970,9 @@ i915_gem_entervt_ioctl(struct drm_device *dev, void *data, if (drm_core_check_feature(dev, DRIVER_MODESET)) return 0; - if (atomic_read(&dev_priv->gpu_error.wedged)) { + if (i915_reset_in_progress(&dev_priv->gpu_error)) { DRM_ERROR("Reenabling wedged hardware, good luck\n"); - atomic_set(&dev_priv->gpu_error.wedged, 0); + atomic_set(&dev_priv->gpu_error.reset_counter, 0); } mutex_lock(&dev->struct_mutex); @@ -4069,7 +4056,7 @@ i915_gem_load(struct drm_device *dev) INIT_LIST_HEAD(&dev_priv->fence_regs[i].lru_list); INIT_DELAYED_WORK(&dev_priv->mm.retire_work, i915_gem_retire_work_handler); - init_completion(&dev_priv->gpu_error.completion); + init_waitqueue_head(&dev_priv->gpu_error.reset_queue); /* On GEN3 we really need to make sure the ARB C3 LP bit is set */ if (IS_GEN3(dev)) { |