diff options
Diffstat (limited to 'drivers/gpu/drm/i915/i915_gem_context.c')
-rw-r--r-- | drivers/gpu/drm/i915/i915_gem_context.c | 291 |
1 files changed, 183 insertions, 108 deletions
diff --git a/drivers/gpu/drm/i915/i915_gem_context.c b/drivers/gpu/drm/i915/i915_gem_context.c index e5acc3916f75..2aedd188473d 100644 --- a/drivers/gpu/drm/i915/i915_gem_context.c +++ b/drivers/gpu/drm/i915/i915_gem_context.c @@ -90,6 +90,8 @@ #include "i915_drv.h" #include "i915_trace.h" +#define ALL_L3_SLICES(dev) (1 << NUM_L3_SLICES(dev)) - 1 + /* This is a HW constraint. The value below is the largest known requirement * I've seen in a spec to date, and that was a workaround for a non-shipping * part. It should be safe to decrease this, but it's more future proof as is. @@ -97,28 +99,27 @@ #define GEN6_CONTEXT_ALIGN (64<<10) #define GEN7_CONTEXT_ALIGN 4096 -static size_t get_context_alignment(struct drm_device *dev) +static size_t get_context_alignment(struct drm_i915_private *dev_priv) { - if (IS_GEN6(dev)) + if (IS_GEN6(dev_priv)) return GEN6_CONTEXT_ALIGN; return GEN7_CONTEXT_ALIGN; } -static int get_context_size(struct drm_device *dev) +static int get_context_size(struct drm_i915_private *dev_priv) { - struct drm_i915_private *dev_priv = dev->dev_private; int ret; u32 reg; - switch (INTEL_INFO(dev)->gen) { + switch (INTEL_GEN(dev_priv)) { case 6: reg = I915_READ(CXT_SIZE); ret = GEN6_CXT_TOTAL_SIZE(reg) * 64; break; case 7: reg = I915_READ(GEN7_CXT_SIZE); - if (IS_HASWELL(dev)) + if (IS_HASWELL(dev_priv)) ret = HSW_CXT_TOTAL_SIZE; else ret = GEN7_CXT_TOTAL_SIZE(reg) * 64; @@ -169,6 +170,8 @@ void i915_gem_context_free(struct kref *ctx_ref) if (ctx->legacy_hw_ctx.rcs_state) drm_gem_object_unreference(&ctx->legacy_hw_ctx.rcs_state->base); list_del(&ctx->link); + + ida_simple_remove(&ctx->i915->context_hw_ida, ctx->hw_id); kfree(ctx); } @@ -178,9 +181,9 @@ i915_gem_alloc_context_obj(struct drm_device *dev, size_t size) struct drm_i915_gem_object *obj; int ret; - obj = i915_gem_alloc_object(dev, size); - if (obj == NULL) - return ERR_PTR(-ENOMEM); + obj = i915_gem_object_create(dev, size); + if (IS_ERR(obj)) + return obj; /* * Try to make the context utilize L3 as well as LLC. @@ -209,6 +212,28 @@ i915_gem_alloc_context_obj(struct drm_device *dev, size_t size) return obj; } +static int assign_hw_id(struct drm_i915_private *dev_priv, unsigned *out) +{ + int ret; + + ret = ida_simple_get(&dev_priv->context_hw_ida, + 0, MAX_CONTEXT_HW_ID, GFP_KERNEL); + if (ret < 0) { + /* Contexts are only released when no longer active. + * Flush any pending retires to hopefully release some + * stale contexts and try again. + */ + i915_gem_retire_requests(dev_priv); + ret = ida_simple_get(&dev_priv->context_hw_ida, + 0, MAX_CONTEXT_HW_ID, GFP_KERNEL); + if (ret < 0) + return ret; + } + + *out = ret; + return 0; +} + static struct intel_context * __create_hw_context(struct drm_device *dev, struct drm_i915_file_private *file_priv) @@ -221,6 +246,12 @@ __create_hw_context(struct drm_device *dev, if (ctx == NULL) return ERR_PTR(-ENOMEM); + ret = assign_hw_id(dev_priv, &ctx->hw_id); + if (ret) { + kfree(ctx); + return ERR_PTR(ret); + } + kref_init(&ctx->ref); list_add_tail(&ctx->link, &dev_priv->context_list); ctx->i915 = dev_priv; @@ -249,7 +280,7 @@ __create_hw_context(struct drm_device *dev, /* NB: Mark all slices as needing a remap so that when the context first * loads it will restore whatever remap state already exists. If there * is no remap info, it will be a NOP. */ - ctx->remap_slice = (1 << NUM_L3_SLICES(dev)) - 1; + ctx->remap_slice = ALL_L3_SLICES(dev_priv); ctx->hang_stats.ban_period_seconds = DRM_I915_CTX_BAN_PERIOD; @@ -288,7 +319,7 @@ i915_gem_create_context(struct drm_device *dev, * context. */ ret = i915_gem_obj_ggtt_pin(ctx->legacy_hw_ctx.rcs_state, - get_context_alignment(dev), 0); + get_context_alignment(to_i915(dev)), 0); if (ret) { DRM_DEBUG_DRIVER("Couldn't pin %d\n", ret); goto err_destroy; @@ -336,7 +367,6 @@ static void i915_gem_context_unpin(struct intel_context *ctx, void i915_gem_context_reset(struct drm_device *dev) { struct drm_i915_private *dev_priv = dev->dev_private; - int i; if (i915.enable_execlists) { struct intel_context *ctx; @@ -345,17 +375,7 @@ void i915_gem_context_reset(struct drm_device *dev) intel_lr_context_reset(dev_priv, ctx); } - for (i = 0; i < I915_NUM_ENGINES; i++) { - struct intel_engine_cs *engine = &dev_priv->engine[i]; - - if (engine->last_context) { - i915_gem_context_unpin(engine->last_context, engine); - engine->last_context = NULL; - } - } - - /* Force the GPU state to be reinitialised on enabling */ - dev_priv->kernel_context->legacy_hw_ctx.initialized = false; + i915_gem_context_lost(dev_priv); } int i915_gem_context_init(struct drm_device *dev) @@ -368,19 +388,25 @@ int i915_gem_context_init(struct drm_device *dev) if (WARN_ON(dev_priv->kernel_context)) return 0; - if (intel_vgpu_active(dev) && HAS_LOGICAL_RING_CONTEXTS(dev)) { + if (intel_vgpu_active(dev_priv) && + HAS_LOGICAL_RING_CONTEXTS(dev_priv)) { if (!i915.enable_execlists) { DRM_INFO("Only EXECLIST mode is supported in vgpu.\n"); return -EINVAL; } } + /* Using the simple ida interface, the max is limited by sizeof(int) */ + BUILD_BUG_ON(MAX_CONTEXT_HW_ID > INT_MAX); + ida_init(&dev_priv->context_hw_ida); + if (i915.enable_execlists) { /* NB: intentionally left blank. We will allocate our own * backing objects as we need them, thank you very much */ dev_priv->hw_context_size = 0; - } else if (HAS_HW_CONTEXTS(dev)) { - dev_priv->hw_context_size = round_up(get_context_size(dev), 4096); + } else if (HAS_HW_CONTEXTS(dev_priv)) { + dev_priv->hw_context_size = + round_up(get_context_size(dev_priv), 4096); if (dev_priv->hw_context_size > (1<<20)) { DRM_DEBUG_DRIVER("Disabling HW Contexts; invalid size %d\n", dev_priv->hw_context_size); @@ -403,61 +429,35 @@ int i915_gem_context_init(struct drm_device *dev) return 0; } -void i915_gem_context_fini(struct drm_device *dev) +void i915_gem_context_lost(struct drm_i915_private *dev_priv) { - struct drm_i915_private *dev_priv = dev->dev_private; - struct intel_context *dctx = dev_priv->kernel_context; - int i; - - if (dctx->legacy_hw_ctx.rcs_state) { - /* The only known way to stop the gpu from accessing the hw context is - * to reset it. Do this as the very last operation to avoid confusing - * other code, leading to spurious errors. */ - intel_gpu_reset(dev, ALL_ENGINES); - - /* When default context is created and switched to, base object refcount - * will be 2 (+1 from object creation and +1 from do_switch()). - * i915_gem_context_fini() will be called after gpu_idle() has switched - * to default context. So we need to unreference the base object once - * to offset the do_switch part, so that i915_gem_context_unreference() - * can then free the base object correctly. */ - WARN_ON(!dev_priv->engine[RCS].last_context); + struct intel_engine_cs *engine; - i915_gem_object_ggtt_unpin(dctx->legacy_hw_ctx.rcs_state); - } - - for (i = I915_NUM_ENGINES; --i >= 0;) { - struct intel_engine_cs *engine = &dev_priv->engine[i]; + for_each_engine(engine, dev_priv) { + if (engine->last_context == NULL) + continue; - if (engine->last_context) { - i915_gem_context_unpin(engine->last_context, engine); - engine->last_context = NULL; - } + i915_gem_context_unpin(engine->last_context, engine); + engine->last_context = NULL; } - i915_gem_context_unreference(dctx); - dev_priv->kernel_context = NULL; + /* Force the GPU state to be reinitialised on enabling */ + dev_priv->kernel_context->legacy_hw_ctx.initialized = false; + dev_priv->kernel_context->remap_slice = ALL_L3_SLICES(dev_priv); } -int i915_gem_context_enable(struct drm_i915_gem_request *req) +void i915_gem_context_fini(struct drm_device *dev) { - struct intel_engine_cs *engine = req->engine; - int ret; - - if (i915.enable_execlists) { - if (engine->init_context == NULL) - return 0; + struct drm_i915_private *dev_priv = dev->dev_private; + struct intel_context *dctx = dev_priv->kernel_context; - ret = engine->init_context(req); - } else - ret = i915_switch_context(req); + if (dctx->legacy_hw_ctx.rcs_state) + i915_gem_object_ggtt_unpin(dctx->legacy_hw_ctx.rcs_state); - if (ret) { - DRM_ERROR("ring init context: %d\n", ret); - return ret; - } + i915_gem_context_unreference(dctx); + dev_priv->kernel_context = NULL; - return 0; + ida_destroy(&dev_priv->context_hw_ida); } static int context_idr_cleanup(int id, void *p, void *data) @@ -510,12 +510,13 @@ i915_gem_context_get(struct drm_i915_file_private *file_priv, u32 id) static inline int mi_set_context(struct drm_i915_gem_request *req, u32 hw_flags) { + struct drm_i915_private *dev_priv = req->i915; struct intel_engine_cs *engine = req->engine; u32 flags = hw_flags | MI_MM_SPACE_GTT; const int num_rings = /* Use an extended w/a on ivb+ if signalling from other rings */ - i915_semaphore_is_enabled(engine->dev) ? - hweight32(INTEL_INFO(engine->dev)->ring_mask) - 1 : + i915_semaphore_is_enabled(dev_priv) ? + hweight32(INTEL_INFO(dev_priv)->ring_mask) - 1 : 0; int len, ret; @@ -524,21 +525,21 @@ mi_set_context(struct drm_i915_gem_request *req, u32 hw_flags) * explicitly, so we rely on the value at ring init, stored in * itlb_before_ctx_switch. */ - if (IS_GEN6(engine->dev)) { + if (IS_GEN6(dev_priv)) { ret = engine->flush(req, I915_GEM_GPU_DOMAINS, 0); if (ret) return ret; } /* These flags are for resource streamer on HSW+ */ - if (IS_HASWELL(engine->dev) || INTEL_INFO(engine->dev)->gen >= 8) + if (IS_HASWELL(dev_priv) || INTEL_GEN(dev_priv) >= 8) flags |= (HSW_MI_RS_SAVE_STATE_EN | HSW_MI_RS_RESTORE_STATE_EN); - else if (INTEL_INFO(engine->dev)->gen < 8) + else if (INTEL_GEN(dev_priv) < 8) flags |= (MI_SAVE_EXT_STATE_EN | MI_RESTORE_EXT_STATE_EN); len = 4; - if (INTEL_INFO(engine->dev)->gen >= 7) + if (INTEL_GEN(dev_priv) >= 7) len += 2 + (num_rings ? 4*num_rings + 6 : 0); ret = intel_ring_begin(req, len); @@ -546,14 +547,14 @@ mi_set_context(struct drm_i915_gem_request *req, u32 hw_flags) return ret; /* WaProgramMiArbOnOffAroundMiSetContext:ivb,vlv,hsw,bdw,chv */ - if (INTEL_INFO(engine->dev)->gen >= 7) { + if (INTEL_GEN(dev_priv) >= 7) { intel_ring_emit(engine, MI_ARB_ON_OFF | MI_ARB_DISABLE); if (num_rings) { struct intel_engine_cs *signaller; intel_ring_emit(engine, MI_LOAD_REGISTER_IMM(num_rings)); - for_each_engine(signaller, to_i915(engine->dev)) { + for_each_engine(signaller, dev_priv) { if (signaller == engine) continue; @@ -576,14 +577,14 @@ mi_set_context(struct drm_i915_gem_request *req, u32 hw_flags) */ intel_ring_emit(engine, MI_NOOP); - if (INTEL_INFO(engine->dev)->gen >= 7) { + if (INTEL_GEN(dev_priv) >= 7) { if (num_rings) { struct intel_engine_cs *signaller; i915_reg_t last_reg = {}; /* keep gcc quiet */ intel_ring_emit(engine, MI_LOAD_REGISTER_IMM(num_rings)); - for_each_engine(signaller, to_i915(engine->dev)) { + for_each_engine(signaller, dev_priv) { if (signaller == engine) continue; @@ -609,7 +610,37 @@ mi_set_context(struct drm_i915_gem_request *req, u32 hw_flags) return ret; } -static inline bool skip_rcs_switch(struct intel_engine_cs *engine, +static int remap_l3(struct drm_i915_gem_request *req, int slice) +{ + u32 *remap_info = req->i915->l3_parity.remap_info[slice]; + struct intel_engine_cs *engine = req->engine; + int i, ret; + + if (!remap_info) + return 0; + + ret = intel_ring_begin(req, GEN7_L3LOG_SIZE/4 * 2 + 2); + if (ret) + return ret; + + /* + * Note: We do not worry about the concurrent register cacheline hang + * here because no other code should access these registers other than + * at initialization time. + */ + intel_ring_emit(engine, MI_LOAD_REGISTER_IMM(GEN7_L3LOG_SIZE/4)); + for (i = 0; i < GEN7_L3LOG_SIZE/4; i++) { + intel_ring_emit_reg(engine, GEN7_L3LOG(slice, i)); + intel_ring_emit(engine, remap_info[i]); + } + intel_ring_emit(engine, MI_NOOP); + intel_ring_advance(engine); + + return 0; +} + +static inline bool skip_rcs_switch(struct i915_hw_ppgtt *ppgtt, + struct intel_engine_cs *engine, struct intel_context *to) { if (to->remap_slice) @@ -618,36 +649,44 @@ static inline bool skip_rcs_switch(struct intel_engine_cs *engine, if (!to->legacy_hw_ctx.initialized) return false; - if (to->ppgtt && - !(intel_engine_flag(engine) & to->ppgtt->pd_dirty_rings)) + if (ppgtt && (intel_engine_flag(engine) & ppgtt->pd_dirty_rings)) return false; return to == engine->last_context; } static bool -needs_pd_load_pre(struct intel_engine_cs *engine, struct intel_context *to) +needs_pd_load_pre(struct i915_hw_ppgtt *ppgtt, + struct intel_engine_cs *engine, + struct intel_context *to) { - if (!to->ppgtt) + if (!ppgtt) return false; + /* Always load the ppgtt on first use */ + if (!engine->last_context) + return true; + + /* Same context without new entries, skip */ if (engine->last_context == to && - !(intel_engine_flag(engine) & to->ppgtt->pd_dirty_rings)) + !(intel_engine_flag(engine) & ppgtt->pd_dirty_rings)) return false; if (engine->id != RCS) return true; - if (INTEL_INFO(engine->dev)->gen < 8) + if (INTEL_GEN(engine->i915) < 8) return true; return false; } static bool -needs_pd_load_post(struct intel_context *to, u32 hw_flags) +needs_pd_load_post(struct i915_hw_ppgtt *ppgtt, + struct intel_context *to, + u32 hw_flags) { - if (!to->ppgtt) + if (!ppgtt) return false; if (!IS_GEN8(to->i915)) @@ -663,16 +702,17 @@ static int do_rcs_switch(struct drm_i915_gem_request *req) { struct intel_context *to = req->ctx; struct intel_engine_cs *engine = req->engine; + struct i915_hw_ppgtt *ppgtt = to->ppgtt ?: req->i915->mm.aliasing_ppgtt; struct intel_context *from; u32 hw_flags; int ret, i; - if (skip_rcs_switch(engine, to)) + if (skip_rcs_switch(ppgtt, engine, to)) return 0; /* Trying to pin first makes error handling easier. */ ret = i915_gem_obj_ggtt_pin(to->legacy_hw_ctx.rcs_state, - get_context_alignment(engine->dev), + get_context_alignment(engine->i915), 0); if (ret) return ret; @@ -698,13 +738,13 @@ static int do_rcs_switch(struct drm_i915_gem_request *req) if (ret) goto unpin_out; - if (needs_pd_load_pre(engine, to)) { + if (needs_pd_load_pre(ppgtt, engine, to)) { /* Older GENs and non render rings still want the load first, * "PP_DCLV followed by PP_DIR_BASE register through Load * Register Immediate commands in Ring Buffer before submitting * a context."*/ trace_switch_mm(engine, to); - ret = to->ppgtt->switch_mm(to->ppgtt, req); + ret = ppgtt->switch_mm(ppgtt, req); if (ret) goto unpin_out; } @@ -715,16 +755,11 @@ static int do_rcs_switch(struct drm_i915_gem_request *req) * space. This means we must enforce that a page table load * occur when this occurs. */ hw_flags = MI_RESTORE_INHIBIT; - else if (to->ppgtt && - intel_engine_flag(engine) & to->ppgtt->pd_dirty_rings) + else if (ppgtt && intel_engine_flag(engine) & ppgtt->pd_dirty_rings) hw_flags = MI_FORCE_RESTORE; else hw_flags = 0; - /* We should never emit switch_mm more than once */ - WARN_ON(needs_pd_load_pre(engine, to) && - needs_pd_load_post(to, hw_flags)); - if (to != from || (hw_flags & MI_FORCE_RESTORE)) { ret = mi_set_context(req, hw_flags); if (ret) @@ -759,9 +794,9 @@ static int do_rcs_switch(struct drm_i915_gem_request *req) /* GEN8 does *not* require an explicit reload if the PDPs have been * setup, and we do not wish to move them. */ - if (needs_pd_load_post(to, hw_flags)) { + if (needs_pd_load_post(ppgtt, to, hw_flags)) { trace_switch_mm(engine, to); - ret = to->ppgtt->switch_mm(to->ppgtt, req); + ret = ppgtt->switch_mm(ppgtt, req); /* The hardware context switch is emitted, but we haven't * actually changed the state - so it's probably safe to bail * here. Still, let the user know something dangerous has @@ -771,14 +806,14 @@ static int do_rcs_switch(struct drm_i915_gem_request *req) return ret; } - if (to->ppgtt) - to->ppgtt->pd_dirty_rings &= ~intel_engine_flag(engine); + if (ppgtt) + ppgtt->pd_dirty_rings &= ~intel_engine_flag(engine); for (i = 0; i < MAX_L3_SLICES; i++) { if (!(to->remap_slice & (1<<i))) continue; - ret = i915_gem_l3_remap(req, i); + ret = remap_l3(req, i); if (ret) return ret; @@ -825,17 +860,18 @@ int i915_switch_context(struct drm_i915_gem_request *req) if (engine->id != RCS || req->ctx->legacy_hw_ctx.rcs_state == NULL) { struct intel_context *to = req->ctx; + struct i915_hw_ppgtt *ppgtt = + to->ppgtt ?: req->i915->mm.aliasing_ppgtt; - if (needs_pd_load_pre(engine, to)) { + if (needs_pd_load_pre(ppgtt, engine, to)) { int ret; trace_switch_mm(engine, to); - ret = to->ppgtt->switch_mm(to->ppgtt, req); + ret = ppgtt->switch_mm(ppgtt, req); if (ret) return ret; - /* Doing a PD load always reloads the page dirs */ - to->ppgtt->pd_dirty_rings &= ~intel_engine_flag(engine); + ppgtt->pd_dirty_rings &= ~intel_engine_flag(engine); } if (to != engine->last_context) { @@ -1004,3 +1040,42 @@ int i915_gem_context_setparam_ioctl(struct drm_device *dev, void *data, return ret; } + +int i915_gem_context_reset_stats_ioctl(struct drm_device *dev, + void *data, struct drm_file *file) +{ + struct drm_i915_private *dev_priv = dev->dev_private; + struct drm_i915_reset_stats *args = data; + struct i915_ctx_hang_stats *hs; + struct intel_context *ctx; + int ret; + + if (args->flags || args->pad) + return -EINVAL; + + if (args->ctx_id == DEFAULT_CONTEXT_HANDLE && !capable(CAP_SYS_ADMIN)) + return -EPERM; + + ret = i915_mutex_lock_interruptible(dev); + if (ret) + return ret; + + ctx = i915_gem_context_get(file->driver_priv, args->ctx_id); + if (IS_ERR(ctx)) { + mutex_unlock(&dev->struct_mutex); + return PTR_ERR(ctx); + } + hs = &ctx->hang_stats; + + if (capable(CAP_SYS_ADMIN)) + args->reset_count = i915_reset_count(&dev_priv->gpu_error); + else + args->reset_count = 0; + + args->batch_active = hs->batch_active; + args->batch_pending = hs->batch_pending; + + mutex_unlock(&dev->struct_mutex); + + return 0; +} |