diff options
Diffstat (limited to 'drivers/gpu/drm/i915')
55 files changed, 1164 insertions, 389 deletions
diff --git a/drivers/gpu/drm/i915/Kconfig.debug b/drivers/gpu/drm/i915/Kconfig.debug index 80815a5229a1..eea79125b3ea 100644 --- a/drivers/gpu/drm/i915/Kconfig.debug +++ b/drivers/gpu/drm/i915/Kconfig.debug @@ -36,7 +36,6 @@ config DRM_I915_DEBUG select DRM_I915_SELFTEST select DRM_I915_DEBUG_RUNTIME_PM select DRM_I915_DEBUG_MMIO - select BROKEN # for prototype uAPI default n help Choose this option to turn on extra driver debugging that may affect diff --git a/drivers/gpu/drm/i915/Kconfig.profile b/drivers/gpu/drm/i915/Kconfig.profile index 1799537a3228..c280b6ae38eb 100644 --- a/drivers/gpu/drm/i915/Kconfig.profile +++ b/drivers/gpu/drm/i915/Kconfig.profile @@ -25,7 +25,7 @@ config DRM_I915_HEARTBEAT_INTERVAL config DRM_I915_PREEMPT_TIMEOUT int "Preempt timeout (ms, jiffy granularity)" - default 100 # milliseconds + default 640 # milliseconds help How long to wait (in milliseconds) for a preemption event to occur when submitting a new context via execlists. If the current context diff --git a/drivers/gpu/drm/i915/display/intel_atomic.c b/drivers/gpu/drm/i915/display/intel_atomic.c index 9cd6d2348a1e..c2875b10adf9 100644 --- a/drivers/gpu/drm/i915/display/intel_atomic.c +++ b/drivers/gpu/drm/i915/display/intel_atomic.c @@ -200,6 +200,7 @@ intel_crtc_duplicate_state(struct drm_crtc *crtc) crtc_state->update_wm_pre = false; crtc_state->update_wm_post = false; crtc_state->fifo_changed = false; + crtc_state->preload_luts = false; crtc_state->wm.need_postvbl_update = false; crtc_state->fb_bits = 0; crtc_state->update_planes = 0; diff --git a/drivers/gpu/drm/i915/display/intel_cdclk.c b/drivers/gpu/drm/i915/display/intel_cdclk.c index 0caef2592a7e..ed8c7ce62119 100644 --- a/drivers/gpu/drm/i915/display/intel_cdclk.c +++ b/drivers/gpu/drm/i915/display/intel_cdclk.c @@ -1273,7 +1273,9 @@ static u8 icl_calc_voltage_level(int cdclk) static u8 ehl_calc_voltage_level(int cdclk) { - if (cdclk > 312000) + if (cdclk > 326400) + return 3; + else if (cdclk > 312000) return 2; else if (cdclk > 180000) return 1; diff --git a/drivers/gpu/drm/i915/display/intel_color.c b/drivers/gpu/drm/i915/display/intel_color.c index fa44eb73d088..aa3a063549c3 100644 --- a/drivers/gpu/drm/i915/display/intel_color.c +++ b/drivers/gpu/drm/i915/display/intel_color.c @@ -1022,6 +1022,55 @@ void intel_color_commit(const struct intel_crtc_state *crtc_state) dev_priv->display.color_commit(crtc_state); } +static bool intel_can_preload_luts(const struct intel_crtc_state *new_crtc_state) +{ + struct intel_crtc *crtc = to_intel_crtc(new_crtc_state->base.crtc); + struct intel_atomic_state *state = + to_intel_atomic_state(new_crtc_state->base.state); + const struct intel_crtc_state *old_crtc_state = + intel_atomic_get_old_crtc_state(state, crtc); + + return !old_crtc_state->base.gamma_lut && + !old_crtc_state->base.degamma_lut; +} + +static bool chv_can_preload_luts(const struct intel_crtc_state *new_crtc_state) +{ + struct intel_crtc *crtc = to_intel_crtc(new_crtc_state->base.crtc); + struct intel_atomic_state *state = + to_intel_atomic_state(new_crtc_state->base.state); + const struct intel_crtc_state *old_crtc_state = + intel_atomic_get_old_crtc_state(state, crtc); + + /* + * CGM_PIPE_MODE is itself single buffered. We'd have to + * somehow split it out from chv_load_luts() if we wanted + * the ability to preload the CGM LUTs/CSC without tearing. + */ + if (old_crtc_state->cgm_mode || new_crtc_state->cgm_mode) + return false; + + return !old_crtc_state->base.gamma_lut; +} + +static bool glk_can_preload_luts(const struct intel_crtc_state *new_crtc_state) +{ + struct intel_crtc *crtc = to_intel_crtc(new_crtc_state->base.crtc); + struct intel_atomic_state *state = + to_intel_atomic_state(new_crtc_state->base.state); + const struct intel_crtc_state *old_crtc_state = + intel_atomic_get_old_crtc_state(state, crtc); + + /* + * The hardware degamma is active whenever the pipe + * CSC is active. Thus even if the old state has no + * software degamma we need to avoid clobbering the + * linear hardware degamma mid scanout. + */ + return !old_crtc_state->csc_enable && + !old_crtc_state->base.gamma_lut; +} + int intel_color_check(struct intel_crtc_state *crtc_state) { struct drm_i915_private *dev_priv = to_i915(crtc_state->base.crtc->dev); @@ -1165,6 +1214,8 @@ static int i9xx_color_check(struct intel_crtc_state *crtc_state) if (ret) return ret; + crtc_state->preload_luts = intel_can_preload_luts(crtc_state); + return 0; } @@ -1217,6 +1268,8 @@ static int chv_color_check(struct intel_crtc_state *crtc_state) if (ret) return ret; + crtc_state->preload_luts = chv_can_preload_luts(crtc_state); + return 0; } @@ -1271,6 +1324,8 @@ static int ilk_color_check(struct intel_crtc_state *crtc_state) if (ret) return ret; + crtc_state->preload_luts = intel_can_preload_luts(crtc_state); + return 0; } @@ -1328,6 +1383,8 @@ static int ivb_color_check(struct intel_crtc_state *crtc_state) if (ret) return ret; + crtc_state->preload_luts = intel_can_preload_luts(crtc_state); + return 0; } @@ -1366,6 +1423,8 @@ static int glk_color_check(struct intel_crtc_state *crtc_state) if (ret) return ret; + crtc_state->preload_luts = glk_can_preload_luts(crtc_state); + return 0; } @@ -1415,6 +1474,8 @@ static int icl_color_check(struct intel_crtc_state *crtc_state) crtc_state->csc_mode = icl_csc_mode(crtc_state); + crtc_state->preload_luts = intel_can_preload_luts(crtc_state); + return 0; } diff --git a/drivers/gpu/drm/i915/display/intel_ddi.c b/drivers/gpu/drm/i915/display/intel_ddi.c index b51f244ad7a5..c7c2b349858d 100644 --- a/drivers/gpu/drm/i915/display/intel_ddi.c +++ b/drivers/gpu/drm/i915/display/intel_ddi.c @@ -593,7 +593,7 @@ struct tgl_dkl_phy_ddi_buf_trans { u32 dkl_de_emphasis_control; }; -static const struct tgl_dkl_phy_ddi_buf_trans tgl_dkl_phy_ddi_translations[] = { +static const struct tgl_dkl_phy_ddi_buf_trans tgl_dkl_phy_dp_ddi_trans[] = { /* VS pre-emp Non-trans mV Pre-emph dB */ { 0x7, 0x0, 0x00 }, /* 0 0 400mV 0 dB */ { 0x5, 0x0, 0x03 }, /* 0 1 400mV 3.5 dB */ @@ -607,6 +607,20 @@ static const struct tgl_dkl_phy_ddi_buf_trans tgl_dkl_phy_ddi_translations[] = { { 0x0, 0x0, 0x00 }, /* 3 0 1200mV 0 dB HDMI default */ }; +static const struct tgl_dkl_phy_ddi_buf_trans tgl_dkl_phy_hdmi_ddi_trans[] = { + /* HDMI Preset VS Pre-emph */ + { 0x7, 0x0, 0x0 }, /* 1 400mV 0dB */ + { 0x6, 0x0, 0x0 }, /* 2 500mV 0dB */ + { 0x4, 0x0, 0x0 }, /* 3 650mV 0dB */ + { 0x2, 0x0, 0x0 }, /* 4 800mV 0dB */ + { 0x0, 0x0, 0x0 }, /* 5 1000mV 0dB */ + { 0x0, 0x0, 0x5 }, /* 6 Full -1.5 dB */ + { 0x0, 0x0, 0x6 }, /* 7 Full -1.8 dB */ + { 0x0, 0x0, 0x7 }, /* 8 Full -2 dB */ + { 0x0, 0x0, 0x8 }, /* 9 Full -2.5 dB */ + { 0x0, 0x0, 0xA }, /* 10 Full -3 dB */ +}; + static const struct ddi_buf_trans * bdw_get_buf_trans_edp(struct drm_i915_private *dev_priv, int *n_entries) { @@ -898,7 +912,7 @@ static int intel_ddi_hdmi_level(struct drm_i915_private *dev_priv, enum port por icl_get_combo_buf_trans(dev_priv, INTEL_OUTPUT_HDMI, 0, &n_entries); else - n_entries = ARRAY_SIZE(tgl_dkl_phy_ddi_translations); + n_entries = ARRAY_SIZE(tgl_dkl_phy_hdmi_ddi_trans); default_entry = n_entries - 1; } else if (INTEL_GEN(dev_priv) == 11) { if (intel_phy_is_combo(dev_priv, phy)) @@ -1794,10 +1808,8 @@ void intel_ddi_set_dp_msa(const struct intel_crtc_state *crtc_state, * of Color Encoding Format and Content Color Gamut] while sending * YCBCR 420, HDR BT.2020 signals we should program MSA MISC1 fields * which indicate VSC SDP for the Pixel Encoding/Colorimetry Format. - * - * FIXME MST doesn't pass in the conn_state */ - if (conn_state && intel_dp_needs_vsc_sdp(crtc_state, conn_state)) + if (intel_dp_needs_vsc_sdp(crtc_state, conn_state)) temp |= DP_MSA_MISC_COLOR_VSC_SDP; I915_WRITE(TRANS_MSA_MISC(cpu_transcoder), temp); @@ -2373,7 +2385,7 @@ u8 intel_ddi_dp_voltage_max(struct intel_encoder *encoder) icl_get_combo_buf_trans(dev_priv, encoder->type, intel_dp->link_rate, &n_entries); else - n_entries = ARRAY_SIZE(tgl_dkl_phy_ddi_translations); + n_entries = ARRAY_SIZE(tgl_dkl_phy_dp_ddi_trans); } else if (INTEL_GEN(dev_priv) == 11) { if (intel_phy_is_combo(dev_priv, phy)) icl_get_combo_buf_trans(dev_priv, encoder->type, @@ -2825,8 +2837,13 @@ tgl_dkl_phy_ddi_vswing_sequence(struct intel_encoder *encoder, int link_clock, const struct tgl_dkl_phy_ddi_buf_trans *ddi_translations; u32 n_entries, val, ln, dpcnt_mask, dpcnt_val; - n_entries = ARRAY_SIZE(tgl_dkl_phy_ddi_translations); - ddi_translations = tgl_dkl_phy_ddi_translations; + if (encoder->type == INTEL_OUTPUT_HDMI) { + n_entries = ARRAY_SIZE(tgl_dkl_phy_hdmi_ddi_trans); + ddi_translations = tgl_dkl_phy_hdmi_ddi_trans; + } else { + n_entries = ARRAY_SIZE(tgl_dkl_phy_dp_ddi_trans); + ddi_translations = tgl_dkl_phy_dp_ddi_trans; + } if (level >= n_entries) level = n_entries - 1; @@ -3605,7 +3622,11 @@ static void intel_ddi_pre_enable_dp(struct intel_encoder *encoder, else hsw_ddi_pre_enable_dp(encoder, crtc_state, conn_state); - intel_ddi_set_dp_msa(crtc_state, conn_state); + /* MST will call a setting of MSA after an allocating of Virtual Channel + * from MST encoder pre_enable callback. + */ + if (!intel_crtc_has_type(crtc_state, INTEL_OUTPUT_DP_MST)) + intel_ddi_set_dp_msa(crtc_state, conn_state); } static void intel_ddi_pre_enable_hdmi(struct intel_encoder *encoder, diff --git a/drivers/gpu/drm/i915/display/intel_display.c b/drivers/gpu/drm/i915/display/intel_display.c index 348ce0456696..6f5e3bd13ad1 100644 --- a/drivers/gpu/drm/i915/display/intel_display.c +++ b/drivers/gpu/drm/i915/display/intel_display.c @@ -66,6 +66,7 @@ #include "intel_cdclk.h" #include "intel_color.h" #include "intel_display_types.h" +#include "intel_dp_link_training.h" #include "intel_fbc.h" #include "intel_fbdev.h" #include "intel_fifo_underrun.h" @@ -2528,6 +2529,9 @@ u32 intel_plane_fb_max_stride(struct drm_i915_private *dev_priv, * the highest stride limits of them all. */ crtc = intel_get_crtc_for_pipe(dev_priv, PIPE_A); + if (!crtc) + return 0; + plane = to_intel_plane(crtc->base.primary); return plane->max_stride(plane, pixel_format, modifier, @@ -14201,6 +14205,11 @@ static void intel_update_crtc(struct intel_crtc *crtc, /* vblanks work again, re-enable pipe CRC. */ intel_crtc_enable_pipe_crc(crtc); } else { + if (new_crtc_state->preload_luts && + (new_crtc_state->base.color_mgmt_changed || + new_crtc_state->update_pipe)) + intel_color_load_luts(new_crtc_state); + intel_pre_plane_update(old_crtc_state, new_crtc_state); if (new_crtc_state->update_pipe) @@ -14713,6 +14722,7 @@ static void intel_atomic_commit_tail(struct intel_atomic_state *state) for_each_new_intel_crtc_in_state(state, crtc, new_crtc_state, i) { if (new_crtc_state->base.active && !needs_modeset(new_crtc_state) && + !new_crtc_state->preload_luts && (new_crtc_state->base.color_mgmt_changed || new_crtc_state->update_pipe)) intel_color_load_luts(new_crtc_state); diff --git a/drivers/gpu/drm/i915/display/intel_display.h b/drivers/gpu/drm/i915/display/intel_display.h index 355c50088589..f417e0948001 100644 --- a/drivers/gpu/drm/i915/display/intel_display.h +++ b/drivers/gpu/drm/i915/display/intel_display.h @@ -27,7 +27,6 @@ #include <drm/drm_util.h> #include <drm/i915_drm.h> -#include "intel_dp_link_training.h" enum link_m_n_set; struct dpll; diff --git a/drivers/gpu/drm/i915/display/intel_display_types.h b/drivers/gpu/drm/i915/display/intel_display_types.h index 4341bd66a418..1a7334dbe802 100644 --- a/drivers/gpu/drm/i915/display/intel_display_types.h +++ b/drivers/gpu/drm/i915/display/intel_display_types.h @@ -775,6 +775,7 @@ struct intel_crtc_state { bool disable_cxsr; bool update_wm_pre, update_wm_post; /* watermarks are updated */ bool fifo_changed; /* FIFO split is changed */ + bool preload_luts; /* Pipe source size (ie. panel fitter input size) * All planes will be positioned inside this space, diff --git a/drivers/gpu/drm/i915/display/intel_dp_mst.c b/drivers/gpu/drm/i915/display/intel_dp_mst.c index 9ae5b8b6bbbc..03d1cba0b696 100644 --- a/drivers/gpu/drm/i915/display/intel_dp_mst.c +++ b/drivers/gpu/drm/i915/display/intel_dp_mst.c @@ -331,6 +331,8 @@ static void intel_mst_pre_enable_dp(struct intel_encoder *encoder, ret = drm_dp_update_payload_part1(&intel_dp->mst_mgr); intel_ddi_enable_pipe_clock(pipe_config); + + intel_ddi_set_dp_msa(pipe_config, conn_state); } static void intel_mst_enable_dp(struct intel_encoder *encoder, diff --git a/drivers/gpu/drm/i915/display/intel_fbdev.c b/drivers/gpu/drm/i915/display/intel_fbdev.c index 3d1061470e76..48c960ca12fb 100644 --- a/drivers/gpu/drm/i915/display/intel_fbdev.c +++ b/drivers/gpu/drm/i915/display/intel_fbdev.c @@ -234,6 +234,11 @@ static int intelfb_create(struct drm_fb_helper *helper, info->apertures->ranges[0].base = ggtt->gmadr.start; info->apertures->ranges[0].size = ggtt->mappable_end; + /* Our framebuffer is the entirety of fbdev's system memory */ + info->fix.smem_start = + (unsigned long)(ggtt->gmadr.start + vma->node.start); + info->fix.smem_len = vma->node.size; + vaddr = i915_vma_pin_iomap(vma); if (IS_ERR(vaddr)) { DRM_ERROR("Failed to remap framebuffer into virtual memory\n"); @@ -243,10 +248,6 @@ static int intelfb_create(struct drm_fb_helper *helper, info->screen_base = vaddr; info->screen_size = vma->node.size; - /* Our framebuffer is the entirety of fbdev's system memory */ - info->fix.smem_start = (unsigned long)info->screen_base; - info->fix.smem_len = info->screen_size; - drm_fb_helper_fill_info(info, &ifbdev->helper, sizes); /* If the object is shmemfs backed, it will have given us zeroed pages. diff --git a/drivers/gpu/drm/i915/display/intel_sprite.c b/drivers/gpu/drm/i915/display/intel_sprite.c index edc41fc40726..72fda0430062 100644 --- a/drivers/gpu/drm/i915/display/intel_sprite.c +++ b/drivers/gpu/drm/i915/display/intel_sprite.c @@ -2885,7 +2885,7 @@ struct intel_plane * skl_universal_plane_create(struct drm_i915_private *dev_priv, enum pipe pipe, enum plane_id plane_id) { - static const struct drm_plane_funcs *plane_funcs; + const struct drm_plane_funcs *plane_funcs; struct intel_plane *plane; enum drm_plane_type plane_type; unsigned int supported_rotations; diff --git a/drivers/gpu/drm/i915/gem/i915_gem_context.c b/drivers/gpu/drm/i915/gem/i915_gem_context.c index de6e55af82cf..4237a2887ff2 100644 --- a/drivers/gpu/drm/i915/gem/i915_gem_context.c +++ b/drivers/gpu/drm/i915/gem/i915_gem_context.c @@ -236,6 +236,8 @@ static void i915_gem_context_free(struct i915_gem_context *ctx) free_engines(rcu_access_pointer(ctx->engines)); mutex_destroy(&ctx->engines_mutex); + kfree(ctx->jump_whitelist); + if (ctx->timeline) intel_timeline_put(ctx->timeline); @@ -366,7 +368,7 @@ static struct intel_engine_cs *active_engine(struct intel_context *ce) if (!ce->timeline) return NULL; - rcu_read_lock(); + mutex_lock(&ce->timeline->mutex); list_for_each_entry_reverse(rq, &ce->timeline->requests, link) { if (i915_request_completed(rq)) break; @@ -376,7 +378,7 @@ static struct intel_engine_cs *active_engine(struct intel_context *ce) if (engine) break; } - rcu_read_unlock(); + mutex_unlock(&ce->timeline->mutex); return engine; } @@ -527,6 +529,9 @@ __create_context(struct drm_i915_private *i915) for (i = 0; i < ARRAY_SIZE(ctx->hang_timestamp); i++) ctx->hang_timestamp[i] = jiffies - CONTEXT_FAST_HANG_JIFFIES; + ctx->jump_whitelist = NULL; + ctx->jump_whitelist_cmds = 0; + spin_lock(&i915->gem.contexts.lock); list_add_tail(&ctx->link, &i915->gem.contexts.list); spin_unlock(&i915->gem.contexts.lock); @@ -722,6 +727,7 @@ int i915_gem_init_contexts(struct drm_i915_private *i915) void i915_gem_driver_release__contexts(struct drm_i915_private *i915) { destroy_kernel_context(&i915->kernel_context); + flush_work(&i915->gem.contexts.free_work); } static int context_idr_cleanup(int id, void *p, void *data) @@ -1136,7 +1142,7 @@ static int set_ppgtt(struct drm_i915_file_private *file_priv, if (i915_gem_context_is_closed(ctx)) { err = -ENOENT; - goto out; + goto unlock; } if (vm == rcu_access_pointer(ctx->vm)) diff --git a/drivers/gpu/drm/i915/gem/i915_gem_context_types.h b/drivers/gpu/drm/i915/gem/i915_gem_context_types.h index 861d7d92fe9f..3870dd5daaa0 100644 --- a/drivers/gpu/drm/i915/gem/i915_gem_context_types.h +++ b/drivers/gpu/drm/i915/gem/i915_gem_context_types.h @@ -176,6 +176,13 @@ struct i915_gem_context { * per vm, which may be one per context or shared with the global GTT) */ struct radix_tree_root handles_vma; + + /** jump_whitelist: Bit array for tracking cmds during cmdparsing + * Guarded by struct_mutex + */ + unsigned long *jump_whitelist; + /** jump_whitelist_cmds: No of cmd slots available */ + u32 jump_whitelist_cmds; }; #endif /* __I915_GEM_CONTEXT_TYPES_H__ */ diff --git a/drivers/gpu/drm/i915/gem/i915_gem_execbuffer.c b/drivers/gpu/drm/i915/gem/i915_gem_execbuffer.c index e4f5c269150a..f0998f1225af 100644 --- a/drivers/gpu/drm/i915/gem/i915_gem_execbuffer.c +++ b/drivers/gpu/drm/i915/gem/i915_gem_execbuffer.c @@ -298,7 +298,9 @@ static inline u64 gen8_noncanonical_addr(u64 address) static inline bool eb_use_cmdparser(const struct i915_execbuffer *eb) { - return intel_engine_needs_cmd_parser(eb->engine) && eb->batch_len; + return intel_engine_requires_cmd_parser(eb->engine) || + (intel_engine_using_cmd_parser(eb->engine) && + eb->args->batch_len); } static int eb_create(struct i915_execbuffer *eb) @@ -1990,40 +1992,94 @@ static int i915_reset_gen7_sol_offsets(struct i915_request *rq) return 0; } -static struct i915_vma *eb_parse(struct i915_execbuffer *eb, bool is_master) +static struct i915_vma * +shadow_batch_pin(struct i915_execbuffer *eb, struct drm_i915_gem_object *obj) +{ + struct drm_i915_private *dev_priv = eb->i915; + struct i915_vma * const vma = *eb->vma; + struct i915_address_space *vm; + u64 flags; + + /* + * PPGTT backed shadow buffers must be mapped RO, to prevent + * post-scan tampering + */ + if (CMDPARSER_USES_GGTT(dev_priv)) { + flags = PIN_GLOBAL; + vm = &dev_priv->ggtt.vm; + } else if (vma->vm->has_read_only) { + flags = PIN_USER; + vm = vma->vm; + i915_gem_object_set_readonly(obj); + } else { + DRM_DEBUG("Cannot prevent post-scan tampering without RO capable vm\n"); + return ERR_PTR(-EINVAL); + } + + return i915_gem_object_pin(obj, vm, NULL, 0, 0, flags); +} + +static struct i915_vma *eb_parse(struct i915_execbuffer *eb) { struct intel_engine_pool_node *pool; struct i915_vma *vma; + u64 batch_start; + u64 shadow_batch_start; int err; pool = intel_engine_get_pool(eb->engine, eb->batch_len); if (IS_ERR(pool)) return ERR_CAST(pool); - err = intel_engine_cmd_parser(eb->engine, + vma = shadow_batch_pin(eb, pool->obj); + if (IS_ERR(vma)) + goto err; + + batch_start = gen8_canonical_addr(eb->batch->node.start) + + eb->batch_start_offset; + + shadow_batch_start = gen8_canonical_addr(vma->node.start); + + err = intel_engine_cmd_parser(eb->gem_context, + eb->engine, eb->batch->obj, - pool->obj, + batch_start, eb->batch_start_offset, eb->batch_len, - is_master); + pool->obj, + shadow_batch_start); + if (err) { - if (err == -EACCES) /* unhandled chained batch */ + i915_vma_unpin(vma); + + /* + * Unsafe GGTT-backed buffers can still be submitted safely + * as non-secure. + * For PPGTT backing however, we have no choice but to forcibly + * reject unsafe buffers + */ + if (CMDPARSER_USES_GGTT(eb->i915) && (err == -EACCES)) + /* Execute original buffer non-secure */ vma = NULL; else vma = ERR_PTR(err); goto err; } - vma = i915_gem_object_ggtt_pin(pool->obj, NULL, 0, 0, 0); - if (IS_ERR(vma)) - goto err; - eb->vma[eb->buffer_count] = i915_vma_get(vma); eb->flags[eb->buffer_count] = __EXEC_OBJECT_HAS_PIN | __EXEC_OBJECT_HAS_REF; vma->exec_flags = &eb->flags[eb->buffer_count]; eb->buffer_count++; + eb->batch_start_offset = 0; + eb->batch = vma; + + if (CMDPARSER_USES_GGTT(eb->i915)) + eb->batch_flags |= I915_DISPATCH_SECURE; + + /* eb->batch_len unchanged */ + vma->private = pool; return vma; @@ -2430,6 +2486,7 @@ i915_gem_do_execbuffer(struct drm_device *dev, struct drm_i915_gem_exec_object2 *exec, struct drm_syncobj **fences) { + struct drm_i915_private *i915 = to_i915(dev); struct i915_execbuffer eb; struct dma_fence *in_fence = NULL; struct dma_fence *exec_fence = NULL; @@ -2441,7 +2498,7 @@ i915_gem_do_execbuffer(struct drm_device *dev, BUILD_BUG_ON(__EXEC_OBJECT_INTERNAL_FLAGS & ~__EXEC_OBJECT_UNKNOWN_FLAGS); - eb.i915 = to_i915(dev); + eb.i915 = i915; eb.file = file; eb.args = args; if (DBG_FORCE_RELOC || !(args->flags & I915_EXEC_NO_RELOC)) @@ -2461,8 +2518,15 @@ i915_gem_do_execbuffer(struct drm_device *dev, eb.batch_flags = 0; if (args->flags & I915_EXEC_SECURE) { + if (INTEL_GEN(i915) >= 11) + return -ENODEV; + + /* Return -EPERM to trigger fallback code on old binaries. */ + if (!HAS_SECURE_BATCHES(i915)) + return -EPERM; + if (!drm_is_current_master(file) || !capable(CAP_SYS_ADMIN)) - return -EPERM; + return -EPERM; eb.batch_flags |= I915_DISPATCH_SECURE; } @@ -2539,34 +2603,19 @@ i915_gem_do_execbuffer(struct drm_device *dev, goto err_vma; } + if (eb.batch_len == 0) + eb.batch_len = eb.batch->size - eb.batch_start_offset; + if (eb_use_cmdparser(&eb)) { struct i915_vma *vma; - vma = eb_parse(&eb, drm_is_current_master(file)); + vma = eb_parse(&eb); if (IS_ERR(vma)) { err = PTR_ERR(vma); goto err_vma; } - - if (vma) { - /* - * Batch parsed and accepted: - * - * Set the DISPATCH_SECURE bit to remove the NON_SECURE - * bit from MI_BATCH_BUFFER_START commands issued in - * the dispatch_execbuffer implementations. We - * specifically don't want that set on batches the - * command parser has accepted. - */ - eb.batch_flags |= I915_DISPATCH_SECURE; - eb.batch_start_offset = 0; - eb.batch = vma; - } } - if (eb.batch_len == 0) - eb.batch_len = eb.batch->size - eb.batch_start_offset; - /* * snb/ivb/vlv conflate the "batch in ppgtt" bit with the "non-secure * batch" bit. Hence we need to pin secure batches into the global gtt. diff --git a/drivers/gpu/drm/i915/gem/i915_gem_userptr.c b/drivers/gpu/drm/i915/gem/i915_gem_userptr.c index 1e045c337044..4c72d74d6576 100644 --- a/drivers/gpu/drm/i915/gem/i915_gem_userptr.c +++ b/drivers/gpu/drm/i915/gem/i915_gem_userptr.c @@ -646,8 +646,28 @@ i915_gem_userptr_put_pages(struct drm_i915_gem_object *obj, obj->mm.dirty = false; for_each_sgt_page(page, sgt_iter, pages) { - if (obj->mm.dirty) + if (obj->mm.dirty && trylock_page(page)) { + /* + * As this may not be anonymous memory (e.g. shmem) + * but exist on a real mapping, we have to lock + * the page in order to dirty it -- holding + * the page reference is not sufficient to + * prevent the inode from being truncated. + * Play safe and take the lock. + * + * However...! + * + * The mmu-notifier can be invalidated for a + * migrate_page, that is alreadying holding the lock + * on the page. Such a try_to_unmap() will result + * in us calling put_pages() and so recursively try + * to lock the page. We avoid that deadlock with + * a trylock_page() and in exchange we risk missing + * some page dirtying. + */ set_page_dirty(page); + unlock_page(page); + } mark_page_accessed(page); put_page(page); diff --git a/drivers/gpu/drm/i915/gt/intel_context.c b/drivers/gpu/drm/i915/gt/intel_context.c index ee9d2bcd2c13..ef7bc41ffffa 100644 --- a/drivers/gpu/drm/i915/gt/intel_context.c +++ b/drivers/gpu/drm/i915/gt/intel_context.c @@ -310,10 +310,23 @@ int intel_context_prepare_remote_request(struct intel_context *ce, GEM_BUG_ON(rq->hw_context == ce); if (rcu_access_pointer(rq->timeline) != tl) { /* timeline sharing! */ - err = mutex_lock_interruptible_nested(&tl->mutex, - SINGLE_DEPTH_NESTING); - if (err) - return err; + /* + * Ideally, we just want to insert our foreign fence as + * a barrier into the remove context, such that this operation + * occurs after all current operations in that context, and + * all future operations must occur after this. + * + * Currently, the timeline->last_request tracking is guarded + * by its mutex and so we must obtain that to atomically + * insert our barrier. However, since we already hold our + * timeline->mutex, we must be careful against potential + * inversion if we are the kernel_context as the remote context + * will itself poke at the kernel_context when it needs to + * unpin. Ergo, if already locked, we drop both locks and + * try again (through the magic of userspace repeating EAGAIN). + */ + if (!mutex_trylock(&tl->mutex)) + return -EAGAIN; /* Queue this switch after current activity by this context. */ err = i915_active_fence_set(&tl->last_request, rq); diff --git a/drivers/gpu/drm/i915/gt/intel_engine.h b/drivers/gpu/drm/i915/gt/intel_engine.h index bc3b72bfa9e3..01765a7ec18f 100644 --- a/drivers/gpu/drm/i915/gt/intel_engine.h +++ b/drivers/gpu/drm/i915/gt/intel_engine.h @@ -100,9 +100,7 @@ execlists_num_ports(const struct intel_engine_execlists * const execlists) static inline struct i915_request * execlists_active(const struct intel_engine_execlists *execlists) { - GEM_BUG_ON(execlists->active - execlists->inflight > - execlists_num_ports(execlists)); - return READ_ONCE(*execlists->active); + return *READ_ONCE(execlists->active); } static inline void diff --git a/drivers/gpu/drm/i915/gt/intel_engine_cs.c b/drivers/gpu/drm/i915/gt/intel_engine_cs.c index f8113bc756c6..813bd3a610d2 100644 --- a/drivers/gpu/drm/i915/gt/intel_engine_cs.c +++ b/drivers/gpu/drm/i915/gt/intel_engine_cs.c @@ -28,13 +28,13 @@ #include "i915_drv.h" -#include "gt/intel_gt.h" - +#include "intel_context.h" #include "intel_engine.h" #include "intel_engine_pm.h" #include "intel_engine_pool.h" #include "intel_engine_user.h" -#include "intel_context.h" +#include "intel_gt.h" +#include "intel_gt_requests.h" #include "intel_lrc.h" #include "intel_reset.h" #include "intel_ring.h" @@ -616,6 +616,7 @@ static int intel_engine_setup_common(struct intel_engine_cs *engine) intel_engine_init_execlists(engine); intel_engine_init_cmd_parser(engine); intel_engine_init__pm(engine); + intel_engine_init_retire(engine); intel_engine_pool_init(&engine->pool); @@ -838,6 +839,7 @@ void intel_engine_cleanup_common(struct intel_engine_cs *engine) cleanup_status_page(engine); + intel_engine_fini_retire(engine); intel_engine_pool_fini(&engine->pool); intel_engine_fini_breadcrumbs(engine); intel_engine_cleanup_cmd_parser(engine); @@ -1372,6 +1374,7 @@ static void intel_engine_print_registers(struct intel_engine_cs *engine, } execlists_active_lock_bh(execlists); + rcu_read_lock(); for (port = execlists->active; (rq = *port); port++) { char hdr[80]; int len; @@ -1409,6 +1412,7 @@ static void intel_engine_print_registers(struct intel_engine_cs *engine, if (tl) intel_timeline_put(tl); } + rcu_read_unlock(); execlists_active_unlock_bh(execlists); } else if (INTEL_GEN(dev_priv) > 6) { drm_printf(m, "\tPP_DIR_BASE: 0x%08x\n", diff --git a/drivers/gpu/drm/i915/gt/intel_engine_heartbeat.c b/drivers/gpu/drm/i915/gt/intel_engine_heartbeat.c index 5051f304705b..06aa14c7aa8c 100644 --- a/drivers/gpu/drm/i915/gt/intel_engine_heartbeat.c +++ b/drivers/gpu/drm/i915/gt/intel_engine_heartbeat.c @@ -141,8 +141,8 @@ void intel_engine_unpark_heartbeat(struct intel_engine_cs *engine) void intel_engine_park_heartbeat(struct intel_engine_cs *engine) { - cancel_delayed_work(&engine->heartbeat.work); - i915_request_put(fetch_and_zero(&engine->heartbeat.systole)); + if (cancel_delayed_work(&engine->heartbeat.work)) + i915_request_put(fetch_and_zero(&engine->heartbeat.systole)); } void intel_engine_init_heartbeat(struct intel_engine_cs *engine) diff --git a/drivers/gpu/drm/i915/gt/intel_engine_pm.c b/drivers/gpu/drm/i915/gt/intel_engine_pm.c index 3c0f490ff2c7..0e1ad4a4bd97 100644 --- a/drivers/gpu/drm/i915/gt/intel_engine_pm.c +++ b/drivers/gpu/drm/i915/gt/intel_engine_pm.c @@ -73,8 +73,42 @@ static inline void __timeline_mark_unlock(struct intel_context *ce, #endif /* !IS_ENABLED(CONFIG_LOCKDEP) */ +static void +__queue_and_release_pm(struct i915_request *rq, + struct intel_timeline *tl, + struct intel_engine_cs *engine) +{ + struct intel_gt_timelines *timelines = &engine->gt->timelines; + + GEM_TRACE("%s\n", engine->name); + + /* + * We have to serialise all potential retirement paths with our + * submission, as we don't want to underflow either the + * engine->wakeref.counter or our timeline->active_count. + * + * Equally, we cannot allow a new submission to start until + * after we finish queueing, nor could we allow that submitter + * to retire us before we are ready! + */ + spin_lock(&timelines->lock); + + /* Let intel_gt_retire_requests() retire us (acquired under lock) */ + if (!atomic_fetch_inc(&tl->active_count)) + list_add_tail(&tl->link, &timelines->active_list); + + /* Hand the request over to HW and so engine_retire() */ + __i915_request_queue(rq, NULL); + + /* Let new submissions commence (and maybe retire this timeline) */ + __intel_wakeref_defer_park(&engine->wakeref); + + spin_unlock(&timelines->lock); +} + static bool switch_to_kernel_context(struct intel_engine_cs *engine) { + struct intel_context *ce = engine->kernel_context; struct i915_request *rq; unsigned long flags; bool result = true; @@ -98,16 +132,31 @@ static bool switch_to_kernel_context(struct intel_engine_cs *engine) * This should hold true as we can only park the engine after * retiring the last request, thus all rings should be empty and * all timelines idle. + * + * For unlocking, there are 2 other parties and the GPU who have a + * stake here. + * + * A new gpu user will be waiting on the engine-pm to start their + * engine_unpark. New waiters are predicated on engine->wakeref.count + * and so intel_wakeref_defer_park() acts like a mutex_unlock of the + * engine->wakeref. + * + * The other party is intel_gt_retire_requests(), which is walking the + * list of active timelines looking for completions. Meanwhile as soon + * as we call __i915_request_queue(), the GPU may complete our request. + * Ergo, if we put ourselves on the timelines.active_list + * (se intel_timeline_enter()) before we increment the + * engine->wakeref.count, we may see the request completion and retire + * it causing an undeflow of the engine->wakeref. */ - flags = __timeline_mark_lock(engine->kernel_context); + flags = __timeline_mark_lock(ce); + GEM_BUG_ON(atomic_read(&ce->timeline->active_count) < 0); - rq = __i915_request_create(engine->kernel_context, GFP_NOWAIT); + rq = __i915_request_create(ce, GFP_NOWAIT); if (IS_ERR(rq)) /* Context switch failed, hope for the best! Maybe reset? */ goto out_unlock; - intel_timeline_enter(i915_request_timeline(rq)); - /* Check again on the next retirement. */ engine->wakeref_serial = engine->serial + 1; i915_request_add_active_barriers(rq); @@ -116,13 +165,12 @@ static bool switch_to_kernel_context(struct intel_engine_cs *engine) rq->sched.attr.priority = I915_PRIORITY_BARRIER; __i915_request_commit(rq); - /* Release our exclusive hold on the engine */ - __intel_wakeref_defer_park(&engine->wakeref); - __i915_request_queue(rq, NULL); + /* Expose ourselves to the world */ + __queue_and_release_pm(rq, ce->timeline, engine); result = false; out_unlock: - __timeline_mark_unlock(engine->kernel_context, flags); + __timeline_mark_unlock(ce, flags); return result; } @@ -177,7 +225,8 @@ static int __engine_park(struct intel_wakeref *wf) engine->execlists.no_priolist = false; - intel_gt_pm_put(engine->gt); + /* While gt calls i915_vma_parked(), we have to break the lock cycle */ + intel_gt_pm_put_async(engine->gt); return 0; } diff --git a/drivers/gpu/drm/i915/gt/intel_engine_pm.h b/drivers/gpu/drm/i915/gt/intel_engine_pm.h index 739c50fefcef..24e20344dc22 100644 --- a/drivers/gpu/drm/i915/gt/intel_engine_pm.h +++ b/drivers/gpu/drm/i915/gt/intel_engine_pm.h @@ -31,6 +31,16 @@ static inline void intel_engine_pm_put(struct intel_engine_cs *engine) intel_wakeref_put(&engine->wakeref); } +static inline void intel_engine_pm_put_async(struct intel_engine_cs *engine) +{ + intel_wakeref_put_async(&engine->wakeref); +} + +static inline void intel_engine_pm_flush(struct intel_engine_cs *engine) +{ + intel_wakeref_unlock_wait(&engine->wakeref); +} + void intel_engine_init__pm(struct intel_engine_cs *engine); #endif /* INTEL_ENGINE_PM_H */ diff --git a/drivers/gpu/drm/i915/gt/intel_engine_types.h b/drivers/gpu/drm/i915/gt/intel_engine_types.h index c5d1047a4bc5..17f1f1441efc 100644 --- a/drivers/gpu/drm/i915/gt/intel_engine_types.h +++ b/drivers/gpu/drm/i915/gt/intel_engine_types.h @@ -451,16 +451,25 @@ struct intel_engine_cs { struct intel_engine_execlists execlists; + /* + * Keep track of completed timelines on this engine for early + * retirement with the goal of quickly enabling powersaving as + * soon as the engine is idle. + */ + struct intel_timeline *retire; + struct work_struct retire_work; + /* status_notifier: list of callbacks for context-switch changes */ struct atomic_notifier_head context_status_notifier; -#define I915_ENGINE_NEEDS_CMD_PARSER BIT(0) +#define I915_ENGINE_USING_CMD_PARSER BIT(0) #define I915_ENGINE_SUPPORTS_STATS BIT(1) #define I915_ENGINE_HAS_PREEMPTION BIT(2) #define I915_ENGINE_HAS_SEMAPHORES BIT(3) #define I915_ENGINE_NEEDS_BREADCRUMB_TASKLET BIT(4) #define I915_ENGINE_IS_VIRTUAL BIT(5) #define I915_ENGINE_HAS_RELATIVE_MMIO BIT(6) +#define I915_ENGINE_REQUIRES_CMD_PARSER BIT(7) unsigned int flags; /* @@ -528,9 +537,15 @@ struct intel_engine_cs { }; static inline bool -intel_engine_needs_cmd_parser(const struct intel_engine_cs *engine) +intel_engine_using_cmd_parser(const struct intel_engine_cs *engine) +{ + return engine->flags & I915_ENGINE_USING_CMD_PARSER; +} + +static inline bool +intel_engine_requires_cmd_parser(const struct intel_engine_cs *engine) { - return engine->flags & I915_ENGINE_NEEDS_CMD_PARSER; + return engine->flags & I915_ENGINE_REQUIRES_CMD_PARSER; } static inline bool diff --git a/drivers/gpu/drm/i915/gt/intel_gt_pm.c b/drivers/gpu/drm/i915/gt/intel_gt_pm.c index 060a27d9af34..a459a42ad5c2 100644 --- a/drivers/gpu/drm/i915/gt/intel_gt_pm.c +++ b/drivers/gpu/drm/i915/gt/intel_gt_pm.c @@ -61,6 +61,9 @@ static int __gt_unpark(struct intel_wakeref *wf) gt->awake = intel_display_power_get(i915, POWER_DOMAIN_GT_IRQ); GEM_BUG_ON(!gt->awake); + if (NEEDS_RC6_CTX_CORRUPTION_WA(i915)) + intel_uncore_forcewake_get(&i915->uncore, FORCEWAKE_ALL); + intel_rps_unpark(>->rps); i915_pmu_gt_unparked(i915); @@ -86,6 +89,11 @@ static int __gt_park(struct intel_wakeref *wf) /* Everything switched off, flush any residual interrupt just in case */ intel_synchronize_irq(i915); + if (NEEDS_RC6_CTX_CORRUPTION_WA(i915)) { + intel_rc6_ctx_wa_check(&i915->gt.rc6); + intel_uncore_forcewake_put(&i915->uncore, FORCEWAKE_ALL); + } + GEM_BUG_ON(!wakeref); intel_display_power_put(i915, POWER_DOMAIN_GT_IRQ, wakeref); @@ -97,7 +105,6 @@ static int __gt_park(struct intel_wakeref *wf) static const struct intel_wakeref_ops wf_ops = { .get = __gt_unpark, .put = __gt_park, - .flags = INTEL_WAKEREF_PUT_ASYNC, }; void intel_gt_pm_init_early(struct intel_gt *gt) @@ -264,7 +271,7 @@ void intel_gt_suspend_prepare(struct intel_gt *gt) static suspend_state_t pm_suspend_target(void) { -#if IS_ENABLED(CONFIG_PM_SLEEP) +#if IS_ENABLED(CONFIG_SUSPEND) && IS_ENABLED(CONFIG_PM_SLEEP) return pm_suspend_target_state; #else return PM_SUSPEND_TO_IDLE; diff --git a/drivers/gpu/drm/i915/gt/intel_gt_pm.h b/drivers/gpu/drm/i915/gt/intel_gt_pm.h index b3e17399be9b..990efc27a4e4 100644 --- a/drivers/gpu/drm/i915/gt/intel_gt_pm.h +++ b/drivers/gpu/drm/i915/gt/intel_gt_pm.h @@ -32,6 +32,11 @@ static inline void intel_gt_pm_put(struct intel_gt *gt) intel_wakeref_put(>->wakeref); } +static inline void intel_gt_pm_put_async(struct intel_gt *gt) +{ + intel_wakeref_put_async(>->wakeref); +} + static inline int intel_gt_pm_wait_for_idle(struct intel_gt *gt) { return intel_wakeref_wait_for_idle(>->wakeref); diff --git a/drivers/gpu/drm/i915/gt/intel_gt_requests.c b/drivers/gpu/drm/i915/gt/intel_gt_requests.c index b73229a84d85..3dc13ecf41bf 100644 --- a/drivers/gpu/drm/i915/gt/intel_gt_requests.c +++ b/drivers/gpu/drm/i915/gt/intel_gt_requests.c @@ -4,6 +4,8 @@ * Copyright © 2019 Intel Corporation */ +#include <linux/workqueue.h> + #include "i915_drv.h" /* for_each_engine() */ #include "i915_request.h" #include "intel_gt.h" @@ -29,6 +31,79 @@ static void flush_submission(struct intel_gt *gt) intel_engine_flush_submission(engine); } +static void engine_retire(struct work_struct *work) +{ + struct intel_engine_cs *engine = + container_of(work, typeof(*engine), retire_work); + struct intel_timeline *tl = xchg(&engine->retire, NULL); + + do { + struct intel_timeline *next = xchg(&tl->retire, NULL); + + /* + * Our goal here is to retire _idle_ timelines as soon as + * possible (as they are idle, we do not expect userspace + * to be cleaning up anytime soon). + * + * If the timeline is currently locked, either it is being + * retired elsewhere or about to be! + */ + if (mutex_trylock(&tl->mutex)) { + retire_requests(tl); + mutex_unlock(&tl->mutex); + } + intel_timeline_put(tl); + + GEM_BUG_ON(!next); + tl = ptr_mask_bits(next, 1); + } while (tl); +} + +static bool add_retire(struct intel_engine_cs *engine, + struct intel_timeline *tl) +{ + struct intel_timeline *first; + + /* + * We open-code a llist here to include the additional tag [BIT(0)] + * so that we know when the timeline is already on a + * retirement queue: either this engine or another. + * + * However, we rely on that a timeline can only be active on a single + * engine at any one time and that add_retire() is called before the + * engine releases the timeline and transferred to another to retire. + */ + + if (READ_ONCE(tl->retire)) /* already queued */ + return false; + + intel_timeline_get(tl); + first = READ_ONCE(engine->retire); + do + tl->retire = ptr_pack_bits(first, 1, 1); + while (!try_cmpxchg(&engine->retire, &first, tl)); + + return !first; +} + +void intel_engine_add_retire(struct intel_engine_cs *engine, + struct intel_timeline *tl) +{ + if (add_retire(engine, tl)) + schedule_work(&engine->retire_work); +} + +void intel_engine_init_retire(struct intel_engine_cs *engine) +{ + INIT_WORK(&engine->retire_work, engine_retire); +} + +void intel_engine_fini_retire(struct intel_engine_cs *engine) +{ + flush_work(&engine->retire_work); + GEM_BUG_ON(engine->retire); +} + long intel_gt_retire_requests_timeout(struct intel_gt *gt, long timeout) { struct intel_gt_timelines *timelines = >->timelines; @@ -52,8 +127,8 @@ long intel_gt_retire_requests_timeout(struct intel_gt *gt, long timeout) } intel_timeline_get(tl); - GEM_BUG_ON(!tl->active_count); - tl->active_count++; /* pin the list element */ + GEM_BUG_ON(!atomic_read(&tl->active_count)); + atomic_inc(&tl->active_count); /* pin the list element */ spin_unlock_irqrestore(&timelines->lock, flags); if (timeout > 0) { @@ -74,16 +149,16 @@ long intel_gt_retire_requests_timeout(struct intel_gt *gt, long timeout) /* Resume iteration after dropping lock */ list_safe_reset_next(tl, tn, link); - if (--tl->active_count) - active_count += !!rcu_access_pointer(tl->last_request.fence); - else + if (atomic_dec_and_test(&tl->active_count)) list_del(&tl->link); + else + active_count += !!rcu_access_pointer(tl->last_request.fence); mutex_unlock(&tl->mutex); /* Defer the final release to after the spinlock */ if (refcount_dec_and_test(&tl->kref.refcount)) { - GEM_BUG_ON(tl->active_count); + GEM_BUG_ON(atomic_read(&tl->active_count)); list_add(&tl->link, &free); } } diff --git a/drivers/gpu/drm/i915/gt/intel_gt_requests.h b/drivers/gpu/drm/i915/gt/intel_gt_requests.h index bd31cbce47e0..d626fb115386 100644 --- a/drivers/gpu/drm/i915/gt/intel_gt_requests.h +++ b/drivers/gpu/drm/i915/gt/intel_gt_requests.h @@ -7,7 +7,9 @@ #ifndef INTEL_GT_REQUESTS_H #define INTEL_GT_REQUESTS_H +struct intel_engine_cs; struct intel_gt; +struct intel_timeline; long intel_gt_retire_requests_timeout(struct intel_gt *gt, long timeout); static inline void intel_gt_retire_requests(struct intel_gt *gt) @@ -15,6 +17,11 @@ static inline void intel_gt_retire_requests(struct intel_gt *gt) intel_gt_retire_requests_timeout(gt, 0); } +void intel_engine_init_retire(struct intel_engine_cs *engine); +void intel_engine_add_retire(struct intel_engine_cs *engine, + struct intel_timeline *tl); +void intel_engine_fini_retire(struct intel_engine_cs *engine); + int intel_gt_wait_for_idle(struct intel_gt *gt, long timeout); void intel_gt_init_requests(struct intel_gt *gt); diff --git a/drivers/gpu/drm/i915/gt/intel_lrc.c b/drivers/gpu/drm/i915/gt/intel_lrc.c index 51aef2a233cb..9fdefbdc3546 100644 --- a/drivers/gpu/drm/i915/gt/intel_lrc.c +++ b/drivers/gpu/drm/i915/gt/intel_lrc.c @@ -142,6 +142,7 @@ #include "intel_engine_pm.h" #include "intel_gt.h" #include "intel_gt_pm.h" +#include "intel_gt_requests.h" #include "intel_lrc_reg.h" #include "intel_mocs.h" #include "intel_reset.h" @@ -990,6 +991,59 @@ static void intel_engine_context_out(struct intel_engine_cs *engine) write_sequnlock_irqrestore(&engine->stats.lock, flags); } +static void restore_default_state(struct intel_context *ce, + struct intel_engine_cs *engine) +{ + u32 *regs = ce->lrc_reg_state; + + if (engine->pinned_default_state) + memcpy(regs, /* skip restoring the vanilla PPHWSP */ + engine->pinned_default_state + LRC_STATE_PN * PAGE_SIZE, + engine->context_size - PAGE_SIZE); + + execlists_init_reg_state(regs, ce, engine, ce->ring, false); +} + +static void reset_active(struct i915_request *rq, + struct intel_engine_cs *engine) +{ + struct intel_context * const ce = rq->hw_context; + u32 head; + + /* + * The executing context has been cancelled. We want to prevent + * further execution along this context and propagate the error on + * to anything depending on its results. + * + * In __i915_request_submit(), we apply the -EIO and remove the + * requests' payloads for any banned requests. But first, we must + * rewind the context back to the start of the incomplete request so + * that we do not jump back into the middle of the batch. + * + * We preserve the breadcrumbs and semaphores of the incomplete + * requests so that inter-timeline dependencies (i.e other timelines) + * remain correctly ordered. And we defer to __i915_request_submit() + * so that all asynchronous waits are correctly handled. + */ + GEM_TRACE("%s(%s): { rq=%llx:%lld }\n", + __func__, engine->name, rq->fence.context, rq->fence.seqno); + + /* On resubmission of the active request, payload will be scrubbed */ + if (i915_request_completed(rq)) + head = rq->tail; + else + head = active_request(ce->timeline, rq)->head; + ce->ring->head = intel_ring_wrap(ce->ring, head); + intel_ring_update_space(ce->ring); + + /* Scrub the context image to prevent replaying the previous batch */ + restore_default_state(ce, engine); + __execlists_update_reg_state(ce, engine); + + /* We've switched away, so this should be a no-op, but intent matters */ + ce->lrc_desc |= CTX_DESC_FORCE_RESTORE; +} + static inline struct intel_engine_cs * __execlists_schedule_in(struct i915_request *rq) { @@ -998,6 +1052,9 @@ __execlists_schedule_in(struct i915_request *rq) intel_context_get(ce); + if (unlikely(i915_gem_context_is_banned(ce->gem_context))) + reset_active(rq, engine); + if (ce->tag) { /* Use a fixed tag for OA and friends */ ce->lrc_desc |= (u64)ce->tag << 32; @@ -1047,71 +1104,29 @@ static void kick_siblings(struct i915_request *rq, struct intel_context *ce) tasklet_schedule(&ve->base.execlists.tasklet); } -static void restore_default_state(struct intel_context *ce, - struct intel_engine_cs *engine) -{ - u32 *regs = ce->lrc_reg_state; - - if (engine->pinned_default_state) - memcpy(regs, /* skip restoring the vanilla PPHWSP */ - engine->pinned_default_state + LRC_STATE_PN * PAGE_SIZE, - engine->context_size - PAGE_SIZE); - - execlists_init_reg_state(regs, ce, engine, ce->ring, false); -} - -static void reset_active(struct i915_request *rq, - struct intel_engine_cs *engine) +static inline void +__execlists_schedule_out(struct i915_request *rq, + struct intel_engine_cs * const engine) { struct intel_context * const ce = rq->hw_context; - u32 head; /* - * The executing context has been cancelled. We want to prevent - * further execution along this context and propagate the error on - * to anything depending on its results. - * - * In __i915_request_submit(), we apply the -EIO and remove the - * requests' payloads for any banned requests. But first, we must - * rewind the context back to the start of the incomplete request so - * that we do not jump back into the middle of the batch. - * - * We preserve the breadcrumbs and semaphores of the incomplete - * requests so that inter-timeline dependencies (i.e other timelines) - * remain correctly ordered. And we defer to __i915_request_submit() - * so that all asynchronous waits are correctly handled. + * NB process_csb() is not under the engine->active.lock and hence + * schedule_out can race with schedule_in meaning that we should + * refrain from doing non-trivial work here. */ - GEM_TRACE("%s(%s): { rq=%llx:%lld }\n", - __func__, engine->name, rq->fence.context, rq->fence.seqno); - /* On resubmission of the active request, payload will be scrubbed */ - if (i915_request_completed(rq)) - head = rq->tail; - else - head = active_request(ce->timeline, rq)->head; - ce->ring->head = intel_ring_wrap(ce->ring, head); - intel_ring_update_space(ce->ring); - - /* Scrub the context image to prevent replaying the previous batch */ - restore_default_state(ce, engine); - __execlists_update_reg_state(ce, engine); - - /* We've switched away, so this should be a no-op, but intent matters */ - ce->lrc_desc |= CTX_DESC_FORCE_RESTORE; -} - -static inline void -__execlists_schedule_out(struct i915_request *rq, - struct intel_engine_cs * const engine) -{ - struct intel_context * const ce = rq->hw_context; + /* + * If we have just completed this context, the engine may now be + * idle and we want to re-enter powersaving. + */ + if (list_is_last(&rq->link, &ce->timeline->requests) && + i915_request_completed(rq)) + intel_engine_add_retire(engine, ce->timeline); intel_engine_context_out(engine); execlists_context_status_change(rq, INTEL_CONTEXT_SCHEDULE_OUT); - intel_gt_pm_put(engine->gt); - - if (unlikely(i915_gem_context_is_banned(ce->gem_context))) - reset_active(rq, engine); + intel_gt_pm_put_async(engine->gt); /* * If this is part of a virtual engine, its next request may @@ -1931,16 +1946,17 @@ skip_submit: static void cancel_port_requests(struct intel_engine_execlists * const execlists) { - struct i915_request * const *port, *rq; + struct i915_request * const *port; - for (port = execlists->pending; (rq = *port); port++) - execlists_schedule_out(rq); + for (port = execlists->pending; *port; port++) + execlists_schedule_out(*port); memset(execlists->pending, 0, sizeof(execlists->pending)); - for (port = execlists->active; (rq = *port); port++) - execlists_schedule_out(rq); - execlists->active = - memset(execlists->inflight, 0, sizeof(execlists->inflight)); + /* Mark the end of active before we overwrite *active */ + for (port = xchg(&execlists->active, execlists->pending); *port; port++) + execlists_schedule_out(*port); + WRITE_ONCE(execlists->active, + memset(execlists->inflight, 0, sizeof(execlists->inflight))); } static inline void @@ -2093,23 +2109,27 @@ static void process_csb(struct intel_engine_cs *engine) else promote = gen8_csb_parse(execlists, buf + 2 * head); if (promote) { + struct i915_request * const *old = execlists->active; + + /* Point active to the new ELSP; prevent overwriting */ + WRITE_ONCE(execlists->active, execlists->pending); + set_timeslice(engine); + if (!inject_preempt_hang(execlists)) ring_set_paused(engine, 0); /* cancel old inflight, prepare for switch */ - trace_ports(execlists, "preempted", execlists->active); - while (*execlists->active) - execlists_schedule_out(*execlists->active++); + trace_ports(execlists, "preempted", old); + while (*old) + execlists_schedule_out(*old++); /* switch pending to inflight */ GEM_BUG_ON(!assert_pending_valid(execlists, "promote")); - execlists->active = - memcpy(execlists->inflight, - execlists->pending, - execlists_num_ports(execlists) * - sizeof(*execlists->pending)); - - set_timeslice(engine); + WRITE_ONCE(execlists->active, + memcpy(execlists->inflight, + execlists->pending, + execlists_num_ports(execlists) * + sizeof(*execlists->pending))); WRITE_ONCE(execlists->pending[0], NULL); } else { diff --git a/drivers/gpu/drm/i915/gt/intel_rc6.c b/drivers/gpu/drm/i915/gt/intel_rc6.c index 5ad4a92a9582..700104b90163 100644 --- a/drivers/gpu/drm/i915/gt/intel_rc6.c +++ b/drivers/gpu/drm/i915/gt/intel_rc6.c @@ -178,8 +178,13 @@ static void gen9_rc6_enable(struct intel_rc6 *rc6) GEN6_RC_CTL_RC6_ENABLE | rc6_mode); - set(uncore, GEN9_PG_ENABLE, - GEN9_RENDER_PG_ENABLE | GEN9_MEDIA_PG_ENABLE); + /* + * WaRsDisableCoarsePowerGating:skl,cnl + * - Render/Media PG need to be disabled with RC6. + */ + if (!NEEDS_WaRsDisableCoarsePowerGating(rc6_to_i915(rc6))) + set(uncore, GEN9_PG_ENABLE, + GEN9_RENDER_PG_ENABLE | GEN9_MEDIA_PG_ENABLE); } static void gen8_rc6_enable(struct intel_rc6 *rc6) @@ -486,6 +491,66 @@ static void rpm_put(struct intel_rc6 *rc6) rc6->wakeref = false; } +static bool intel_rc6_ctx_corrupted(struct intel_rc6 *rc6) +{ + return !intel_uncore_read(rc6_to_uncore(rc6), GEN8_RC6_CTX_INFO); +} + +static void intel_rc6_ctx_wa_init(struct intel_rc6 *rc6) +{ + struct drm_i915_private *i915 = rc6_to_i915(rc6); + + if (!NEEDS_RC6_CTX_CORRUPTION_WA(i915)) + return; + + if (intel_rc6_ctx_corrupted(rc6)) { + DRM_INFO("RC6 context corrupted, disabling runtime power management\n"); + rc6->ctx_corrupted = true; + } +} + +/** + * intel_rc6_ctx_wa_resume - system resume sequence for the RC6 CTX WA + * @rc6: rc6 state + * + * Perform any steps needed to re-init the RC6 CTX WA after system resume. + */ +void intel_rc6_ctx_wa_resume(struct intel_rc6 *rc6) +{ + if (rc6->ctx_corrupted && !intel_rc6_ctx_corrupted(rc6)) { + DRM_INFO("RC6 context restored, re-enabling runtime power management\n"); + rc6->ctx_corrupted = false; + } +} + +/** + * intel_rc6_ctx_wa_check - check for a new RC6 CTX corruption + * @rc6: rc6 state + * + * Check if an RC6 CTX corruption has happened since the last check and if so + * disable RC6 and runtime power management. +*/ +void intel_rc6_ctx_wa_check(struct intel_rc6 *rc6) +{ + struct drm_i915_private *i915 = rc6_to_i915(rc6); + + if (!NEEDS_RC6_CTX_CORRUPTION_WA(i915)) + return; + + if (rc6->ctx_corrupted) + return; + + if (!intel_rc6_ctx_corrupted(rc6)) + return; + + DRM_NOTE("RC6 context corruption, disabling runtime power management\n"); + + intel_rc6_disable(rc6); + rc6->ctx_corrupted = true; + + return; +} + static void __intel_rc6_disable(struct intel_rc6 *rc6) { struct drm_i915_private *i915 = rc6_to_i915(rc6); @@ -510,6 +575,8 @@ void intel_rc6_init(struct intel_rc6 *rc6) if (!rc6_supported(rc6)) return; + intel_rc6_ctx_wa_init(rc6); + if (IS_CHERRYVIEW(i915)) err = chv_rc6_init(rc6); else if (IS_VALLEYVIEW(i915)) @@ -544,6 +611,9 @@ void intel_rc6_enable(struct intel_rc6 *rc6) GEM_BUG_ON(rc6->enabled); + if (rc6->ctx_corrupted) + return; + intel_uncore_forcewake_get(uncore, FORCEWAKE_ALL); if (IS_CHERRYVIEW(i915)) diff --git a/drivers/gpu/drm/i915/gt/intel_rc6.h b/drivers/gpu/drm/i915/gt/intel_rc6.h index 5e6711f36457..1370f6834a4c 100644 --- a/drivers/gpu/drm/i915/gt/intel_rc6.h +++ b/drivers/gpu/drm/i915/gt/intel_rc6.h @@ -22,4 +22,7 @@ void intel_rc6_disable(struct intel_rc6 *rc6); u64 intel_rc6_residency_ns(struct intel_rc6 *rc6, i915_reg_t reg); u64 intel_rc6_residency_us(struct intel_rc6 *rc6, i915_reg_t reg); +void intel_rc6_ctx_wa_check(struct intel_rc6 *rc6); +void intel_rc6_ctx_wa_resume(struct intel_rc6 *rc6); + #endif /* INTEL_RC6_H */ diff --git a/drivers/gpu/drm/i915/gt/intel_rc6_types.h b/drivers/gpu/drm/i915/gt/intel_rc6_types.h index 214f354d6ae4..89ad5697a8d4 100644 --- a/drivers/gpu/drm/i915/gt/intel_rc6_types.h +++ b/drivers/gpu/drm/i915/gt/intel_rc6_types.h @@ -23,6 +23,7 @@ struct intel_rc6 { bool supported : 1; bool enabled : 1; bool wakeref : 1; + bool ctx_corrupted : 1; }; #endif /* INTEL_RC6_TYPES_H */ diff --git a/drivers/gpu/drm/i915/gt/intel_reset.c b/drivers/gpu/drm/i915/gt/intel_reset.c index f03e000051c1..c97423a76642 100644 --- a/drivers/gpu/drm/i915/gt/intel_reset.c +++ b/drivers/gpu/drm/i915/gt/intel_reset.c @@ -1114,7 +1114,7 @@ int intel_engine_reset(struct intel_engine_cs *engine, const char *msg) out: intel_engine_cancel_stop_cs(engine); reset_finish_engine(engine); - intel_engine_pm_put(engine); + intel_engine_pm_put_async(engine); return ret; } diff --git a/drivers/gpu/drm/i915/gt/intel_ring.c b/drivers/gpu/drm/i915/gt/intel_ring.c index ece20504d240..374b28f13ca0 100644 --- a/drivers/gpu/drm/i915/gt/intel_ring.c +++ b/drivers/gpu/drm/i915/gt/intel_ring.c @@ -57,9 +57,10 @@ int intel_ring_pin(struct intel_ring *ring) i915_vma_make_unshrinkable(vma); - GEM_BUG_ON(ring->vaddr); - ring->vaddr = addr; + /* Discard any unused bytes beyond that submitted to hw. */ + intel_ring_reset(ring, ring->emit); + ring->vaddr = addr; return 0; err_ring: @@ -85,20 +86,14 @@ void intel_ring_unpin(struct intel_ring *ring) if (!atomic_dec_and_test(&ring->pin_count)) return; - /* Discard any unused bytes beyond that submitted to hw. */ - intel_ring_reset(ring, ring->emit); - i915_vma_unset_ggtt_write(vma); if (i915_vma_is_map_and_fenceable(vma)) i915_vma_unpin_iomap(vma); else i915_gem_object_unpin_map(vma->obj); - GEM_BUG_ON(!ring->vaddr); - ring->vaddr = NULL; - - i915_vma_unpin(vma); i915_vma_make_purgeable(vma); + i915_vma_unpin(vma); } static struct i915_vma *create_ring_vma(struct i915_ggtt *ggtt, int size) diff --git a/drivers/gpu/drm/i915/gt/intel_timeline.c b/drivers/gpu/drm/i915/gt/intel_timeline.c index 14ad10acd548..649798c184fb 100644 --- a/drivers/gpu/drm/i915/gt/intel_timeline.c +++ b/drivers/gpu/drm/i915/gt/intel_timeline.c @@ -282,6 +282,7 @@ void intel_timeline_fini(struct intel_timeline *timeline) { GEM_BUG_ON(atomic_read(&timeline->pin_count)); GEM_BUG_ON(!list_empty(&timeline->requests)); + GEM_BUG_ON(timeline->retire); if (timeline->hwsp_cacheline) cacheline_free(timeline->hwsp_cacheline); @@ -339,15 +340,33 @@ void intel_timeline_enter(struct intel_timeline *tl) struct intel_gt_timelines *timelines = &tl->gt->timelines; unsigned long flags; + /* + * Pretend we are serialised by the timeline->mutex. + * + * While generally true, there are a few exceptions to the rule + * for the engine->kernel_context being used to manage power + * transitions. As the engine_park may be called from under any + * timeline, it uses the power mutex as a global serialisation + * lock to prevent any other request entering its timeline. + * + * The rule is generally tl->mutex, otherwise engine->wakeref.mutex. + * + * However, intel_gt_retire_request() does not know which engine + * it is retiring along and so cannot partake in the engine-pm + * barrier, and there we use the tl->active_count as a means to + * pin the timeline in the active_list while the locks are dropped. + * Ergo, as that is outside of the engine-pm barrier, we need to + * use atomic to manipulate tl->active_count. + */ lockdep_assert_held(&tl->mutex); - GEM_BUG_ON(!atomic_read(&tl->pin_count)); - if (tl->active_count++) + + if (atomic_add_unless(&tl->active_count, 1, 0)) return; - GEM_BUG_ON(!tl->active_count); /* overflow? */ spin_lock_irqsave(&timelines->lock, flags); - list_add(&tl->link, &timelines->active_list); + if (!atomic_fetch_inc(&tl->active_count)) + list_add_tail(&tl->link, &timelines->active_list); spin_unlock_irqrestore(&timelines->lock, flags); } @@ -356,14 +375,16 @@ void intel_timeline_exit(struct intel_timeline *tl) struct intel_gt_timelines *timelines = &tl->gt->timelines; unsigned long flags; + /* See intel_timeline_enter() */ lockdep_assert_held(&tl->mutex); - GEM_BUG_ON(!tl->active_count); - if (--tl->active_count) + GEM_BUG_ON(!atomic_read(&tl->active_count)); + if (atomic_add_unless(&tl->active_count, -1, 1)) return; spin_lock_irqsave(&timelines->lock, flags); - list_del(&tl->link); + if (atomic_dec_and_test(&tl->active_count)) + list_del(&tl->link); spin_unlock_irqrestore(&timelines->lock, flags); /* diff --git a/drivers/gpu/drm/i915/gt/intel_timeline_types.h b/drivers/gpu/drm/i915/gt/intel_timeline_types.h index 98d9ee166379..aaf15cbe1ce1 100644 --- a/drivers/gpu/drm/i915/gt/intel_timeline_types.h +++ b/drivers/gpu/drm/i915/gt/intel_timeline_types.h @@ -42,7 +42,7 @@ struct intel_timeline { * from the intel_context caller plus internal atomicity. */ atomic_t pin_count; - unsigned int active_count; + atomic_t active_count; const u32 *hwsp_seqno; struct i915_vma *hwsp_ggtt; @@ -66,6 +66,9 @@ struct intel_timeline { */ struct i915_active_fence last_request; + /** A chain of completed timelines ready for early retirement. */ + struct intel_timeline *retire; + /** * We track the most recent seqno that we wait on in every context so * that we only have to emit a new await and dependency on a more diff --git a/drivers/gpu/drm/i915/gt/selftest_engine_pm.c b/drivers/gpu/drm/i915/gt/selftest_engine_pm.c index 20b9c83f43ad..cbf6b0735272 100644 --- a/drivers/gpu/drm/i915/gt/selftest_engine_pm.c +++ b/drivers/gpu/drm/i915/gt/selftest_engine_pm.c @@ -51,11 +51,12 @@ static int live_engine_pm(void *arg) pr_err("intel_engine_pm_get_if_awake(%s) failed under %s\n", engine->name, p->name); else - intel_engine_pm_put(engine); - intel_engine_pm_put(engine); + intel_engine_pm_put_async(engine); + intel_engine_pm_put_async(engine); p->critical_section_end(); - /* engine wakeref is sync (instant) */ + intel_engine_pm_flush(engine); + if (intel_engine_pm_is_awake(engine)) { pr_err("%s is still awake after flushing pm\n", engine->name); diff --git a/drivers/gpu/drm/i915/gt/uc/intel_guc.c b/drivers/gpu/drm/i915/gt/uc/intel_guc.c index 019ae6486e8d..3ee4a4e7689d 100644 --- a/drivers/gpu/drm/i915/gt/uc/intel_guc.c +++ b/drivers/gpu/drm/i915/gt/uc/intel_guc.c @@ -554,6 +554,13 @@ int intel_guc_suspend(struct intel_guc *guc) }; /* + * If GuC communication is enabled but submission is not supported, + * we do not need to suspend the GuC. + */ + if (!intel_guc_submission_is_enabled(guc)) + return 0; + + /* * The ENTER_S_STATE action queues the save/restore operation in GuC FW * and then returns, so waiting on the H2G is not enough to guarantee * GuC is done. When all the processing is done, GuC writes @@ -610,6 +617,14 @@ int intel_guc_resume(struct intel_guc *guc) GUC_POWER_D0, }; + /* + * If GuC communication is enabled but submission is not supported, + * we do not need to resume the GuC but we do need to enable the + * GuC communication on resume (above). + */ + if (!intel_guc_submission_is_enabled(guc)) + return 0; + return intel_guc_send(guc, action, ARRAY_SIZE(action)); } diff --git a/drivers/gpu/drm/i915/i915_active.c b/drivers/gpu/drm/i915/i915_active.c index 207383dda84d..dca15ace88f6 100644 --- a/drivers/gpu/drm/i915/i915_active.c +++ b/drivers/gpu/drm/i915/i915_active.c @@ -91,14 +91,15 @@ static void debug_active_init(struct i915_active *ref) static void debug_active_activate(struct i915_active *ref) { - lockdep_assert_held(&ref->mutex); + spin_lock_irq(&ref->tree_lock); if (!atomic_read(&ref->count)) /* before the first inc */ debug_object_activate(ref, &active_debug_desc); + spin_unlock_irq(&ref->tree_lock); } static void debug_active_deactivate(struct i915_active *ref) { - lockdep_assert_held(&ref->mutex); + lockdep_assert_held(&ref->tree_lock); if (!atomic_read(&ref->count)) /* after the last dec */ debug_object_deactivate(ref, &active_debug_desc); } @@ -128,29 +129,22 @@ __active_retire(struct i915_active *ref) { struct active_node *it, *n; struct rb_root root; - bool retire = false; + unsigned long flags; - lockdep_assert_held(&ref->mutex); GEM_BUG_ON(i915_active_is_idle(ref)); /* return the unused nodes to our slabcache -- flushing the allocator */ - if (atomic_dec_and_test(&ref->count)) { - debug_active_deactivate(ref); - root = ref->tree; - ref->tree = RB_ROOT; - ref->cache = NULL; - retire = true; - } - - mutex_unlock(&ref->mutex); - if (!retire) + if (!atomic_dec_and_lock_irqsave(&ref->count, &ref->tree_lock, flags)) return; GEM_BUG_ON(rcu_access_pointer(ref->excl.fence)); - rbtree_postorder_for_each_entry_safe(it, n, &root, node) { - GEM_BUG_ON(i915_active_fence_isset(&it->base)); - kmem_cache_free(global.slab_cache, it); - } + debug_active_deactivate(ref); + + root = ref->tree; + ref->tree = RB_ROOT; + ref->cache = NULL; + + spin_unlock_irqrestore(&ref->tree_lock, flags); /* After the final retire, the entire struct may be freed */ if (ref->retire) @@ -158,6 +152,11 @@ __active_retire(struct i915_active *ref) /* ... except if you wait on it, you must manage your own references! */ wake_up_var(ref); + + rbtree_postorder_for_each_entry_safe(it, n, &root, node) { + GEM_BUG_ON(i915_active_fence_isset(&it->base)); + kmem_cache_free(global.slab_cache, it); + } } static void @@ -169,7 +168,6 @@ active_work(struct work_struct *wrk) if (atomic_add_unless(&ref->count, -1, 1)) return; - mutex_lock(&ref->mutex); __active_retire(ref); } @@ -180,9 +178,7 @@ active_retire(struct i915_active *ref) if (atomic_add_unless(&ref->count, -1, 1)) return; - /* If we are inside interrupt context (fence signaling), defer */ - if (ref->flags & I915_ACTIVE_RETIRE_SLEEPS || - !mutex_trylock(&ref->mutex)) { + if (ref->flags & I915_ACTIVE_RETIRE_SLEEPS) { queue_work(system_unbound_wq, &ref->work); return; } @@ -227,7 +223,7 @@ active_instance(struct i915_active *ref, struct intel_timeline *tl) if (!prealloc) return NULL; - mutex_lock(&ref->mutex); + spin_lock_irq(&ref->tree_lock); GEM_BUG_ON(i915_active_is_idle(ref)); parent = NULL; @@ -257,7 +253,7 @@ active_instance(struct i915_active *ref, struct intel_timeline *tl) out: ref->cache = node; - mutex_unlock(&ref->mutex); + spin_unlock_irq(&ref->tree_lock); BUILD_BUG_ON(offsetof(typeof(*node), base)); return &node->base; @@ -278,8 +274,10 @@ void __i915_active_init(struct i915_active *ref, if (bits & I915_ACTIVE_MAY_SLEEP) ref->flags |= I915_ACTIVE_RETIRE_SLEEPS; + spin_lock_init(&ref->tree_lock); ref->tree = RB_ROOT; ref->cache = NULL; + init_llist_head(&ref->preallocated_barriers); atomic_set(&ref->count, 0); __mutex_init(&ref->mutex, "i915_active", key); @@ -510,7 +508,7 @@ static struct active_node *reuse_idle_barrier(struct i915_active *ref, u64 idx) if (RB_EMPTY_ROOT(&ref->tree)) return NULL; - mutex_lock(&ref->mutex); + spin_lock_irq(&ref->tree_lock); GEM_BUG_ON(i915_active_is_idle(ref)); /* @@ -575,7 +573,7 @@ static struct active_node *reuse_idle_barrier(struct i915_active *ref, u64 idx) goto match; } - mutex_unlock(&ref->mutex); + spin_unlock_irq(&ref->tree_lock); return NULL; @@ -583,7 +581,7 @@ match: rb_erase(p, &ref->tree); /* Hide from waits and sibling allocations */ if (p == &ref->cache->node) ref->cache = NULL; - mutex_unlock(&ref->mutex); + spin_unlock_irq(&ref->tree_lock); return rb_entry(p, struct active_node, node); } @@ -664,6 +662,7 @@ unwind: void i915_active_acquire_barrier(struct i915_active *ref) { struct llist_node *pos, *next; + unsigned long flags; GEM_BUG_ON(i915_active_is_idle(ref)); @@ -673,12 +672,13 @@ void i915_active_acquire_barrier(struct i915_active *ref) * populated by i915_request_add_active_barriers() to point to the * request that will eventually release them. */ - mutex_lock_nested(&ref->mutex, SINGLE_DEPTH_NESTING); llist_for_each_safe(pos, next, take_preallocated_barriers(ref)) { struct active_node *node = barrier_from_ll(pos); struct intel_engine_cs *engine = barrier_to_engine(node); struct rb_node **p, *parent; + spin_lock_irqsave_nested(&ref->tree_lock, flags, + SINGLE_DEPTH_NESTING); parent = NULL; p = &ref->tree.rb_node; while (*p) { @@ -694,12 +694,12 @@ void i915_active_acquire_barrier(struct i915_active *ref) } rb_link_node(&node->node, parent, p); rb_insert_color(&node->node, &ref->tree); + spin_unlock_irqrestore(&ref->tree_lock, flags); GEM_BUG_ON(!intel_engine_pm_is_awake(engine)); llist_add(barrier_to_ll(node), &engine->barrier_tasks); intel_engine_pm_put(engine); } - mutex_unlock(&ref->mutex); } void i915_request_add_active_barriers(struct i915_request *rq) diff --git a/drivers/gpu/drm/i915/i915_active_types.h b/drivers/gpu/drm/i915/i915_active_types.h index d89a74c142c6..96aed0ee700a 100644 --- a/drivers/gpu/drm/i915/i915_active_types.h +++ b/drivers/gpu/drm/i915/i915_active_types.h @@ -48,6 +48,7 @@ struct i915_active { atomic_t count; struct mutex mutex; + spinlock_t tree_lock; struct active_node *cache; struct rb_root tree; diff --git a/drivers/gpu/drm/i915/i915_cmd_parser.c b/drivers/gpu/drm/i915/i915_cmd_parser.c index 24555102e198..f24096e27bef 100644 --- a/drivers/gpu/drm/i915/i915_cmd_parser.c +++ b/drivers/gpu/drm/i915/i915_cmd_parser.c @@ -53,13 +53,11 @@ * granting userspace undue privileges. There are three categories of privilege. * * First, commands which are explicitly defined as privileged or which should - * only be used by the kernel driver. The parser generally rejects such - * commands, though it may allow some from the drm master process. + * only be used by the kernel driver. The parser rejects such commands * * Second, commands which access registers. To support correct/enhanced * userspace functionality, particularly certain OpenGL extensions, the parser - * provides a whitelist of registers which userspace may safely access (for both - * normal and drm master processes). + * provides a whitelist of registers which userspace may safely access * * Third, commands which access privileged memory (i.e. GGTT, HWS page, etc). * The parser always rejects such commands. @@ -84,9 +82,9 @@ * in the per-engine command tables. * * Other command table entries map fairly directly to high level categories - * mentioned above: rejected, master-only, register whitelist. The parser - * implements a number of checks, including the privileged memory checks, via a - * general bitmasking mechanism. + * mentioned above: rejected, register whitelist. The parser implements a number + * of checks, including the privileged memory checks, via a general bitmasking + * mechanism. */ /* @@ -104,8 +102,6 @@ struct drm_i915_cmd_descriptor { * CMD_DESC_REJECT: The command is never allowed * CMD_DESC_REGISTER: The command should be checked against the * register whitelist for the appropriate ring - * CMD_DESC_MASTER: The command is allowed if the submitting process - * is the DRM master */ u32 flags; #define CMD_DESC_FIXED (1<<0) @@ -113,7 +109,6 @@ struct drm_i915_cmd_descriptor { #define CMD_DESC_REJECT (1<<2) #define CMD_DESC_REGISTER (1<<3) #define CMD_DESC_BITMASK (1<<4) -#define CMD_DESC_MASTER (1<<5) /* * The command's unique identification bits and the bitmask to get them. @@ -194,7 +189,7 @@ struct drm_i915_cmd_table { #define CMD(op, opm, f, lm, fl, ...) \ { \ .flags = (fl) | ((f) ? CMD_DESC_FIXED : 0), \ - .cmd = { (op), ~0u << (opm) }, \ + .cmd = { (op & ~0u << (opm)), ~0u << (opm) }, \ .length = { (lm) }, \ __VA_ARGS__ \ } @@ -209,14 +204,13 @@ struct drm_i915_cmd_table { #define R CMD_DESC_REJECT #define W CMD_DESC_REGISTER #define B CMD_DESC_BITMASK -#define M CMD_DESC_MASTER /* Command Mask Fixed Len Action ---------------------------------------------------------- */ -static const struct drm_i915_cmd_descriptor common_cmds[] = { +static const struct drm_i915_cmd_descriptor gen7_common_cmds[] = { CMD( MI_NOOP, SMI, F, 1, S ), CMD( MI_USER_INTERRUPT, SMI, F, 1, R ), - CMD( MI_WAIT_FOR_EVENT, SMI, F, 1, M ), + CMD( MI_WAIT_FOR_EVENT, SMI, F, 1, R ), CMD( MI_ARB_CHECK, SMI, F, 1, S ), CMD( MI_REPORT_HEAD, SMI, F, 1, S ), CMD( MI_SUSPEND_FLUSH, SMI, F, 1, S ), @@ -246,7 +240,7 @@ static const struct drm_i915_cmd_descriptor common_cmds[] = { CMD( MI_BATCH_BUFFER_START, SMI, !F, 0xFF, S ), }; -static const struct drm_i915_cmd_descriptor render_cmds[] = { +static const struct drm_i915_cmd_descriptor gen7_render_cmds[] = { CMD( MI_FLUSH, SMI, F, 1, S ), CMD( MI_ARB_ON_OFF, SMI, F, 1, R ), CMD( MI_PREDICATE, SMI, F, 1, S ), @@ -313,7 +307,7 @@ static const struct drm_i915_cmd_descriptor hsw_render_cmds[] = { CMD( MI_URB_ATOMIC_ALLOC, SMI, F, 1, S ), CMD( MI_SET_APPID, SMI, F, 1, S ), CMD( MI_RS_CONTEXT, SMI, F, 1, S ), - CMD( MI_LOAD_SCAN_LINES_INCL, SMI, !F, 0x3F, M ), + CMD( MI_LOAD_SCAN_LINES_INCL, SMI, !F, 0x3F, R ), CMD( MI_LOAD_SCAN_LINES_EXCL, SMI, !F, 0x3F, R ), CMD( MI_LOAD_REGISTER_REG, SMI, !F, 0xFF, W, .reg = { .offset = 1, .mask = 0x007FFFFC, .step = 1 } ), @@ -330,7 +324,7 @@ static const struct drm_i915_cmd_descriptor hsw_render_cmds[] = { CMD( GFX_OP_3DSTATE_BINDING_TABLE_EDIT_PS, S3D, !F, 0x1FF, S ), }; -static const struct drm_i915_cmd_descriptor video_cmds[] = { +static const struct drm_i915_cmd_descriptor gen7_video_cmds[] = { CMD( MI_ARB_ON_OFF, SMI, F, 1, R ), CMD( MI_SET_APPID, SMI, F, 1, S ), CMD( MI_STORE_DWORD_IMM, SMI, !F, 0xFF, B, @@ -374,7 +368,7 @@ static const struct drm_i915_cmd_descriptor video_cmds[] = { CMD( MFX_WAIT, SMFX, F, 1, S ), }; -static const struct drm_i915_cmd_descriptor vecs_cmds[] = { +static const struct drm_i915_cmd_descriptor gen7_vecs_cmds[] = { CMD( MI_ARB_ON_OFF, SMI, F, 1, R ), CMD( MI_SET_APPID, SMI, F, 1, S ), CMD( MI_STORE_DWORD_IMM, SMI, !F, 0xFF, B, @@ -412,7 +406,7 @@ static const struct drm_i915_cmd_descriptor vecs_cmds[] = { }}, ), }; -static const struct drm_i915_cmd_descriptor blt_cmds[] = { +static const struct drm_i915_cmd_descriptor gen7_blt_cmds[] = { CMD( MI_DISPLAY_FLIP, SMI, !F, 0xFF, R ), CMD( MI_STORE_DWORD_IMM, SMI, !F, 0x3FF, B, .bits = {{ @@ -446,10 +440,64 @@ static const struct drm_i915_cmd_descriptor blt_cmds[] = { }; static const struct drm_i915_cmd_descriptor hsw_blt_cmds[] = { - CMD( MI_LOAD_SCAN_LINES_INCL, SMI, !F, 0x3F, M ), + CMD( MI_LOAD_SCAN_LINES_INCL, SMI, !F, 0x3F, R ), CMD( MI_LOAD_SCAN_LINES_EXCL, SMI, !F, 0x3F, R ), }; +/* + * For Gen9 we can still rely on the h/w to enforce cmd security, and only + * need to re-enforce the register access checks. We therefore only need to + * teach the cmdparser how to find the end of each command, and identify + * register accesses. The table doesn't need to reject any commands, and so + * the only commands listed here are: + * 1) Those that touch registers + * 2) Those that do not have the default 8-bit length + * + * Note that the default MI length mask chosen for this table is 0xFF, not + * the 0x3F used on older devices. This is because the vast majority of MI + * cmds on Gen9 use a standard 8-bit Length field. + * All the Gen9 blitter instructions are standard 0xFF length mask, and + * none allow access to non-general registers, so in fact no BLT cmds are + * included in the table at all. + * + */ +static const struct drm_i915_cmd_descriptor gen9_blt_cmds[] = { + CMD( MI_NOOP, SMI, F, 1, S ), + CMD( MI_USER_INTERRUPT, SMI, F, 1, S ), + CMD( MI_WAIT_FOR_EVENT, SMI, F, 1, S ), + CMD( MI_FLUSH, SMI, F, 1, S ), + CMD( MI_ARB_CHECK, SMI, F, 1, S ), + CMD( MI_REPORT_HEAD, SMI, F, 1, S ), + CMD( MI_ARB_ON_OFF, SMI, F, 1, S ), + CMD( MI_SUSPEND_FLUSH, SMI, F, 1, S ), + CMD( MI_LOAD_SCAN_LINES_INCL, SMI, !F, 0x3F, S ), + CMD( MI_LOAD_SCAN_LINES_EXCL, SMI, !F, 0x3F, S ), + CMD( MI_STORE_DWORD_IMM, SMI, !F, 0x3FF, S ), + CMD( MI_LOAD_REGISTER_IMM(1), SMI, !F, 0xFF, W, + .reg = { .offset = 1, .mask = 0x007FFFFC, .step = 2 } ), + CMD( MI_UPDATE_GTT, SMI, !F, 0x3FF, S ), + CMD( MI_STORE_REGISTER_MEM_GEN8, SMI, F, 4, W, + .reg = { .offset = 1, .mask = 0x007FFFFC } ), + CMD( MI_FLUSH_DW, SMI, !F, 0x3F, S ), + CMD( MI_LOAD_REGISTER_MEM_GEN8, SMI, F, 4, W, + .reg = { .offset = 1, .mask = 0x007FFFFC } ), + CMD( MI_LOAD_REGISTER_REG, SMI, !F, 0xFF, W, + .reg = { .offset = 1, .mask = 0x007FFFFC, .step = 1 } ), + + /* + * We allow BB_START but apply further checks. We just sanitize the + * basic fields here. + */ +#define MI_BB_START_OPERAND_MASK GENMASK(SMI-1, 0) +#define MI_BB_START_OPERAND_EXPECT (MI_BATCH_PPGTT_HSW | 1) + CMD( MI_BATCH_BUFFER_START_GEN8, SMI, !F, 0xFF, B, + .bits = {{ + .offset = 0, + .mask = MI_BB_START_OPERAND_MASK, + .expected = MI_BB_START_OPERAND_EXPECT, + }}, ), +}; + static const struct drm_i915_cmd_descriptor noop_desc = CMD(MI_NOOP, SMI, F, 1, S); @@ -463,40 +511,44 @@ static const struct drm_i915_cmd_descriptor noop_desc = #undef R #undef W #undef B -#undef M -static const struct drm_i915_cmd_table gen7_render_cmds[] = { - { common_cmds, ARRAY_SIZE(common_cmds) }, - { render_cmds, ARRAY_SIZE(render_cmds) }, +static const struct drm_i915_cmd_table gen7_render_cmd_table[] = { + { gen7_common_cmds, ARRAY_SIZE(gen7_common_cmds) }, + { gen7_render_cmds, ARRAY_SIZE(gen7_render_cmds) }, }; -static const struct drm_i915_cmd_table hsw_render_ring_cmds[] = { - { common_cmds, ARRAY_SIZE(common_cmds) }, - { render_cmds, ARRAY_SIZE(render_cmds) }, +static const struct drm_i915_cmd_table hsw_render_ring_cmd_table[] = { + { gen7_common_cmds, ARRAY_SIZE(gen7_common_cmds) }, + { gen7_render_cmds, ARRAY_SIZE(gen7_render_cmds) }, { hsw_render_cmds, ARRAY_SIZE(hsw_render_cmds) }, }; -static const struct drm_i915_cmd_table gen7_video_cmds[] = { - { common_cmds, ARRAY_SIZE(common_cmds) }, - { video_cmds, ARRAY_SIZE(video_cmds) }, +static const struct drm_i915_cmd_table gen7_video_cmd_table[] = { + { gen7_common_cmds, ARRAY_SIZE(gen7_common_cmds) }, + { gen7_video_cmds, ARRAY_SIZE(gen7_video_cmds) }, }; -static const struct drm_i915_cmd_table hsw_vebox_cmds[] = { - { common_cmds, ARRAY_SIZE(common_cmds) }, - { vecs_cmds, ARRAY_SIZE(vecs_cmds) }, +static const struct drm_i915_cmd_table hsw_vebox_cmd_table[] = { + { gen7_common_cmds, ARRAY_SIZE(gen7_common_cmds) }, + { gen7_vecs_cmds, ARRAY_SIZE(gen7_vecs_cmds) }, }; -static const struct drm_i915_cmd_table gen7_blt_cmds[] = { - { common_cmds, ARRAY_SIZE(common_cmds) }, - { blt_cmds, ARRAY_SIZE(blt_cmds) }, +static const struct drm_i915_cmd_table gen7_blt_cmd_table[] = { + { gen7_common_cmds, ARRAY_SIZE(gen7_common_cmds) }, + { gen7_blt_cmds, ARRAY_SIZE(gen7_blt_cmds) }, }; -static const struct drm_i915_cmd_table hsw_blt_ring_cmds[] = { - { common_cmds, ARRAY_SIZE(common_cmds) }, - { blt_cmds, ARRAY_SIZE(blt_cmds) }, +static const struct drm_i915_cmd_table hsw_blt_ring_cmd_table[] = { + { gen7_common_cmds, ARRAY_SIZE(gen7_common_cmds) }, + { gen7_blt_cmds, ARRAY_SIZE(gen7_blt_cmds) }, { hsw_blt_cmds, ARRAY_SIZE(hsw_blt_cmds) }, }; +static const struct drm_i915_cmd_table gen9_blt_cmd_table[] = { + { gen9_blt_cmds, ARRAY_SIZE(gen9_blt_cmds) }, +}; + + /* * Register whitelists, sorted by increasing register offset. */ @@ -612,17 +664,27 @@ static const struct drm_i915_reg_descriptor gen7_blt_regs[] = { REG64_IDX(RING_TIMESTAMP, BLT_RING_BASE), }; -static const struct drm_i915_reg_descriptor ivb_master_regs[] = { - REG32(FORCEWAKE_MT), - REG32(DERRMR), - REG32(GEN7_PIPE_DE_LOAD_SL(PIPE_A)), - REG32(GEN7_PIPE_DE_LOAD_SL(PIPE_B)), - REG32(GEN7_PIPE_DE_LOAD_SL(PIPE_C)), -}; - -static const struct drm_i915_reg_descriptor hsw_master_regs[] = { - REG32(FORCEWAKE_MT), - REG32(DERRMR), +static const struct drm_i915_reg_descriptor gen9_blt_regs[] = { + REG64_IDX(RING_TIMESTAMP, RENDER_RING_BASE), + REG64_IDX(RING_TIMESTAMP, BSD_RING_BASE), + REG32(BCS_SWCTRL), + REG64_IDX(RING_TIMESTAMP, BLT_RING_BASE), + REG64_IDX(BCS_GPR, 0), + REG64_IDX(BCS_GPR, 1), + REG64_IDX(BCS_GPR, 2), + REG64_IDX(BCS_GPR, 3), + REG64_IDX(BCS_GPR, 4), + REG64_IDX(BCS_GPR, 5), + REG64_IDX(BCS_GPR, 6), + REG64_IDX(BCS_GPR, 7), + REG64_IDX(BCS_GPR, 8), + REG64_IDX(BCS_GPR, 9), + REG64_IDX(BCS_GPR, 10), + REG64_IDX(BCS_GPR, 11), + REG64_IDX(BCS_GPR, 12), + REG64_IDX(BCS_GPR, 13), + REG64_IDX(BCS_GPR, 14), + REG64_IDX(BCS_GPR, 15), }; #undef REG64 @@ -631,28 +693,27 @@ static const struct drm_i915_reg_descriptor hsw_master_regs[] = { struct drm_i915_reg_table { const struct drm_i915_reg_descriptor *regs; int num_regs; - bool master; }; static const struct drm_i915_reg_table ivb_render_reg_tables[] = { - { gen7_render_regs, ARRAY_SIZE(gen7_render_regs), false }, - { ivb_master_regs, ARRAY_SIZE(ivb_master_regs), true }, + { gen7_render_regs, ARRAY_SIZE(gen7_render_regs) }, }; static const struct drm_i915_reg_table ivb_blt_reg_tables[] = { - { gen7_blt_regs, ARRAY_SIZE(gen7_blt_regs), false }, - { ivb_master_regs, ARRAY_SIZE(ivb_master_regs), true }, + { gen7_blt_regs, ARRAY_SIZE(gen7_blt_regs) }, }; static const struct drm_i915_reg_table hsw_render_reg_tables[] = { - { gen7_render_regs, ARRAY_SIZE(gen7_render_regs), false }, - { hsw_render_regs, ARRAY_SIZE(hsw_render_regs), false }, - { hsw_master_regs, ARRAY_SIZE(hsw_master_regs), true }, + { gen7_render_regs, ARRAY_SIZE(gen7_render_regs) }, + { hsw_render_regs, ARRAY_SIZE(hsw_render_regs) }, }; static const struct drm_i915_reg_table hsw_blt_reg_tables[] = { - { gen7_blt_regs, ARRAY_SIZE(gen7_blt_regs), false }, - { hsw_master_regs, ARRAY_SIZE(hsw_master_regs), true }, + { gen7_blt_regs, ARRAY_SIZE(gen7_blt_regs) }, +}; + +static const struct drm_i915_reg_table gen9_blt_reg_tables[] = { + { gen9_blt_regs, ARRAY_SIZE(gen9_blt_regs) }, }; static u32 gen7_render_get_cmd_length_mask(u32 cmd_header) @@ -710,6 +771,17 @@ static u32 gen7_blt_get_cmd_length_mask(u32 cmd_header) return 0; } +static u32 gen9_blt_get_cmd_length_mask(u32 cmd_header) +{ + u32 client = cmd_header >> INSTR_CLIENT_SHIFT; + + if (client == INSTR_MI_CLIENT || client == INSTR_BC_CLIENT) + return 0xFF; + + DRM_DEBUG_DRIVER("CMD: Abnormal blt cmd length! 0x%08X\n", cmd_header); + return 0; +} + static bool validate_cmds_sorted(const struct intel_engine_cs *engine, const struct drm_i915_cmd_table *cmd_tables, int cmd_table_count) @@ -867,18 +939,19 @@ void intel_engine_init_cmd_parser(struct intel_engine_cs *engine) int cmd_table_count; int ret; - if (!IS_GEN(engine->i915, 7)) + if (!IS_GEN(engine->i915, 7) && !(IS_GEN(engine->i915, 9) && + engine->class == COPY_ENGINE_CLASS)) return; switch (engine->class) { case RENDER_CLASS: if (IS_HASWELL(engine->i915)) { - cmd_tables = hsw_render_ring_cmds; + cmd_tables = hsw_render_ring_cmd_table; cmd_table_count = - ARRAY_SIZE(hsw_render_ring_cmds); + ARRAY_SIZE(hsw_render_ring_cmd_table); } else { - cmd_tables = gen7_render_cmds; - cmd_table_count = ARRAY_SIZE(gen7_render_cmds); + cmd_tables = gen7_render_cmd_table; + cmd_table_count = ARRAY_SIZE(gen7_render_cmd_table); } if (IS_HASWELL(engine->i915)) { @@ -888,36 +961,46 @@ void intel_engine_init_cmd_parser(struct intel_engine_cs *engine) engine->reg_tables = ivb_render_reg_tables; engine->reg_table_count = ARRAY_SIZE(ivb_render_reg_tables); } - engine->get_cmd_length_mask = gen7_render_get_cmd_length_mask; break; case VIDEO_DECODE_CLASS: - cmd_tables = gen7_video_cmds; - cmd_table_count = ARRAY_SIZE(gen7_video_cmds); + cmd_tables = gen7_video_cmd_table; + cmd_table_count = ARRAY_SIZE(gen7_video_cmd_table); engine->get_cmd_length_mask = gen7_bsd_get_cmd_length_mask; break; case COPY_ENGINE_CLASS: - if (IS_HASWELL(engine->i915)) { - cmd_tables = hsw_blt_ring_cmds; - cmd_table_count = ARRAY_SIZE(hsw_blt_ring_cmds); + engine->get_cmd_length_mask = gen7_blt_get_cmd_length_mask; + if (IS_GEN(engine->i915, 9)) { + cmd_tables = gen9_blt_cmd_table; + cmd_table_count = ARRAY_SIZE(gen9_blt_cmd_table); + engine->get_cmd_length_mask = + gen9_blt_get_cmd_length_mask; + + /* BCS Engine unsafe without parser */ + engine->flags |= I915_ENGINE_REQUIRES_CMD_PARSER; + } else if (IS_HASWELL(engine->i915)) { + cmd_tables = hsw_blt_ring_cmd_table; + cmd_table_count = ARRAY_SIZE(hsw_blt_ring_cmd_table); } else { - cmd_tables = gen7_blt_cmds; - cmd_table_count = ARRAY_SIZE(gen7_blt_cmds); + cmd_tables = gen7_blt_cmd_table; + cmd_table_count = ARRAY_SIZE(gen7_blt_cmd_table); } - if (IS_HASWELL(engine->i915)) { + if (IS_GEN(engine->i915, 9)) { + engine->reg_tables = gen9_blt_reg_tables; + engine->reg_table_count = + ARRAY_SIZE(gen9_blt_reg_tables); + } else if (IS_HASWELL(engine->i915)) { engine->reg_tables = hsw_blt_reg_tables; engine->reg_table_count = ARRAY_SIZE(hsw_blt_reg_tables); } else { engine->reg_tables = ivb_blt_reg_tables; engine->reg_table_count = ARRAY_SIZE(ivb_blt_reg_tables); } - - engine->get_cmd_length_mask = gen7_blt_get_cmd_length_mask; break; case VIDEO_ENHANCEMENT_CLASS: - cmd_tables = hsw_vebox_cmds; - cmd_table_count = ARRAY_SIZE(hsw_vebox_cmds); + cmd_tables = hsw_vebox_cmd_table; + cmd_table_count = ARRAY_SIZE(hsw_vebox_cmd_table); /* VECS can use the same length_mask function as VCS */ engine->get_cmd_length_mask = gen7_bsd_get_cmd_length_mask; break; @@ -943,7 +1026,7 @@ void intel_engine_init_cmd_parser(struct intel_engine_cs *engine) return; } - engine->flags |= I915_ENGINE_NEEDS_CMD_PARSER; + engine->flags |= I915_ENGINE_USING_CMD_PARSER; } /** @@ -955,7 +1038,7 @@ void intel_engine_init_cmd_parser(struct intel_engine_cs *engine) */ void intel_engine_cleanup_cmd_parser(struct intel_engine_cs *engine) { - if (!intel_engine_needs_cmd_parser(engine)) + if (!intel_engine_using_cmd_parser(engine)) return; fini_hash_table(engine); @@ -1029,22 +1112,16 @@ __find_reg(const struct drm_i915_reg_descriptor *table, int count, u32 addr) } static const struct drm_i915_reg_descriptor * -find_reg(const struct intel_engine_cs *engine, bool is_master, u32 addr) +find_reg(const struct intel_engine_cs *engine, u32 addr) { const struct drm_i915_reg_table *table = engine->reg_tables; + const struct drm_i915_reg_descriptor *reg = NULL; int count = engine->reg_table_count; - for (; count > 0; ++table, --count) { - if (!table->master || is_master) { - const struct drm_i915_reg_descriptor *reg; + for (; !reg && (count > 0); ++table, --count) + reg = __find_reg(table->regs, table->num_regs, addr); - reg = __find_reg(table->regs, table->num_regs, addr); - if (reg != NULL) - return reg; - } - } - - return NULL; + return reg; } /* Returns a vmap'd pointer to dst_obj, which the caller must unmap */ @@ -1128,8 +1205,7 @@ static u32 *copy_batch(struct drm_i915_gem_object *dst_obj, static bool check_cmd(const struct intel_engine_cs *engine, const struct drm_i915_cmd_descriptor *desc, - const u32 *cmd, u32 length, - const bool is_master) + const u32 *cmd, u32 length) { if (desc->flags & CMD_DESC_SKIP) return true; @@ -1139,12 +1215,6 @@ static bool check_cmd(const struct intel_engine_cs *engine, return false; } - if ((desc->flags & CMD_DESC_MASTER) && !is_master) { - DRM_DEBUG_DRIVER("CMD: Rejected master-only command: 0x%08X\n", - *cmd); - return false; - } - if (desc->flags & CMD_DESC_REGISTER) { /* * Get the distance between individual register offset @@ -1158,7 +1228,7 @@ static bool check_cmd(const struct intel_engine_cs *engine, offset += step) { const u32 reg_addr = cmd[offset] & desc->reg.mask; const struct drm_i915_reg_descriptor *reg = - find_reg(engine, is_master, reg_addr); + find_reg(engine, reg_addr); if (!reg) { DRM_DEBUG_DRIVER("CMD: Rejected register 0x%08X in command: 0x%08X (%s)\n", @@ -1236,16 +1306,112 @@ static bool check_cmd(const struct intel_engine_cs *engine, return true; } +static int check_bbstart(const struct i915_gem_context *ctx, + u32 *cmd, u32 offset, u32 length, + u32 batch_len, + u64 batch_start, + u64 shadow_batch_start) +{ + u64 jump_offset, jump_target; + u32 target_cmd_offset, target_cmd_index; + + /* For igt compatibility on older platforms */ + if (CMDPARSER_USES_GGTT(ctx->i915)) { + DRM_DEBUG("CMD: Rejecting BB_START for ggtt based submission\n"); + return -EACCES; + } + + if (length != 3) { + DRM_DEBUG("CMD: Recursive BB_START with bad length(%u)\n", + length); + return -EINVAL; + } + + jump_target = *(u64*)(cmd+1); + jump_offset = jump_target - batch_start; + + /* + * Any underflow of jump_target is guaranteed to be outside the range + * of a u32, so >= test catches both too large and too small + */ + if (jump_offset >= batch_len) { + DRM_DEBUG("CMD: BB_START to 0x%llx jumps out of BB\n", + jump_target); + return -EINVAL; + } + + /* + * This cannot overflow a u32 because we already checked jump_offset + * is within the BB, and the batch_len is a u32 + */ + target_cmd_offset = lower_32_bits(jump_offset); + target_cmd_index = target_cmd_offset / sizeof(u32); + + *(u64*)(cmd + 1) = shadow_batch_start + target_cmd_offset; + + if (target_cmd_index == offset) + return 0; + + if (ctx->jump_whitelist_cmds <= target_cmd_index) { + DRM_DEBUG("CMD: Rejecting BB_START - truncated whitelist array\n"); + return -EINVAL; + } else if (!test_bit(target_cmd_index, ctx->jump_whitelist)) { + DRM_DEBUG("CMD: BB_START to 0x%llx not a previously executed cmd\n", + jump_target); + return -EINVAL; + } + + return 0; +} + +static void init_whitelist(struct i915_gem_context *ctx, u32 batch_len) +{ + const u32 batch_cmds = DIV_ROUND_UP(batch_len, sizeof(u32)); + const u32 exact_size = BITS_TO_LONGS(batch_cmds); + u32 next_size = BITS_TO_LONGS(roundup_pow_of_two(batch_cmds)); + unsigned long *next_whitelist; + + if (CMDPARSER_USES_GGTT(ctx->i915)) + return; + + if (batch_cmds <= ctx->jump_whitelist_cmds) { + bitmap_zero(ctx->jump_whitelist, batch_cmds); + return; + } + +again: + next_whitelist = kcalloc(next_size, sizeof(long), GFP_KERNEL); + if (next_whitelist) { + kfree(ctx->jump_whitelist); + ctx->jump_whitelist = next_whitelist; + ctx->jump_whitelist_cmds = + next_size * BITS_PER_BYTE * sizeof(long); + return; + } + + if (next_size > exact_size) { + next_size = exact_size; + goto again; + } + + DRM_DEBUG("CMD: Failed to extend whitelist. BB_START may be disallowed\n"); + bitmap_zero(ctx->jump_whitelist, ctx->jump_whitelist_cmds); + + return; +} + #define LENGTH_BIAS 2 /** * i915_parse_cmds() - parse a submitted batch buffer for privilege violations + * @ctx: the context in which the batch is to execute * @engine: the engine on which the batch is to execute * @batch_obj: the batch buffer in question - * @shadow_batch_obj: copy of the batch buffer in question + * @batch_start: Canonical base address of batch * @batch_start_offset: byte offset in the batch at which execution starts * @batch_len: length of the commands in batch_obj - * @is_master: is the submitting process the drm master? + * @shadow_batch_obj: copy of the batch buffer in question + * @shadow_batch_start: Canonical base address of shadow_batch_obj * * Parses the specified batch buffer looking for privilege violations as * described in the overview. @@ -1253,14 +1419,17 @@ static bool check_cmd(const struct intel_engine_cs *engine, * Return: non-zero if the parser finds violations or otherwise fails; -EACCES * if the batch appears legal but should use hardware parsing */ -int intel_engine_cmd_parser(struct intel_engine_cs *engine, + +int intel_engine_cmd_parser(struct i915_gem_context *ctx, + struct intel_engine_cs *engine, struct drm_i915_gem_object *batch_obj, - struct drm_i915_gem_object *shadow_batch_obj, + u64 batch_start, u32 batch_start_offset, u32 batch_len, - bool is_master) + struct drm_i915_gem_object *shadow_batch_obj, + u64 shadow_batch_start) { - u32 *cmd, *batch_end; + u32 *cmd, *batch_end, offset = 0; struct drm_i915_cmd_descriptor default_desc = noop_desc; const struct drm_i915_cmd_descriptor *desc = &default_desc; bool needs_clflush_after = false; @@ -1274,6 +1443,8 @@ int intel_engine_cmd_parser(struct intel_engine_cs *engine, return PTR_ERR(cmd); } + init_whitelist(ctx, batch_len); + /* * We use the batch length as size because the shadow object is as * large or larger and copy_batch() will write MI_NOPs to the extra @@ -1283,31 +1454,15 @@ int intel_engine_cmd_parser(struct intel_engine_cs *engine, do { u32 length; - if (*cmd == MI_BATCH_BUFFER_END) { - if (needs_clflush_after) { - void *ptr = page_mask_bits(shadow_batch_obj->mm.mapping); - drm_clflush_virt_range(ptr, - (void *)(cmd + 1) - ptr); - } + if (*cmd == MI_BATCH_BUFFER_END) break; - } desc = find_cmd(engine, *cmd, desc, &default_desc); if (!desc) { DRM_DEBUG_DRIVER("CMD: Unrecognized command: 0x%08X\n", *cmd); ret = -EINVAL; - break; - } - - /* - * If the batch buffer contains a chained batch, return an - * error that tells the caller to abort and dispatch the - * workload as a non-secure batch. - */ - if (desc->cmd.value == MI_BATCH_BUFFER_START) { - ret = -EACCES; - break; + goto err; } if (desc->flags & CMD_DESC_FIXED) @@ -1321,22 +1476,43 @@ int intel_engine_cmd_parser(struct intel_engine_cs *engine, length, batch_end - cmd); ret = -EINVAL; - break; + goto err; } - if (!check_cmd(engine, desc, cmd, length, is_master)) { + if (!check_cmd(engine, desc, cmd, length)) { ret = -EACCES; + goto err; + } + + if (desc->cmd.value == MI_BATCH_BUFFER_START) { + ret = check_bbstart(ctx, cmd, offset, length, + batch_len, batch_start, + shadow_batch_start); + + if (ret) + goto err; break; } + if (ctx->jump_whitelist_cmds > offset) + set_bit(offset, ctx->jump_whitelist); + cmd += length; + offset += length; if (cmd >= batch_end) { DRM_DEBUG_DRIVER("CMD: Got to the end of the buffer w/o a BBE cmd!\n"); ret = -EINVAL; - break; + goto err; } } while (1); + if (needs_clflush_after) { + void *ptr = page_mask_bits(shadow_batch_obj->mm.mapping); + + drm_clflush_virt_range(ptr, (void *)(cmd + 1) - ptr); + } + +err: i915_gem_object_unpin_map(shadow_batch_obj); return ret; } @@ -1357,7 +1533,7 @@ int i915_cmd_parser_get_version(struct drm_i915_private *dev_priv) /* If the command parser is not enabled, report 0 - unsupported */ for_each_uabi_engine(engine, dev_priv) { - if (intel_engine_needs_cmd_parser(engine)) { + if (intel_engine_using_cmd_parser(engine)) { active = true; break; } @@ -1382,6 +1558,7 @@ int i915_cmd_parser_get_version(struct drm_i915_private *dev_priv) * the parser enabled. * 9. Don't whitelist or handle oacontrol specially, as ownership * for oacontrol state is moving to i915-perf. + * 10. Support for Gen9 BCS Parsing */ - return 9; + return 10; } diff --git a/drivers/gpu/drm/i915/i915_drv.c b/drivers/gpu/drm/i915/i915_drv.c index ccb5b566795f..87e05ca3646a 100644 --- a/drivers/gpu/drm/i915/i915_drv.c +++ b/drivers/gpu/drm/i915/i915_drv.c @@ -63,6 +63,7 @@ #include "gem/i915_gem_ioctls.h" #include "gt/intel_gt.h" #include "gt/intel_gt_pm.h" +#include "gt/intel_rc6.h" #include "i915_debugfs.h" #include "i915_drv.h" @@ -1819,6 +1820,8 @@ static int i915_drm_resume(struct drm_device *dev) disable_rpm_wakeref_asserts(&dev_priv->runtime_pm); + intel_rc6_ctx_wa_resume(&dev_priv->gt.rc6); + intel_gt_sanitize(&dev_priv->gt, true); ret = i915_ggtt_enable_hw(dev_priv); diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h index 0567fe67e8c5..e29bc137e7ba 100644 --- a/drivers/gpu/drm/i915/i915_drv.h +++ b/drivers/gpu/drm/i915/i915_drv.h @@ -1614,9 +1614,16 @@ IS_SUBPLATFORM(const struct drm_i915_private *i915, #define VEBOX_MASK(dev_priv) \ ENGINE_INSTANCES_MASK(dev_priv, VECS0, I915_MAX_VECS) +/* + * The Gen7 cmdparser copies the scanned buffer to the ggtt for execution + * All later gens can run the final buffer from the ppgtt + */ +#define CMDPARSER_USES_GGTT(dev_priv) IS_GEN(dev_priv, 7) + #define HAS_LLC(dev_priv) (INTEL_INFO(dev_priv)->has_llc) #define HAS_SNOOP(dev_priv) (INTEL_INFO(dev_priv)->has_snoop) #define HAS_EDRAM(dev_priv) ((dev_priv)->edram_size_mb) +#define HAS_SECURE_BATCHES(dev_priv) (INTEL_GEN(dev_priv) < 6) #define HAS_WT(dev_priv) ((IS_HASWELL(dev_priv) || \ IS_BROADWELL(dev_priv)) && HAS_EDRAM(dev_priv)) @@ -1649,10 +1656,12 @@ IS_SUBPLATFORM(const struct drm_i915_private *i915, /* Early gen2 have a totally busted CS tlb and require pinned batches. */ #define HAS_BROKEN_CS_TLB(dev_priv) (IS_I830(dev_priv) || IS_I845G(dev_priv)) +#define NEEDS_RC6_CTX_CORRUPTION_WA(dev_priv) \ + (IS_BROADWELL(dev_priv) || IS_GEN(dev_priv, 9)) + /* WaRsDisableCoarsePowerGating:skl,cnl */ #define NEEDS_WaRsDisableCoarsePowerGating(dev_priv) \ - (IS_CANNONLAKE(dev_priv) || \ - IS_SKL_GT3(dev_priv) || IS_SKL_GT4(dev_priv)) + (IS_CANNONLAKE(dev_priv) || IS_GEN(dev_priv, 9)) #define HAS_GMBUS_IRQ(dev_priv) (INTEL_GEN(dev_priv) >= 4) #define HAS_GMBUS_BURST_READ(dev_priv) (INTEL_GEN(dev_priv) >= 10 || \ @@ -1834,6 +1843,14 @@ int i915_gem_object_unbind(struct drm_i915_gem_object *obj, unsigned long flags); #define I915_GEM_OBJECT_UNBIND_ACTIVE BIT(0) +struct i915_vma * __must_check +i915_gem_object_pin(struct drm_i915_gem_object *obj, + struct i915_address_space *vm, + const struct i915_ggtt_view *view, + u64 size, + u64 alignment, + u64 flags); + void i915_gem_runtime_suspend(struct drm_i915_private *dev_priv); static inline int __must_check @@ -1939,12 +1956,14 @@ const char *i915_cache_level_str(struct drm_i915_private *i915, int type); int i915_cmd_parser_get_version(struct drm_i915_private *dev_priv); void intel_engine_init_cmd_parser(struct intel_engine_cs *engine); void intel_engine_cleanup_cmd_parser(struct intel_engine_cs *engine); -int intel_engine_cmd_parser(struct intel_engine_cs *engine, +int intel_engine_cmd_parser(struct i915_gem_context *cxt, + struct intel_engine_cs *engine, struct drm_i915_gem_object *batch_obj, - struct drm_i915_gem_object *shadow_batch_obj, + u64 user_batch_start, u32 batch_start_offset, u32 batch_len, - bool is_master); + struct drm_i915_gem_object *shadow_batch_obj, + u64 shadow_batch_start); /* intel_device_info.c */ static inline struct intel_device_info * @@ -2026,4 +2045,10 @@ i915_coherent_map_type(struct drm_i915_private *i915) return HAS_LLC(i915) ? I915_MAP_WB : I915_MAP_WC; } +static inline bool intel_guc_submission_is_enabled(struct intel_guc *guc) +{ + return intel_guc_is_submission_supported(guc) && + intel_guc_is_running(guc); +} + #endif diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c index ac23322fb382..b9eb6b3149b7 100644 --- a/drivers/gpu/drm/i915/i915_gem.c +++ b/drivers/gpu/drm/i915/i915_gem.c @@ -893,6 +893,20 @@ i915_gem_object_ggtt_pin(struct drm_i915_gem_object *obj, { struct drm_i915_private *dev_priv = to_i915(obj->base.dev); struct i915_address_space *vm = &dev_priv->ggtt.vm; + + return i915_gem_object_pin(obj, vm, view, size, alignment, + flags | PIN_GLOBAL); +} + +struct i915_vma * +i915_gem_object_pin(struct drm_i915_gem_object *obj, + struct i915_address_space *vm, + const struct i915_ggtt_view *view, + u64 size, + u64 alignment, + u64 flags) +{ + struct drm_i915_private *dev_priv = to_i915(obj->base.dev); struct i915_vma *vma; int ret; @@ -958,7 +972,7 @@ i915_gem_object_ggtt_pin(struct drm_i915_gem_object *obj, return ERR_PTR(ret); } - ret = i915_vma_pin(vma, size, alignment, flags | PIN_GLOBAL); + ret = i915_vma_pin(vma, size, alignment, flags); if (ret) return ERR_PTR(ret); diff --git a/drivers/gpu/drm/i915/i915_gem_gtt.c b/drivers/gpu/drm/i915/i915_gem_gtt.c index 88179202c556..6239a9adbf14 100644 --- a/drivers/gpu/drm/i915/i915_gem_gtt.c +++ b/drivers/gpu/drm/i915/i915_gem_gtt.c @@ -2609,8 +2609,6 @@ static int init_aliasing_ppgtt(struct i915_ggtt *ggtt) GEM_BUG_ON(ggtt->vm.vma_ops.unbind_vma != ggtt_unbind_vma); ggtt->vm.vma_ops.unbind_vma = aliasing_gtt_unbind_vma; - ppgtt->vm.total = ggtt->vm.total; - return 0; err_ppgtt: diff --git a/drivers/gpu/drm/i915/i915_getparam.c b/drivers/gpu/drm/i915/i915_getparam.c index ad33fbe90a28..cf8a8c3ef047 100644 --- a/drivers/gpu/drm/i915/i915_getparam.c +++ b/drivers/gpu/drm/i915/i915_getparam.c @@ -63,7 +63,7 @@ int i915_getparam_ioctl(struct drm_device *dev, void *data, value = !!(i915->caps.scheduler & I915_SCHEDULER_CAP_SEMAPHORES); break; case I915_PARAM_HAS_SECURE_BATCHES: - value = capable(CAP_SYS_ADMIN); + value = HAS_SECURE_BATCHES(i915) && capable(CAP_SYS_ADMIN); break; case I915_PARAM_CMD_PARSER_VERSION: value = i915_cmd_parser_get_version(i915); diff --git a/drivers/gpu/drm/i915/i915_gpu_error.c b/drivers/gpu/drm/i915/i915_gpu_error.c index e8b67f5e521d..3c85cb0ee99f 100644 --- a/drivers/gpu/drm/i915/i915_gpu_error.c +++ b/drivers/gpu/drm/i915/i915_gpu_error.c @@ -1029,9 +1029,9 @@ i915_error_object_create(struct drm_i915_private *i915, for_each_sgt_daddr(dma, iter, vma->pages) { void __iomem *s; - s = io_mapping_map_atomic_wc(&mem->iomap, dma); + s = io_mapping_map_wc(&mem->iomap, dma, PAGE_SIZE); ret = compress_page(compress, (void __force *)s, dst); - io_mapping_unmap_atomic(s); + io_mapping_unmap(s); if (ret) break; } @@ -1043,9 +1043,9 @@ i915_error_object_create(struct drm_i915_private *i915, drm_clflush_pages(&page, 1); - s = kmap_atomic(page); + s = kmap(page); ret = compress_page(compress, s, dst); - kunmap_atomic(s); + kunmap(s); drm_clflush_pages(&page, 1); diff --git a/drivers/gpu/drm/i915/i915_perf.c b/drivers/gpu/drm/i915/i915_perf.c index a8c2318d3d5e..65d7c2e599de 100644 --- a/drivers/gpu/drm/i915/i915_perf.c +++ b/drivers/gpu/drm/i915/i915_perf.c @@ -1870,7 +1870,7 @@ alloc_oa_config_buffer(struct i915_perf_stream *stream, config_length += num_lri_dwords(oa_config->mux_regs_len); config_length += num_lri_dwords(oa_config->b_counter_regs_len); config_length += num_lri_dwords(oa_config->flex_regs_len); - config_length++; /* MI_BATCH_BUFFER_END */ + config_length += 3; /* MI_BATCH_BUFFER_START */ config_length = ALIGN(sizeof(u32) * config_length, I915_GTT_PAGE_SIZE); obj = i915_gem_object_create_shmem(stream->perf->i915, config_length); @@ -1895,7 +1895,12 @@ alloc_oa_config_buffer(struct i915_perf_stream *stream, oa_config->flex_regs, oa_config->flex_regs_len); - *cs++ = MI_BATCH_BUFFER_END; + /* Jump into the active wait. */ + *cs++ = (INTEL_GEN(stream->perf->i915) < 8 ? + MI_BATCH_BUFFER_START : + MI_BATCH_BUFFER_START_GEN8); + *cs++ = i915_ggtt_offset(stream->noa_wait); + *cs++ = 0; i915_gem_object_flush_map(obj); i915_gem_object_unpin_map(obj); @@ -3307,15 +3312,6 @@ i915_perf_open_ioctl_locked(struct i915_perf *perf, } } - if (props->hold_preemption) { - if (!props->single_context) { - DRM_DEBUG("preemption disable with no context\n"); - ret = -EINVAL; - goto err; - } - privileged_op = true; - } - /* * On Haswell the OA unit supports clock gating off for a specific * context and in this mode there's no visibility of metrics for the @@ -3335,12 +3331,21 @@ i915_perf_open_ioctl_locked(struct i915_perf *perf, * doesn't request global stream access (i.e. query based sampling * using MI_RECORD_PERF_COUNT. */ - if (IS_HASWELL(perf->i915) && specific_ctx && !props->hold_preemption) + if (IS_HASWELL(perf->i915) && specific_ctx) privileged_op = false; else if (IS_GEN(perf->i915, 12) && specific_ctx && (props->sample_flags & SAMPLE_OA_REPORT) == 0) privileged_op = false; + if (props->hold_preemption) { + if (!props->single_context) { + DRM_DEBUG("preemption disable with no context\n"); + ret = -EINVAL; + goto err; + } + privileged_op = true; + } + /* Similar to perf's kernel.perf_paranoid_cpu sysctl option * we check a dev.i915.perf_stream_paranoid sysctl option * to determine if it's ok to access system wide OA counters diff --git a/drivers/gpu/drm/i915/i915_pmu.c b/drivers/gpu/drm/i915/i915_pmu.c index 05395015d1f2..2814218c5ba1 100644 --- a/drivers/gpu/drm/i915/i915_pmu.c +++ b/drivers/gpu/drm/i915/i915_pmu.c @@ -190,7 +190,7 @@ static u64 get_rc6(struct intel_gt *gt) val = 0; if (intel_gt_pm_get_if_awake(gt)) { val = __get_rc6(gt); - intel_gt_pm_put(gt); + intel_gt_pm_put_async(gt); } spin_lock_irqsave(&pmu->lock, flags); @@ -343,7 +343,7 @@ engines_sample(struct intel_gt *gt, unsigned int period_ns) skip: spin_unlock_irqrestore(&engine->uncore->lock, flags); - intel_engine_pm_put(engine); + intel_engine_pm_put_async(engine); } } @@ -368,7 +368,7 @@ frequency_sample(struct intel_gt *gt, unsigned int period_ns) if (intel_gt_pm_get_if_awake(gt)) { val = intel_uncore_read_notrace(uncore, GEN6_RPSTAT1); val = intel_get_cagf(rps, val); - intel_gt_pm_put(gt); + intel_gt_pm_put_async(gt); } add_sample_mult(&pmu->sample[__I915_SAMPLE_FREQ_ACT], @@ -878,8 +878,8 @@ create_event_attributes(struct i915_pmu *pmu) const char *name; const char *unit; } events[] = { - __event(I915_PMU_ACTUAL_FREQUENCY, "actual-frequency", "MHz"), - __event(I915_PMU_REQUESTED_FREQUENCY, "requested-frequency", "MHz"), + __event(I915_PMU_ACTUAL_FREQUENCY, "actual-frequency", "M"), + __event(I915_PMU_REQUESTED_FREQUENCY, "requested-frequency", "M"), __event(I915_PMU_INTERRUPTS, "interrupts", NULL), __event(I915_PMU_RC6_RESIDENCY, "rc6-residency", "ns"), }; diff --git a/drivers/gpu/drm/i915/i915_query.c b/drivers/gpu/drm/i915/i915_query.c index c27cfef9281c..ef25ce6e395e 100644 --- a/drivers/gpu/drm/i915/i915_query.c +++ b/drivers/gpu/drm/i915/i915_query.c @@ -103,15 +103,18 @@ query_engine_info(struct drm_i915_private *i915, struct drm_i915_engine_info __user *info_ptr; struct drm_i915_query_engine_info query; struct drm_i915_engine_info info = { }; + unsigned int num_uabi_engines = 0; struct intel_engine_cs *engine; int len, ret; if (query_item->flags) return -EINVAL; + for_each_uabi_engine(engine, i915) + num_uabi_engines++; + len = sizeof(struct drm_i915_query_engine_info) + - RUNTIME_INFO(i915)->num_engines * - sizeof(struct drm_i915_engine_info); + num_uabi_engines * sizeof(struct drm_i915_engine_info); ret = copy_query_item(&query, sizeof(query), len, query_item); if (ret != 0) diff --git a/drivers/gpu/drm/i915/i915_reg.h b/drivers/gpu/drm/i915/i915_reg.h index 53c280c4e741..73079b503724 100644 --- a/drivers/gpu/drm/i915/i915_reg.h +++ b/drivers/gpu/drm/i915/i915_reg.h @@ -474,6 +474,8 @@ static inline bool i915_mmio_reg_valid(i915_reg_t reg) #define ECOCHK_PPGTT_WT_HSW (0x2 << 3) #define ECOCHK_PPGTT_WB_HSW (0x3 << 3) +#define GEN8_RC6_CTX_INFO _MMIO(0x8504) + #define GAC_ECO_BITS _MMIO(0x14090) #define ECOBITS_SNB_BIT (1 << 13) #define ECOBITS_PPGTT_CACHE64B (3 << 8) @@ -560,6 +562,10 @@ static inline bool i915_mmio_reg_valid(i915_reg_t reg) */ #define BCS_SWCTRL _MMIO(0x22200) +/* There are 16 GPR registers */ +#define BCS_GPR(n) _MMIO(0x22600 + (n) * 8) +#define BCS_GPR_UDW(n) _MMIO(0x22600 + (n) * 8 + 4) + #define GPGPU_THREADS_DISPATCHED _MMIO(0x2290) #define GPGPU_THREADS_DISPATCHED_UDW _MMIO(0x2290 + 4) #define HS_INVOCATION_COUNT _MMIO(0x2300) @@ -7353,6 +7359,10 @@ enum { #define DMC_DEBUG3 _MMIO(0x101090) +/* Display Internal Timeout Register */ +#define RM_TIMEOUT _MMIO(0x42060) +#define MMIO_TIMEOUT_US(us) ((us) << 0) + /* interrupts */ #define DE_MASTER_IRQ_CONTROL (1 << 31) #define DE_SPRITEB_FLIP_DONE (1 << 29) @@ -9661,7 +9671,7 @@ enum skl_power_gate { #define TRANS_DDI_EDP_INPUT_A_ONOFF (4 << 12) #define TRANS_DDI_EDP_INPUT_B_ONOFF (5 << 12) #define TRANS_DDI_EDP_INPUT_C_ONOFF (6 << 12) -#define TRANS_DDI_MST_TRANSPORT_SELECT_MASK REG_GENMASK(12, 10) +#define TRANS_DDI_MST_TRANSPORT_SELECT_MASK REG_GENMASK(11, 10) #define TRANS_DDI_MST_TRANSPORT_SELECT(trans) \ REG_FIELD_PREP(TRANS_DDI_MST_TRANSPORT_SELECT_MASK, trans) #define TRANS_DDI_HDCP_SIGNALLING (1 << 9) diff --git a/drivers/gpu/drm/i915/intel_pch.c b/drivers/gpu/drm/i915/intel_pch.c index 000ba43e2c02..8fd92b9130a7 100644 --- a/drivers/gpu/drm/i915/intel_pch.c +++ b/drivers/gpu/drm/i915/intel_pch.c @@ -62,7 +62,6 @@ intel_pch_type(const struct drm_i915_private *dev_priv, unsigned short id) /* KBP is SPT compatible */ return PCH_SPT; case INTEL_PCH_CNP_DEVICE_ID_TYPE: - case INTEL_PCH_CNP2_DEVICE_ID_TYPE: DRM_DEBUG_KMS("Found Cannon Lake PCH (CNP)\n"); WARN_ON(!IS_CANNONLAKE(dev_priv) && !IS_COFFEELAKE(dev_priv)); return PCH_CNP; @@ -76,6 +75,11 @@ intel_pch_type(const struct drm_i915_private *dev_priv, unsigned short id) WARN_ON(!IS_COFFEELAKE(dev_priv)); /* CometPoint is CNP Compatible */ return PCH_CNP; + case INTEL_PCH_CMP_V_DEVICE_ID_TYPE: + DRM_DEBUG_KMS("Found Comet Lake V PCH (CMP-V)\n"); + WARN_ON(!IS_COFFEELAKE(dev_priv)); + /* Comet Lake V PCH is based on KBP, which is SPT compatible */ + return PCH_SPT; case INTEL_PCH_ICP_DEVICE_ID_TYPE: DRM_DEBUG_KMS("Found Ice Lake PCH\n"); WARN_ON(!IS_ICELAKE(dev_priv)); diff --git a/drivers/gpu/drm/i915/intel_pch.h b/drivers/gpu/drm/i915/intel_pch.h index 1115c6a0522c..d26c25dd8d54 100644 --- a/drivers/gpu/drm/i915/intel_pch.h +++ b/drivers/gpu/drm/i915/intel_pch.h @@ -40,10 +40,10 @@ enum intel_pch { #define INTEL_PCH_SPT_LP_DEVICE_ID_TYPE 0x9D00 #define INTEL_PCH_KBP_DEVICE_ID_TYPE 0xA280 #define INTEL_PCH_CNP_DEVICE_ID_TYPE 0xA300 -#define INTEL_PCH_CNP2_DEVICE_ID_TYPE 0xA380 #define INTEL_PCH_CNP_LP_DEVICE_ID_TYPE 0x9D80 #define INTEL_PCH_CMP_DEVICE_ID_TYPE 0x0280 #define INTEL_PCH_CMP2_DEVICE_ID_TYPE 0x0680 +#define INTEL_PCH_CMP_V_DEVICE_ID_TYPE 0xA380 #define INTEL_PCH_ICP_DEVICE_ID_TYPE 0x3480 #define INTEL_PCH_MCC_DEVICE_ID_TYPE 0x4B00 #define INTEL_PCH_TGP_DEVICE_ID_TYPE 0xA080 diff --git a/drivers/gpu/drm/i915/intel_pm.c b/drivers/gpu/drm/i915/intel_pm.c index 5d2b460d3ee5..809bff955b5a 100644 --- a/drivers/gpu/drm/i915/intel_pm.c +++ b/drivers/gpu/drm/i915/intel_pm.c @@ -107,6 +107,14 @@ static void bxt_init_clock_gating(struct drm_i915_private *dev_priv) */ I915_WRITE(GEN9_CLKGATE_DIS_0, I915_READ(GEN9_CLKGATE_DIS_0) | PWM1_GATING_DIS | PWM2_GATING_DIS); + + /* + * Lower the display internal timeout. + * This is needed to avoid any hard hangs when DSI port PLL + * is off and a MMIO access is attempted by any privilege + * application, using batch buffers or any other means. + */ + I915_WRITE(RM_TIMEOUT, MMIO_TIMEOUT_US(950)); } static void glk_init_clock_gating(struct drm_i915_private *dev_priv) diff --git a/drivers/gpu/drm/i915/intel_wakeref.c b/drivers/gpu/drm/i915/intel_wakeref.c index 868cc78048d0..59aa1b6f1827 100644 --- a/drivers/gpu/drm/i915/intel_wakeref.c +++ b/drivers/gpu/drm/i915/intel_wakeref.c @@ -54,7 +54,8 @@ int __intel_wakeref_get_first(struct intel_wakeref *wf) static void ____intel_wakeref_put_last(struct intel_wakeref *wf) { - if (!atomic_dec_and_test(&wf->count)) + INTEL_WAKEREF_BUG_ON(atomic_read(&wf->count) <= 0); + if (unlikely(!atomic_dec_and_test(&wf->count))) goto unlock; /* ops->put() must reschedule its own release on error/deferral */ @@ -67,13 +68,12 @@ unlock: mutex_unlock(&wf->mutex); } -void __intel_wakeref_put_last(struct intel_wakeref *wf) +void __intel_wakeref_put_last(struct intel_wakeref *wf, unsigned long flags) { INTEL_WAKEREF_BUG_ON(work_pending(&wf->work)); /* Assume we are not in process context and so cannot sleep. */ - if (wf->ops->flags & INTEL_WAKEREF_PUT_ASYNC || - !mutex_trylock(&wf->mutex)) { + if (flags & INTEL_WAKEREF_PUT_ASYNC || !mutex_trylock(&wf->mutex)) { schedule_work(&wf->work); return; } @@ -109,8 +109,17 @@ void __intel_wakeref_init(struct intel_wakeref *wf, int intel_wakeref_wait_for_idle(struct intel_wakeref *wf) { - return wait_var_event_killable(&wf->wakeref, - !intel_wakeref_is_active(wf)); + int err; + + might_sleep(); + + err = wait_var_event_killable(&wf->wakeref, + !intel_wakeref_is_active(wf)); + if (err) + return err; + + intel_wakeref_unlock_wait(wf); + return 0; } static void wakeref_auto_timeout(struct timer_list *t) diff --git a/drivers/gpu/drm/i915/intel_wakeref.h b/drivers/gpu/drm/i915/intel_wakeref.h index 5f0c972a80fb..da6e8fd506e6 100644 --- a/drivers/gpu/drm/i915/intel_wakeref.h +++ b/drivers/gpu/drm/i915/intel_wakeref.h @@ -9,6 +9,7 @@ #include <linux/atomic.h> #include <linux/bits.h> +#include <linux/lockdep.h> #include <linux/mutex.h> #include <linux/refcount.h> #include <linux/stackdepot.h> @@ -29,9 +30,6 @@ typedef depot_stack_handle_t intel_wakeref_t; struct intel_wakeref_ops { int (*get)(struct intel_wakeref *wf); int (*put)(struct intel_wakeref *wf); - - unsigned long flags; -#define INTEL_WAKEREF_PUT_ASYNC BIT(0) }; struct intel_wakeref { @@ -57,7 +55,7 @@ void __intel_wakeref_init(struct intel_wakeref *wf, } while (0) int __intel_wakeref_get_first(struct intel_wakeref *wf); -void __intel_wakeref_put_last(struct intel_wakeref *wf); +void __intel_wakeref_put_last(struct intel_wakeref *wf, unsigned long flags); /** * intel_wakeref_get: Acquire the wakeref @@ -100,10 +98,9 @@ intel_wakeref_get_if_active(struct intel_wakeref *wf) } /** - * intel_wakeref_put: Release the wakeref - * @i915: the drm_i915_private device + * intel_wakeref_put_flags: Release the wakeref * @wf: the wakeref - * @fn: callback for releasing the wakeref, called only on final release. + * @flags: control flags * * Release our hold on the wakeref. When there are no more users, * the runtime pm wakeref will be released after the @fn callback is called @@ -116,11 +113,25 @@ intel_wakeref_get_if_active(struct intel_wakeref *wf) * code otherwise. */ static inline void -intel_wakeref_put(struct intel_wakeref *wf) +__intel_wakeref_put(struct intel_wakeref *wf, unsigned long flags) +#define INTEL_WAKEREF_PUT_ASYNC BIT(0) { INTEL_WAKEREF_BUG_ON(atomic_read(&wf->count) <= 0); if (unlikely(!atomic_add_unless(&wf->count, -1, 1))) - __intel_wakeref_put_last(wf); + __intel_wakeref_put_last(wf, flags); +} + +static inline void +intel_wakeref_put(struct intel_wakeref *wf) +{ + might_sleep(); + __intel_wakeref_put(wf, 0); +} + +static inline void +intel_wakeref_put_async(struct intel_wakeref *wf) +{ + __intel_wakeref_put(wf, INTEL_WAKEREF_PUT_ASYNC); } /** @@ -152,6 +163,21 @@ intel_wakeref_unlock(struct intel_wakeref *wf) } /** + * intel_wakeref_unlock_wait: Wait until the active callback is complete + * @wf: the wakeref + * + * Waits for the active callback (under the @wf->mutex or another CPU) is + * complete. + */ +static inline void +intel_wakeref_unlock_wait(struct intel_wakeref *wf) +{ + mutex_lock(&wf->mutex); + mutex_unlock(&wf->mutex); + flush_work(&wf->work); +} + +/** * intel_wakeref_is_active: Query whether the wakeref is currently held * @wf: the wakeref * @@ -170,6 +196,7 @@ intel_wakeref_is_active(const struct intel_wakeref *wf) static inline void __intel_wakeref_defer_park(struct intel_wakeref *wf) { + lockdep_assert_held(&wf->mutex); INTEL_WAKEREF_BUG_ON(atomic_read(&wf->count)); atomic_set_release(&wf->count, 1); } |