Diffstat (limited to 'drivers/gpu/drm/i915')
-rw-r--r--  drivers/gpu/drm/i915/Kconfig.debug | 1
-rw-r--r--  drivers/gpu/drm/i915/Kconfig.profile | 2
-rw-r--r--  drivers/gpu/drm/i915/display/intel_atomic.c | 1
-rw-r--r--  drivers/gpu/drm/i915/display/intel_cdclk.c | 4
-rw-r--r--  drivers/gpu/drm/i915/display/intel_color.c | 61
-rw-r--r--  drivers/gpu/drm/i915/display/intel_ddi.c | 39
-rw-r--r--  drivers/gpu/drm/i915/display/intel_display.c | 10
-rw-r--r--  drivers/gpu/drm/i915/display/intel_display.h | 1
-rw-r--r--  drivers/gpu/drm/i915/display/intel_display_types.h | 1
-rw-r--r--  drivers/gpu/drm/i915/display/intel_dp_mst.c | 2
-rw-r--r--  drivers/gpu/drm/i915/display/intel_fbdev.c | 9
-rw-r--r--  drivers/gpu/drm/i915/display/intel_sprite.c | 2
-rw-r--r--  drivers/gpu/drm/i915/gem/i915_gem_context.c | 12
-rw-r--r--  drivers/gpu/drm/i915/gem/i915_gem_context_types.h | 7
-rw-r--r--  drivers/gpu/drm/i915/gem/i915_gem_execbuffer.c | 111
-rw-r--r--  drivers/gpu/drm/i915/gem/i915_gem_userptr.c | 22
-rw-r--r--  drivers/gpu/drm/i915/gt/intel_context.c | 21
-rw-r--r--  drivers/gpu/drm/i915/gt/intel_engine.h | 4
-rw-r--r--  drivers/gpu/drm/i915/gt/intel_engine_cs.c | 10
-rw-r--r--  drivers/gpu/drm/i915/gt/intel_engine_heartbeat.c | 4
-rw-r--r--  drivers/gpu/drm/i915/gt/intel_engine_pm.c | 67
-rw-r--r--  drivers/gpu/drm/i915/gt/intel_engine_pm.h | 10
-rw-r--r--  drivers/gpu/drm/i915/gt/intel_engine_types.h | 21
-rw-r--r--  drivers/gpu/drm/i915/gt/intel_gt_pm.c | 11
-rw-r--r--  drivers/gpu/drm/i915/gt/intel_gt_pm.h | 5
-rw-r--r--  drivers/gpu/drm/i915/gt/intel_gt_requests.c | 87
-rw-r--r--  drivers/gpu/drm/i915/gt/intel_gt_requests.h | 7
-rw-r--r--  drivers/gpu/drm/i915/gt/intel_lrc.c | 166
-rw-r--r--  drivers/gpu/drm/i915/gt/intel_rc6.c | 74
-rw-r--r--  drivers/gpu/drm/i915/gt/intel_rc6.h | 3
-rw-r--r--  drivers/gpu/drm/i915/gt/intel_rc6_types.h | 1
-rw-r--r--  drivers/gpu/drm/i915/gt/intel_reset.c | 2
-rw-r--r--  drivers/gpu/drm/i915/gt/intel_ring.c | 13
-rw-r--r--  drivers/gpu/drm/i915/gt/intel_timeline.c | 35
-rw-r--r--  drivers/gpu/drm/i915/gt/intel_timeline_types.h | 5
-rw-r--r--  drivers/gpu/drm/i915/gt/selftest_engine_pm.c | 7
-rw-r--r--  drivers/gpu/drm/i915/gt/uc/intel_guc.c | 15
-rw-r--r--  drivers/gpu/drm/i915/i915_active.c | 58
-rw-r--r--  drivers/gpu/drm/i915/i915_active_types.h | 1
-rw-r--r--  drivers/gpu/drm/i915/i915_cmd_parser.c | 435
-rw-r--r--  drivers/gpu/drm/i915/i915_drv.c | 3
-rw-r--r--  drivers/gpu/drm/i915/i915_drv.h | 35
-rw-r--r--  drivers/gpu/drm/i915/i915_gem.c | 16
-rw-r--r--  drivers/gpu/drm/i915/i915_gem_gtt.c | 2
-rw-r--r--  drivers/gpu/drm/i915/i915_getparam.c | 2
-rw-r--r--  drivers/gpu/drm/i915/i915_gpu_error.c | 8
-rw-r--r--  drivers/gpu/drm/i915/i915_perf.c | 29
-rw-r--r--  drivers/gpu/drm/i915/i915_pmu.c | 10
-rw-r--r--  drivers/gpu/drm/i915/i915_query.c | 7
-rw-r--r--  drivers/gpu/drm/i915/i915_reg.h | 12
-rw-r--r--  drivers/gpu/drm/i915/intel_pch.c | 6
-rw-r--r--  drivers/gpu/drm/i915/intel_pch.h | 2
-rw-r--r--  drivers/gpu/drm/i915/intel_pm.c | 8
-rw-r--r--  drivers/gpu/drm/i915/intel_wakeref.c | 21
-rw-r--r--  drivers/gpu/drm/i915/intel_wakeref.h | 45
55 files changed, 1164 insertions(+), 389 deletions(-)
diff --git a/drivers/gpu/drm/i915/Kconfig.debug b/drivers/gpu/drm/i915/Kconfig.debug
index 80815a5229a1..eea79125b3ea 100644
--- a/drivers/gpu/drm/i915/Kconfig.debug
+++ b/drivers/gpu/drm/i915/Kconfig.debug
@@ -36,7 +36,6 @@ config DRM_I915_DEBUG
select DRM_I915_SELFTEST
select DRM_I915_DEBUG_RUNTIME_PM
select DRM_I915_DEBUG_MMIO
- select BROKEN # for prototype uAPI
default n
help
Choose this option to turn on extra driver debugging that may affect
diff --git a/drivers/gpu/drm/i915/Kconfig.profile b/drivers/gpu/drm/i915/Kconfig.profile
index 1799537a3228..c280b6ae38eb 100644
--- a/drivers/gpu/drm/i915/Kconfig.profile
+++ b/drivers/gpu/drm/i915/Kconfig.profile
@@ -25,7 +25,7 @@ config DRM_I915_HEARTBEAT_INTERVAL
config DRM_I915_PREEMPT_TIMEOUT
int "Preempt timeout (ms, jiffy granularity)"
- default 100 # milliseconds
+ default 640 # milliseconds
help
How long to wait (in milliseconds) for a preemption event to occur
when submitting a new context via execlists. If the current context
diff --git a/drivers/gpu/drm/i915/display/intel_atomic.c b/drivers/gpu/drm/i915/display/intel_atomic.c
index 9cd6d2348a1e..c2875b10adf9 100644
--- a/drivers/gpu/drm/i915/display/intel_atomic.c
+++ b/drivers/gpu/drm/i915/display/intel_atomic.c
@@ -200,6 +200,7 @@ intel_crtc_duplicate_state(struct drm_crtc *crtc)
crtc_state->update_wm_pre = false;
crtc_state->update_wm_post = false;
crtc_state->fifo_changed = false;
+ crtc_state->preload_luts = false;
crtc_state->wm.need_postvbl_update = false;
crtc_state->fb_bits = 0;
crtc_state->update_planes = 0;
diff --git a/drivers/gpu/drm/i915/display/intel_cdclk.c b/drivers/gpu/drm/i915/display/intel_cdclk.c
index 0caef2592a7e..ed8c7ce62119 100644
--- a/drivers/gpu/drm/i915/display/intel_cdclk.c
+++ b/drivers/gpu/drm/i915/display/intel_cdclk.c
@@ -1273,7 +1273,9 @@ static u8 icl_calc_voltage_level(int cdclk)
static u8 ehl_calc_voltage_level(int cdclk)
{
- if (cdclk > 312000)
+ if (cdclk > 326400)
+ return 3;
+ else if (cdclk > 312000)
return 2;
else if (cdclk > 180000)
return 1;
diff --git a/drivers/gpu/drm/i915/display/intel_color.c b/drivers/gpu/drm/i915/display/intel_color.c
index fa44eb73d088..aa3a063549c3 100644
--- a/drivers/gpu/drm/i915/display/intel_color.c
+++ b/drivers/gpu/drm/i915/display/intel_color.c
@@ -1022,6 +1022,55 @@ void intel_color_commit(const struct intel_crtc_state *crtc_state)
dev_priv->display.color_commit(crtc_state);
}
+static bool intel_can_preload_luts(const struct intel_crtc_state *new_crtc_state)
+{
+ struct intel_crtc *crtc = to_intel_crtc(new_crtc_state->base.crtc);
+ struct intel_atomic_state *state =
+ to_intel_atomic_state(new_crtc_state->base.state);
+ const struct intel_crtc_state *old_crtc_state =
+ intel_atomic_get_old_crtc_state(state, crtc);
+
+ return !old_crtc_state->base.gamma_lut &&
+ !old_crtc_state->base.degamma_lut;
+}
+
+static bool chv_can_preload_luts(const struct intel_crtc_state *new_crtc_state)
+{
+ struct intel_crtc *crtc = to_intel_crtc(new_crtc_state->base.crtc);
+ struct intel_atomic_state *state =
+ to_intel_atomic_state(new_crtc_state->base.state);
+ const struct intel_crtc_state *old_crtc_state =
+ intel_atomic_get_old_crtc_state(state, crtc);
+
+ /*
+ * CGM_PIPE_MODE is itself single buffered. We'd have to
+ * somehow split it out from chv_load_luts() if we wanted
+ * the ability to preload the CGM LUTs/CSC without tearing.
+ */
+ if (old_crtc_state->cgm_mode || new_crtc_state->cgm_mode)
+ return false;
+
+ return !old_crtc_state->base.gamma_lut;
+}
+
+static bool glk_can_preload_luts(const struct intel_crtc_state *new_crtc_state)
+{
+ struct intel_crtc *crtc = to_intel_crtc(new_crtc_state->base.crtc);
+ struct intel_atomic_state *state =
+ to_intel_atomic_state(new_crtc_state->base.state);
+ const struct intel_crtc_state *old_crtc_state =
+ intel_atomic_get_old_crtc_state(state, crtc);
+
+ /*
+ * The hardware degamma is active whenever the pipe
+ * CSC is active. Thus even if the old state has no
+ * software degamma we need to avoid clobbering the
+ * linear hardware degamma mid scanout.
+ */
+ return !old_crtc_state->csc_enable &&
+ !old_crtc_state->base.gamma_lut;
+}
+
int intel_color_check(struct intel_crtc_state *crtc_state)
{
struct drm_i915_private *dev_priv = to_i915(crtc_state->base.crtc->dev);
@@ -1165,6 +1214,8 @@ static int i9xx_color_check(struct intel_crtc_state *crtc_state)
if (ret)
return ret;
+ crtc_state->preload_luts = intel_can_preload_luts(crtc_state);
+
return 0;
}
@@ -1217,6 +1268,8 @@ static int chv_color_check(struct intel_crtc_state *crtc_state)
if (ret)
return ret;
+ crtc_state->preload_luts = chv_can_preload_luts(crtc_state);
+
return 0;
}
@@ -1271,6 +1324,8 @@ static int ilk_color_check(struct intel_crtc_state *crtc_state)
if (ret)
return ret;
+ crtc_state->preload_luts = intel_can_preload_luts(crtc_state);
+
return 0;
}
@@ -1328,6 +1383,8 @@ static int ivb_color_check(struct intel_crtc_state *crtc_state)
if (ret)
return ret;
+ crtc_state->preload_luts = intel_can_preload_luts(crtc_state);
+
return 0;
}
@@ -1366,6 +1423,8 @@ static int glk_color_check(struct intel_crtc_state *crtc_state)
if (ret)
return ret;
+ crtc_state->preload_luts = glk_can_preload_luts(crtc_state);
+
return 0;
}
@@ -1415,6 +1474,8 @@ static int icl_color_check(struct intel_crtc_state *crtc_state)
crtc_state->csc_mode = icl_csc_mode(crtc_state);
+ crtc_state->preload_luts = intel_can_preload_luts(crtc_state);
+
return 0;
}
diff --git a/drivers/gpu/drm/i915/display/intel_ddi.c b/drivers/gpu/drm/i915/display/intel_ddi.c
index b51f244ad7a5..c7c2b349858d 100644
--- a/drivers/gpu/drm/i915/display/intel_ddi.c
+++ b/drivers/gpu/drm/i915/display/intel_ddi.c
@@ -593,7 +593,7 @@ struct tgl_dkl_phy_ddi_buf_trans {
u32 dkl_de_emphasis_control;
};
-static const struct tgl_dkl_phy_ddi_buf_trans tgl_dkl_phy_ddi_translations[] = {
+static const struct tgl_dkl_phy_ddi_buf_trans tgl_dkl_phy_dp_ddi_trans[] = {
/* VS pre-emp Non-trans mV Pre-emph dB */
{ 0x7, 0x0, 0x00 }, /* 0 0 400mV 0 dB */
{ 0x5, 0x0, 0x03 }, /* 0 1 400mV 3.5 dB */
@@ -607,6 +607,20 @@ static const struct tgl_dkl_phy_ddi_buf_trans tgl_dkl_phy_ddi_translations[] = {
{ 0x0, 0x0, 0x00 }, /* 3 0 1200mV 0 dB HDMI default */
};
+static const struct tgl_dkl_phy_ddi_buf_trans tgl_dkl_phy_hdmi_ddi_trans[] = {
+ /* HDMI Preset VS Pre-emph */
+ { 0x7, 0x0, 0x0 }, /* 1 400mV 0dB */
+ { 0x6, 0x0, 0x0 }, /* 2 500mV 0dB */
+ { 0x4, 0x0, 0x0 }, /* 3 650mV 0dB */
+ { 0x2, 0x0, 0x0 }, /* 4 800mV 0dB */
+ { 0x0, 0x0, 0x0 }, /* 5 1000mV 0dB */
+ { 0x0, 0x0, 0x5 }, /* 6 Full -1.5 dB */
+ { 0x0, 0x0, 0x6 }, /* 7 Full -1.8 dB */
+ { 0x0, 0x0, 0x7 }, /* 8 Full -2 dB */
+ { 0x0, 0x0, 0x8 }, /* 9 Full -2.5 dB */
+ { 0x0, 0x0, 0xA }, /* 10 Full -3 dB */
+};
+
static const struct ddi_buf_trans *
bdw_get_buf_trans_edp(struct drm_i915_private *dev_priv, int *n_entries)
{
@@ -898,7 +912,7 @@ static int intel_ddi_hdmi_level(struct drm_i915_private *dev_priv, enum port por
icl_get_combo_buf_trans(dev_priv, INTEL_OUTPUT_HDMI,
0, &n_entries);
else
- n_entries = ARRAY_SIZE(tgl_dkl_phy_ddi_translations);
+ n_entries = ARRAY_SIZE(tgl_dkl_phy_hdmi_ddi_trans);
default_entry = n_entries - 1;
} else if (INTEL_GEN(dev_priv) == 11) {
if (intel_phy_is_combo(dev_priv, phy))
@@ -1794,10 +1808,8 @@ void intel_ddi_set_dp_msa(const struct intel_crtc_state *crtc_state,
* of Color Encoding Format and Content Color Gamut] while sending
* YCBCR 420, HDR BT.2020 signals we should program MSA MISC1 fields
* which indicate VSC SDP for the Pixel Encoding/Colorimetry Format.
- *
- * FIXME MST doesn't pass in the conn_state
*/
- if (conn_state && intel_dp_needs_vsc_sdp(crtc_state, conn_state))
+ if (intel_dp_needs_vsc_sdp(crtc_state, conn_state))
temp |= DP_MSA_MISC_COLOR_VSC_SDP;
I915_WRITE(TRANS_MSA_MISC(cpu_transcoder), temp);
@@ -2373,7 +2385,7 @@ u8 intel_ddi_dp_voltage_max(struct intel_encoder *encoder)
icl_get_combo_buf_trans(dev_priv, encoder->type,
intel_dp->link_rate, &n_entries);
else
- n_entries = ARRAY_SIZE(tgl_dkl_phy_ddi_translations);
+ n_entries = ARRAY_SIZE(tgl_dkl_phy_dp_ddi_trans);
} else if (INTEL_GEN(dev_priv) == 11) {
if (intel_phy_is_combo(dev_priv, phy))
icl_get_combo_buf_trans(dev_priv, encoder->type,
@@ -2825,8 +2837,13 @@ tgl_dkl_phy_ddi_vswing_sequence(struct intel_encoder *encoder, int link_clock,
const struct tgl_dkl_phy_ddi_buf_trans *ddi_translations;
u32 n_entries, val, ln, dpcnt_mask, dpcnt_val;
- n_entries = ARRAY_SIZE(tgl_dkl_phy_ddi_translations);
- ddi_translations = tgl_dkl_phy_ddi_translations;
+ if (encoder->type == INTEL_OUTPUT_HDMI) {
+ n_entries = ARRAY_SIZE(tgl_dkl_phy_hdmi_ddi_trans);
+ ddi_translations = tgl_dkl_phy_hdmi_ddi_trans;
+ } else {
+ n_entries = ARRAY_SIZE(tgl_dkl_phy_dp_ddi_trans);
+ ddi_translations = tgl_dkl_phy_dp_ddi_trans;
+ }
if (level >= n_entries)
level = n_entries - 1;
@@ -3605,7 +3622,11 @@ static void intel_ddi_pre_enable_dp(struct intel_encoder *encoder,
else
hsw_ddi_pre_enable_dp(encoder, crtc_state, conn_state);
- intel_ddi_set_dp_msa(crtc_state, conn_state);
+ /* MST will set the MSA after allocating the Virtual Channel from the
+ * MST encoder's pre_enable callback.
+ */
+ if (!intel_crtc_has_type(crtc_state, INTEL_OUTPUT_DP_MST))
+ intel_ddi_set_dp_msa(crtc_state, conn_state);
}
static void intel_ddi_pre_enable_hdmi(struct intel_encoder *encoder,
diff --git a/drivers/gpu/drm/i915/display/intel_display.c b/drivers/gpu/drm/i915/display/intel_display.c
index 348ce0456696..6f5e3bd13ad1 100644
--- a/drivers/gpu/drm/i915/display/intel_display.c
+++ b/drivers/gpu/drm/i915/display/intel_display.c
@@ -66,6 +66,7 @@
#include "intel_cdclk.h"
#include "intel_color.h"
#include "intel_display_types.h"
+#include "intel_dp_link_training.h"
#include "intel_fbc.h"
#include "intel_fbdev.h"
#include "intel_fifo_underrun.h"
@@ -2528,6 +2529,9 @@ u32 intel_plane_fb_max_stride(struct drm_i915_private *dev_priv,
* the highest stride limits of them all.
*/
crtc = intel_get_crtc_for_pipe(dev_priv, PIPE_A);
+ if (!crtc)
+ return 0;
+
plane = to_intel_plane(crtc->base.primary);
return plane->max_stride(plane, pixel_format, modifier,
@@ -14201,6 +14205,11 @@ static void intel_update_crtc(struct intel_crtc *crtc,
/* vblanks work again, re-enable pipe CRC. */
intel_crtc_enable_pipe_crc(crtc);
} else {
+ if (new_crtc_state->preload_luts &&
+ (new_crtc_state->base.color_mgmt_changed ||
+ new_crtc_state->update_pipe))
+ intel_color_load_luts(new_crtc_state);
+
intel_pre_plane_update(old_crtc_state, new_crtc_state);
if (new_crtc_state->update_pipe)
@@ -14713,6 +14722,7 @@ static void intel_atomic_commit_tail(struct intel_atomic_state *state)
for_each_new_intel_crtc_in_state(state, crtc, new_crtc_state, i) {
if (new_crtc_state->base.active &&
!needs_modeset(new_crtc_state) &&
+ !new_crtc_state->preload_luts &&
(new_crtc_state->base.color_mgmt_changed ||
new_crtc_state->update_pipe))
intel_color_load_luts(new_crtc_state);
diff --git a/drivers/gpu/drm/i915/display/intel_display.h b/drivers/gpu/drm/i915/display/intel_display.h
index 355c50088589..f417e0948001 100644
--- a/drivers/gpu/drm/i915/display/intel_display.h
+++ b/drivers/gpu/drm/i915/display/intel_display.h
@@ -27,7 +27,6 @@
#include <drm/drm_util.h>
#include <drm/i915_drm.h>
-#include "intel_dp_link_training.h"
enum link_m_n_set;
struct dpll;
diff --git a/drivers/gpu/drm/i915/display/intel_display_types.h b/drivers/gpu/drm/i915/display/intel_display_types.h
index 4341bd66a418..1a7334dbe802 100644
--- a/drivers/gpu/drm/i915/display/intel_display_types.h
+++ b/drivers/gpu/drm/i915/display/intel_display_types.h
@@ -775,6 +775,7 @@ struct intel_crtc_state {
bool disable_cxsr;
bool update_wm_pre, update_wm_post; /* watermarks are updated */
bool fifo_changed; /* FIFO split is changed */
+ bool preload_luts;
/* Pipe source size (ie. panel fitter input size)
* All planes will be positioned inside this space,
diff --git a/drivers/gpu/drm/i915/display/intel_dp_mst.c b/drivers/gpu/drm/i915/display/intel_dp_mst.c
index 9ae5b8b6bbbc..03d1cba0b696 100644
--- a/drivers/gpu/drm/i915/display/intel_dp_mst.c
+++ b/drivers/gpu/drm/i915/display/intel_dp_mst.c
@@ -331,6 +331,8 @@ static void intel_mst_pre_enable_dp(struct intel_encoder *encoder,
ret = drm_dp_update_payload_part1(&intel_dp->mst_mgr);
intel_ddi_enable_pipe_clock(pipe_config);
+
+ intel_ddi_set_dp_msa(pipe_config, conn_state);
}
static void intel_mst_enable_dp(struct intel_encoder *encoder,
diff --git a/drivers/gpu/drm/i915/display/intel_fbdev.c b/drivers/gpu/drm/i915/display/intel_fbdev.c
index 3d1061470e76..48c960ca12fb 100644
--- a/drivers/gpu/drm/i915/display/intel_fbdev.c
+++ b/drivers/gpu/drm/i915/display/intel_fbdev.c
@@ -234,6 +234,11 @@ static int intelfb_create(struct drm_fb_helper *helper,
info->apertures->ranges[0].base = ggtt->gmadr.start;
info->apertures->ranges[0].size = ggtt->mappable_end;
+ /* Our framebuffer is the entirety of fbdev's system memory */
+ info->fix.smem_start =
+ (unsigned long)(ggtt->gmadr.start + vma->node.start);
+ info->fix.smem_len = vma->node.size;
+
vaddr = i915_vma_pin_iomap(vma);
if (IS_ERR(vaddr)) {
DRM_ERROR("Failed to remap framebuffer into virtual memory\n");
@@ -243,10 +248,6 @@ static int intelfb_create(struct drm_fb_helper *helper,
info->screen_base = vaddr;
info->screen_size = vma->node.size;
- /* Our framebuffer is the entirety of fbdev's system memory */
- info->fix.smem_start = (unsigned long)info->screen_base;
- info->fix.smem_len = info->screen_size;
-
drm_fb_helper_fill_info(info, &ifbdev->helper, sizes);
/* If the object is shmemfs backed, it will have given us zeroed pages.
diff --git a/drivers/gpu/drm/i915/display/intel_sprite.c b/drivers/gpu/drm/i915/display/intel_sprite.c
index edc41fc40726..72fda0430062 100644
--- a/drivers/gpu/drm/i915/display/intel_sprite.c
+++ b/drivers/gpu/drm/i915/display/intel_sprite.c
@@ -2885,7 +2885,7 @@ struct intel_plane *
skl_universal_plane_create(struct drm_i915_private *dev_priv,
enum pipe pipe, enum plane_id plane_id)
{
- static const struct drm_plane_funcs *plane_funcs;
+ const struct drm_plane_funcs *plane_funcs;
struct intel_plane *plane;
enum drm_plane_type plane_type;
unsigned int supported_rotations;
diff --git a/drivers/gpu/drm/i915/gem/i915_gem_context.c b/drivers/gpu/drm/i915/gem/i915_gem_context.c
index de6e55af82cf..4237a2887ff2 100644
--- a/drivers/gpu/drm/i915/gem/i915_gem_context.c
+++ b/drivers/gpu/drm/i915/gem/i915_gem_context.c
@@ -236,6 +236,8 @@ static void i915_gem_context_free(struct i915_gem_context *ctx)
free_engines(rcu_access_pointer(ctx->engines));
mutex_destroy(&ctx->engines_mutex);
+ kfree(ctx->jump_whitelist);
+
if (ctx->timeline)
intel_timeline_put(ctx->timeline);
@@ -366,7 +368,7 @@ static struct intel_engine_cs *active_engine(struct intel_context *ce)
if (!ce->timeline)
return NULL;
- rcu_read_lock();
+ mutex_lock(&ce->timeline->mutex);
list_for_each_entry_reverse(rq, &ce->timeline->requests, link) {
if (i915_request_completed(rq))
break;
@@ -376,7 +378,7 @@ static struct intel_engine_cs *active_engine(struct intel_context *ce)
if (engine)
break;
}
- rcu_read_unlock();
+ mutex_unlock(&ce->timeline->mutex);
return engine;
}
@@ -527,6 +529,9 @@ __create_context(struct drm_i915_private *i915)
for (i = 0; i < ARRAY_SIZE(ctx->hang_timestamp); i++)
ctx->hang_timestamp[i] = jiffies - CONTEXT_FAST_HANG_JIFFIES;
+ ctx->jump_whitelist = NULL;
+ ctx->jump_whitelist_cmds = 0;
+
spin_lock(&i915->gem.contexts.lock);
list_add_tail(&ctx->link, &i915->gem.contexts.list);
spin_unlock(&i915->gem.contexts.lock);
@@ -722,6 +727,7 @@ int i915_gem_init_contexts(struct drm_i915_private *i915)
void i915_gem_driver_release__contexts(struct drm_i915_private *i915)
{
destroy_kernel_context(&i915->kernel_context);
+ flush_work(&i915->gem.contexts.free_work);
}
static int context_idr_cleanup(int id, void *p, void *data)
@@ -1136,7 +1142,7 @@ static int set_ppgtt(struct drm_i915_file_private *file_priv,
if (i915_gem_context_is_closed(ctx)) {
err = -ENOENT;
- goto out;
+ goto unlock;
}
if (vm == rcu_access_pointer(ctx->vm))
diff --git a/drivers/gpu/drm/i915/gem/i915_gem_context_types.h b/drivers/gpu/drm/i915/gem/i915_gem_context_types.h
index 861d7d92fe9f..3870dd5daaa0 100644
--- a/drivers/gpu/drm/i915/gem/i915_gem_context_types.h
+++ b/drivers/gpu/drm/i915/gem/i915_gem_context_types.h
@@ -176,6 +176,13 @@ struct i915_gem_context {
* per vm, which may be one per context or shared with the global GTT)
*/
struct radix_tree_root handles_vma;
+
+ /** jump_whitelist: Bit array for tracking cmds during cmdparsing
+ * Guarded by struct_mutex
+ */
+ unsigned long *jump_whitelist;
+ /** jump_whitelist_cmds: Number of cmd slots available */
+ u32 jump_whitelist_cmds;
};
#endif /* __I915_GEM_CONTEXT_TYPES_H__ */
diff --git a/drivers/gpu/drm/i915/gem/i915_gem_execbuffer.c b/drivers/gpu/drm/i915/gem/i915_gem_execbuffer.c
index e4f5c269150a..f0998f1225af 100644
--- a/drivers/gpu/drm/i915/gem/i915_gem_execbuffer.c
+++ b/drivers/gpu/drm/i915/gem/i915_gem_execbuffer.c
@@ -298,7 +298,9 @@ static inline u64 gen8_noncanonical_addr(u64 address)
static inline bool eb_use_cmdparser(const struct i915_execbuffer *eb)
{
- return intel_engine_needs_cmd_parser(eb->engine) && eb->batch_len;
+ return intel_engine_requires_cmd_parser(eb->engine) ||
+ (intel_engine_using_cmd_parser(eb->engine) &&
+ eb->args->batch_len);
}
static int eb_create(struct i915_execbuffer *eb)
@@ -1990,40 +1992,94 @@ static int i915_reset_gen7_sol_offsets(struct i915_request *rq)
return 0;
}
-static struct i915_vma *eb_parse(struct i915_execbuffer *eb, bool is_master)
+static struct i915_vma *
+shadow_batch_pin(struct i915_execbuffer *eb, struct drm_i915_gem_object *obj)
+{
+ struct drm_i915_private *dev_priv = eb->i915;
+ struct i915_vma * const vma = *eb->vma;
+ struct i915_address_space *vm;
+ u64 flags;
+
+ /*
+ * PPGTT backed shadow buffers must be mapped RO, to prevent
+ * post-scan tampering
+ */
+ if (CMDPARSER_USES_GGTT(dev_priv)) {
+ flags = PIN_GLOBAL;
+ vm = &dev_priv->ggtt.vm;
+ } else if (vma->vm->has_read_only) {
+ flags = PIN_USER;
+ vm = vma->vm;
+ i915_gem_object_set_readonly(obj);
+ } else {
+ DRM_DEBUG("Cannot prevent post-scan tampering without RO capable vm\n");
+ return ERR_PTR(-EINVAL);
+ }
+
+ return i915_gem_object_pin(obj, vm, NULL, 0, 0, flags);
+}
+
+static struct i915_vma *eb_parse(struct i915_execbuffer *eb)
{
struct intel_engine_pool_node *pool;
struct i915_vma *vma;
+ u64 batch_start;
+ u64 shadow_batch_start;
int err;
pool = intel_engine_get_pool(eb->engine, eb->batch_len);
if (IS_ERR(pool))
return ERR_CAST(pool);
- err = intel_engine_cmd_parser(eb->engine,
+ vma = shadow_batch_pin(eb, pool->obj);
+ if (IS_ERR(vma))
+ goto err;
+
+ batch_start = gen8_canonical_addr(eb->batch->node.start) +
+ eb->batch_start_offset;
+
+ shadow_batch_start = gen8_canonical_addr(vma->node.start);
+
+ err = intel_engine_cmd_parser(eb->gem_context,
+ eb->engine,
eb->batch->obj,
- pool->obj,
+ batch_start,
eb->batch_start_offset,
eb->batch_len,
- is_master);
+ pool->obj,
+ shadow_batch_start);
+
if (err) {
- if (err == -EACCES) /* unhandled chained batch */
+ i915_vma_unpin(vma);
+
+ /*
+ * Unsafe GGTT-backed buffers can still be submitted safely
+ * as non-secure.
+ * For PPGTT backing however, we have no choice but to forcibly
+ * reject unsafe buffers
+ */
+ if (CMDPARSER_USES_GGTT(eb->i915) && (err == -EACCES))
+ /* Execute original buffer non-secure */
vma = NULL;
else
vma = ERR_PTR(err);
goto err;
}
- vma = i915_gem_object_ggtt_pin(pool->obj, NULL, 0, 0, 0);
- if (IS_ERR(vma))
- goto err;
-
eb->vma[eb->buffer_count] = i915_vma_get(vma);
eb->flags[eb->buffer_count] =
__EXEC_OBJECT_HAS_PIN | __EXEC_OBJECT_HAS_REF;
vma->exec_flags = &eb->flags[eb->buffer_count];
eb->buffer_count++;
+ eb->batch_start_offset = 0;
+ eb->batch = vma;
+
+ if (CMDPARSER_USES_GGTT(eb->i915))
+ eb->batch_flags |= I915_DISPATCH_SECURE;
+
+ /* eb->batch_len unchanged */
+
vma->private = pool;
return vma;
@@ -2430,6 +2486,7 @@ i915_gem_do_execbuffer(struct drm_device *dev,
struct drm_i915_gem_exec_object2 *exec,
struct drm_syncobj **fences)
{
+ struct drm_i915_private *i915 = to_i915(dev);
struct i915_execbuffer eb;
struct dma_fence *in_fence = NULL;
struct dma_fence *exec_fence = NULL;
@@ -2441,7 +2498,7 @@ i915_gem_do_execbuffer(struct drm_device *dev,
BUILD_BUG_ON(__EXEC_OBJECT_INTERNAL_FLAGS &
~__EXEC_OBJECT_UNKNOWN_FLAGS);
- eb.i915 = to_i915(dev);
+ eb.i915 = i915;
eb.file = file;
eb.args = args;
if (DBG_FORCE_RELOC || !(args->flags & I915_EXEC_NO_RELOC))
@@ -2461,8 +2518,15 @@ i915_gem_do_execbuffer(struct drm_device *dev,
eb.batch_flags = 0;
if (args->flags & I915_EXEC_SECURE) {
+ if (INTEL_GEN(i915) >= 11)
+ return -ENODEV;
+
+ /* Return -EPERM to trigger fallback code on old binaries. */
+ if (!HAS_SECURE_BATCHES(i915))
+ return -EPERM;
+
if (!drm_is_current_master(file) || !capable(CAP_SYS_ADMIN))
- return -EPERM;
+ return -EPERM;
eb.batch_flags |= I915_DISPATCH_SECURE;
}
@@ -2539,34 +2603,19 @@ i915_gem_do_execbuffer(struct drm_device *dev,
goto err_vma;
}
+ if (eb.batch_len == 0)
+ eb.batch_len = eb.batch->size - eb.batch_start_offset;
+
if (eb_use_cmdparser(&eb)) {
struct i915_vma *vma;
- vma = eb_parse(&eb, drm_is_current_master(file));
+ vma = eb_parse(&eb);
if (IS_ERR(vma)) {
err = PTR_ERR(vma);
goto err_vma;
}
-
- if (vma) {
- /*
- * Batch parsed and accepted:
- *
- * Set the DISPATCH_SECURE bit to remove the NON_SECURE
- * bit from MI_BATCH_BUFFER_START commands issued in
- * the dispatch_execbuffer implementations. We
- * specifically don't want that set on batches the
- * command parser has accepted.
- */
- eb.batch_flags |= I915_DISPATCH_SECURE;
- eb.batch_start_offset = 0;
- eb.batch = vma;
- }
}
- if (eb.batch_len == 0)
- eb.batch_len = eb.batch->size - eb.batch_start_offset;
-
/*
* snb/ivb/vlv conflate the "batch in ppgtt" bit with the "non-secure
* batch" bit. Hence we need to pin secure batches into the global gtt.
diff --git a/drivers/gpu/drm/i915/gem/i915_gem_userptr.c b/drivers/gpu/drm/i915/gem/i915_gem_userptr.c
index 1e045c337044..4c72d74d6576 100644
--- a/drivers/gpu/drm/i915/gem/i915_gem_userptr.c
+++ b/drivers/gpu/drm/i915/gem/i915_gem_userptr.c
@@ -646,8 +646,28 @@ i915_gem_userptr_put_pages(struct drm_i915_gem_object *obj,
obj->mm.dirty = false;
for_each_sgt_page(page, sgt_iter, pages) {
- if (obj->mm.dirty)
+ if (obj->mm.dirty && trylock_page(page)) {
+ /*
+ * As this may not be anonymous memory (e.g. shmem)
+ * but exist on a real mapping, we have to lock
+ * the page in order to dirty it -- holding
+ * the page reference is not sufficient to
+ * prevent the inode from being truncated.
+ * Play safe and take the lock.
+ *
+ * However...!
+ *
+ * The mmu-notifier can be invalidated for a
+ * migrate_page that is already holding the lock
+ * on the page. Such a try_to_unmap() will result
+ * in us calling put_pages() and so recursively try
+ * to lock the page. We avoid that deadlock with
+ * a trylock_page() and in exchange we risk missing
+ * some page dirtying.
+ */
set_page_dirty(page);
+ unlock_page(page);
+ }
mark_page_accessed(page);
put_page(page);
diff --git a/drivers/gpu/drm/i915/gt/intel_context.c b/drivers/gpu/drm/i915/gt/intel_context.c
index ee9d2bcd2c13..ef7bc41ffffa 100644
--- a/drivers/gpu/drm/i915/gt/intel_context.c
+++ b/drivers/gpu/drm/i915/gt/intel_context.c
@@ -310,10 +310,23 @@ int intel_context_prepare_remote_request(struct intel_context *ce,
GEM_BUG_ON(rq->hw_context == ce);
if (rcu_access_pointer(rq->timeline) != tl) { /* timeline sharing! */
- err = mutex_lock_interruptible_nested(&tl->mutex,
- SINGLE_DEPTH_NESTING);
- if (err)
- return err;
+ /*
+ * Ideally, we just want to insert our foreign fence as
+ * a barrier into the remote context, such that this operation
+ * occurs after all current operations in that context, and
+ * all future operations must occur after this.
+ *
+ * Currently, the timeline->last_request tracking is guarded
+ * by its mutex and so we must obtain that to atomically
+ * insert our barrier. However, since we already hold our
+ * timeline->mutex, we must be careful against potential
+ * inversion if we are the kernel_context as the remote context
+ * will itself poke at the kernel_context when it needs to
+ * unpin. Ergo, if already locked, we drop both locks and
+ * try again (through the magic of userspace repeating EAGAIN).
+ */
+ if (!mutex_trylock(&tl->mutex))
+ return -EAGAIN;
/* Queue this switch after current activity by this context. */
err = i915_active_fence_set(&tl->last_request, rq);
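
The switch above from mutex_lock_interruptible_nested() to mutex_trylock() avoids a lock inversion between two timeline mutexes (the caller already holds its own timeline->mutex) by refusing to wait and handing -EAGAIN back for the caller to retry. A minimal userspace sketch of that back-off pattern, with invented names rather than anything from the patch:

#include <errno.h>
#include <pthread.h>

struct timeline_like {
        pthread_mutex_t mutex;
};

/*
 * The caller already holds its own timeline mutex. Blocking on a second
 * timeline mutex could deadlock if another thread takes the same pair in
 * the opposite order, so try once and bounce -EAGAIN on contention.
 */
static int queue_barrier_on_remote(struct timeline_like *remote)
{
        if (pthread_mutex_trylock(&remote->mutex) != 0)
                return -EAGAIN;

        /* ... insert the barrier request under remote->mutex ... */

        pthread_mutex_unlock(&remote->mutex);
        return 0;
}
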
diff --git a/drivers/gpu/drm/i915/gt/intel_engine.h b/drivers/gpu/drm/i915/gt/intel_engine.h
index bc3b72bfa9e3..01765a7ec18f 100644
--- a/drivers/gpu/drm/i915/gt/intel_engine.h
+++ b/drivers/gpu/drm/i915/gt/intel_engine.h
@@ -100,9 +100,7 @@ execlists_num_ports(const struct intel_engine_execlists * const execlists)
static inline struct i915_request *
execlists_active(const struct intel_engine_execlists *execlists)
{
- GEM_BUG_ON(execlists->active - execlists->inflight >
- execlists_num_ports(execlists));
- return READ_ONCE(*execlists->active);
+ return *READ_ONCE(execlists->active);
}
static inline void
diff --git a/drivers/gpu/drm/i915/gt/intel_engine_cs.c b/drivers/gpu/drm/i915/gt/intel_engine_cs.c
index f8113bc756c6..813bd3a610d2 100644
--- a/drivers/gpu/drm/i915/gt/intel_engine_cs.c
+++ b/drivers/gpu/drm/i915/gt/intel_engine_cs.c
@@ -28,13 +28,13 @@
#include "i915_drv.h"
-#include "gt/intel_gt.h"
-
+#include "intel_context.h"
#include "intel_engine.h"
#include "intel_engine_pm.h"
#include "intel_engine_pool.h"
#include "intel_engine_user.h"
-#include "intel_context.h"
+#include "intel_gt.h"
+#include "intel_gt_requests.h"
#include "intel_lrc.h"
#include "intel_reset.h"
#include "intel_ring.h"
@@ -616,6 +616,7 @@ static int intel_engine_setup_common(struct intel_engine_cs *engine)
intel_engine_init_execlists(engine);
intel_engine_init_cmd_parser(engine);
intel_engine_init__pm(engine);
+ intel_engine_init_retire(engine);
intel_engine_pool_init(&engine->pool);
@@ -838,6 +839,7 @@ void intel_engine_cleanup_common(struct intel_engine_cs *engine)
cleanup_status_page(engine);
+ intel_engine_fini_retire(engine);
intel_engine_pool_fini(&engine->pool);
intel_engine_fini_breadcrumbs(engine);
intel_engine_cleanup_cmd_parser(engine);
@@ -1372,6 +1374,7 @@ static void intel_engine_print_registers(struct intel_engine_cs *engine,
}
execlists_active_lock_bh(execlists);
+ rcu_read_lock();
for (port = execlists->active; (rq = *port); port++) {
char hdr[80];
int len;
@@ -1409,6 +1412,7 @@ static void intel_engine_print_registers(struct intel_engine_cs *engine,
if (tl)
intel_timeline_put(tl);
}
+ rcu_read_unlock();
execlists_active_unlock_bh(execlists);
} else if (INTEL_GEN(dev_priv) > 6) {
drm_printf(m, "\tPP_DIR_BASE: 0x%08x\n",
diff --git a/drivers/gpu/drm/i915/gt/intel_engine_heartbeat.c b/drivers/gpu/drm/i915/gt/intel_engine_heartbeat.c
index 5051f304705b..06aa14c7aa8c 100644
--- a/drivers/gpu/drm/i915/gt/intel_engine_heartbeat.c
+++ b/drivers/gpu/drm/i915/gt/intel_engine_heartbeat.c
@@ -141,8 +141,8 @@ void intel_engine_unpark_heartbeat(struct intel_engine_cs *engine)
void intel_engine_park_heartbeat(struct intel_engine_cs *engine)
{
- cancel_delayed_work(&engine->heartbeat.work);
- i915_request_put(fetch_and_zero(&engine->heartbeat.systole));
+ if (cancel_delayed_work(&engine->heartbeat.work))
+ i915_request_put(fetch_and_zero(&engine->heartbeat.systole));
}
void intel_engine_init_heartbeat(struct intel_engine_cs *engine)
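
Making the i915_request_put() conditional on cancel_delayed_work() returning true keeps the reference accounting balanced: only the party that actually dequeued the pending work owns, and must drop, the work's reference. A rough userspace analogue of that rule, with illustrative names and C11 atomics:

#include <stdatomic.h>
#include <stdbool.h>
#include <stdlib.h>

struct req_like {
        atomic_int refcount;            /* the queued work holds one reference */
};

static void req_put(struct req_like *rq)
{
        if (rq && atomic_fetch_sub(&rq->refcount, 1) == 1)
                free(rq);
}

/*
 * Stand-in for cancel_delayed_work(): true means the work was removed
 * before it ran, i.e. before it could consume its own reference.
 */
static bool cancel_pending_work(void)
{
        return true;
}

/*
 * Drop the heartbeat request's reference only if we actually cancelled the
 * pending work; otherwise the work will drop it itself (or already has),
 * and an unconditional put here would underflow the refcount.
 */
static void park_heartbeat(struct req_like **systole)
{
        if (cancel_pending_work()) {
                struct req_like *rq = *systole;

                *systole = NULL;
                req_put(rq);
        }
}
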
diff --git a/drivers/gpu/drm/i915/gt/intel_engine_pm.c b/drivers/gpu/drm/i915/gt/intel_engine_pm.c
index 3c0f490ff2c7..0e1ad4a4bd97 100644
--- a/drivers/gpu/drm/i915/gt/intel_engine_pm.c
+++ b/drivers/gpu/drm/i915/gt/intel_engine_pm.c
@@ -73,8 +73,42 @@ static inline void __timeline_mark_unlock(struct intel_context *ce,
#endif /* !IS_ENABLED(CONFIG_LOCKDEP) */
+static void
+__queue_and_release_pm(struct i915_request *rq,
+ struct intel_timeline *tl,
+ struct intel_engine_cs *engine)
+{
+ struct intel_gt_timelines *timelines = &engine->gt->timelines;
+
+ GEM_TRACE("%s\n", engine->name);
+
+ /*
+ * We have to serialise all potential retirement paths with our
+ * submission, as we don't want to underflow either the
+ * engine->wakeref.counter or our timeline->active_count.
+ *
+ * Equally, we cannot allow a new submission to start until
+ * after we finish queueing, nor could we allow that submitter
+ * to retire us before we are ready!
+ */
+ spin_lock(&timelines->lock);
+
+ /* Let intel_gt_retire_requests() retire us (acquired under lock) */
+ if (!atomic_fetch_inc(&tl->active_count))
+ list_add_tail(&tl->link, &timelines->active_list);
+
+ /* Hand the request over to HW and so engine_retire() */
+ __i915_request_queue(rq, NULL);
+
+ /* Let new submissions commence (and maybe retire this timeline) */
+ __intel_wakeref_defer_park(&engine->wakeref);
+
+ spin_unlock(&timelines->lock);
+}
+
static bool switch_to_kernel_context(struct intel_engine_cs *engine)
{
+ struct intel_context *ce = engine->kernel_context;
struct i915_request *rq;
unsigned long flags;
bool result = true;
@@ -98,16 +132,31 @@ static bool switch_to_kernel_context(struct intel_engine_cs *engine)
* This should hold true as we can only park the engine after
* retiring the last request, thus all rings should be empty and
* all timelines idle.
+ *
+ * For unlocking, there are 2 other parties and the GPU who have a
+ * stake here.
+ *
+ * A new gpu user will be waiting on the engine-pm to start their
+ * engine_unpark. New waiters are predicated on engine->wakeref.count
+ * and so intel_wakeref_defer_park() acts like a mutex_unlock of the
+ * engine->wakeref.
+ *
+ * The other party is intel_gt_retire_requests(), which is walking the
+ * list of active timelines looking for completions. Meanwhile as soon
+ * as we call __i915_request_queue(), the GPU may complete our request.
+ * Ergo, if we put ourselves on the timelines.active_list
+ * (see intel_timeline_enter()) before we increment the
+ * engine->wakeref.count, we may see the request completion and retire
+ * it causing an underflow of the engine->wakeref.
*/
- flags = __timeline_mark_lock(engine->kernel_context);
+ flags = __timeline_mark_lock(ce);
+ GEM_BUG_ON(atomic_read(&ce->timeline->active_count) < 0);
- rq = __i915_request_create(engine->kernel_context, GFP_NOWAIT);
+ rq = __i915_request_create(ce, GFP_NOWAIT);
if (IS_ERR(rq))
/* Context switch failed, hope for the best! Maybe reset? */
goto out_unlock;
- intel_timeline_enter(i915_request_timeline(rq));
-
/* Check again on the next retirement. */
engine->wakeref_serial = engine->serial + 1;
i915_request_add_active_barriers(rq);
@@ -116,13 +165,12 @@ static bool switch_to_kernel_context(struct intel_engine_cs *engine)
rq->sched.attr.priority = I915_PRIORITY_BARRIER;
__i915_request_commit(rq);
- /* Release our exclusive hold on the engine */
- __intel_wakeref_defer_park(&engine->wakeref);
- __i915_request_queue(rq, NULL);
+ /* Expose ourselves to the world */
+ __queue_and_release_pm(rq, ce->timeline, engine);
result = false;
out_unlock:
- __timeline_mark_unlock(engine->kernel_context, flags);
+ __timeline_mark_unlock(ce, flags);
return result;
}
@@ -177,7 +225,8 @@ static int __engine_park(struct intel_wakeref *wf)
engine->execlists.no_priolist = false;
- intel_gt_pm_put(engine->gt);
+ /* While gt calls i915_vma_parked(), we have to break the lock cycle */
+ intel_gt_pm_put_async(engine->gt);
return 0;
}
diff --git a/drivers/gpu/drm/i915/gt/intel_engine_pm.h b/drivers/gpu/drm/i915/gt/intel_engine_pm.h
index 739c50fefcef..24e20344dc22 100644
--- a/drivers/gpu/drm/i915/gt/intel_engine_pm.h
+++ b/drivers/gpu/drm/i915/gt/intel_engine_pm.h
@@ -31,6 +31,16 @@ static inline void intel_engine_pm_put(struct intel_engine_cs *engine)
intel_wakeref_put(&engine->wakeref);
}
+static inline void intel_engine_pm_put_async(struct intel_engine_cs *engine)
+{
+ intel_wakeref_put_async(&engine->wakeref);
+}
+
+static inline void intel_engine_pm_flush(struct intel_engine_cs *engine)
+{
+ intel_wakeref_unlock_wait(&engine->wakeref);
+}
+
void intel_engine_init__pm(struct intel_engine_cs *engine);
#endif /* INTEL_ENGINE_PM_H */
diff --git a/drivers/gpu/drm/i915/gt/intel_engine_types.h b/drivers/gpu/drm/i915/gt/intel_engine_types.h
index c5d1047a4bc5..17f1f1441efc 100644
--- a/drivers/gpu/drm/i915/gt/intel_engine_types.h
+++ b/drivers/gpu/drm/i915/gt/intel_engine_types.h
@@ -451,16 +451,25 @@ struct intel_engine_cs {
struct intel_engine_execlists execlists;
+ /*
+ * Keep track of completed timelines on this engine for early
+ * retirement with the goal of quickly enabling powersaving as
+ * soon as the engine is idle.
+ */
+ struct intel_timeline *retire;
+ struct work_struct retire_work;
+
/* status_notifier: list of callbacks for context-switch changes */
struct atomic_notifier_head context_status_notifier;
-#define I915_ENGINE_NEEDS_CMD_PARSER BIT(0)
+#define I915_ENGINE_USING_CMD_PARSER BIT(0)
#define I915_ENGINE_SUPPORTS_STATS BIT(1)
#define I915_ENGINE_HAS_PREEMPTION BIT(2)
#define I915_ENGINE_HAS_SEMAPHORES BIT(3)
#define I915_ENGINE_NEEDS_BREADCRUMB_TASKLET BIT(4)
#define I915_ENGINE_IS_VIRTUAL BIT(5)
#define I915_ENGINE_HAS_RELATIVE_MMIO BIT(6)
+#define I915_ENGINE_REQUIRES_CMD_PARSER BIT(7)
unsigned int flags;
/*
@@ -528,9 +537,15 @@ struct intel_engine_cs {
};
static inline bool
-intel_engine_needs_cmd_parser(const struct intel_engine_cs *engine)
+intel_engine_using_cmd_parser(const struct intel_engine_cs *engine)
+{
+ return engine->flags & I915_ENGINE_USING_CMD_PARSER;
+}
+
+static inline bool
+intel_engine_requires_cmd_parser(const struct intel_engine_cs *engine)
{
- return engine->flags & I915_ENGINE_NEEDS_CMD_PARSER;
+ return engine->flags & I915_ENGINE_REQUIRES_CMD_PARSER;
}
static inline bool
diff --git a/drivers/gpu/drm/i915/gt/intel_gt_pm.c b/drivers/gpu/drm/i915/gt/intel_gt_pm.c
index 060a27d9af34..a459a42ad5c2 100644
--- a/drivers/gpu/drm/i915/gt/intel_gt_pm.c
+++ b/drivers/gpu/drm/i915/gt/intel_gt_pm.c
@@ -61,6 +61,9 @@ static int __gt_unpark(struct intel_wakeref *wf)
gt->awake = intel_display_power_get(i915, POWER_DOMAIN_GT_IRQ);
GEM_BUG_ON(!gt->awake);
+ if (NEEDS_RC6_CTX_CORRUPTION_WA(i915))
+ intel_uncore_forcewake_get(&i915->uncore, FORCEWAKE_ALL);
+
intel_rps_unpark(&gt->rps);
i915_pmu_gt_unparked(i915);
@@ -86,6 +89,11 @@ static int __gt_park(struct intel_wakeref *wf)
/* Everything switched off, flush any residual interrupt just in case */
intel_synchronize_irq(i915);
+ if (NEEDS_RC6_CTX_CORRUPTION_WA(i915)) {
+ intel_rc6_ctx_wa_check(&i915->gt.rc6);
+ intel_uncore_forcewake_put(&i915->uncore, FORCEWAKE_ALL);
+ }
+
GEM_BUG_ON(!wakeref);
intel_display_power_put(i915, POWER_DOMAIN_GT_IRQ, wakeref);
@@ -97,7 +105,6 @@ static int __gt_park(struct intel_wakeref *wf)
static const struct intel_wakeref_ops wf_ops = {
.get = __gt_unpark,
.put = __gt_park,
- .flags = INTEL_WAKEREF_PUT_ASYNC,
};
void intel_gt_pm_init_early(struct intel_gt *gt)
@@ -264,7 +271,7 @@ void intel_gt_suspend_prepare(struct intel_gt *gt)
static suspend_state_t pm_suspend_target(void)
{
-#if IS_ENABLED(CONFIG_PM_SLEEP)
+#if IS_ENABLED(CONFIG_SUSPEND) && IS_ENABLED(CONFIG_PM_SLEEP)
return pm_suspend_target_state;
#else
return PM_SUSPEND_TO_IDLE;
diff --git a/drivers/gpu/drm/i915/gt/intel_gt_pm.h b/drivers/gpu/drm/i915/gt/intel_gt_pm.h
index b3e17399be9b..990efc27a4e4 100644
--- a/drivers/gpu/drm/i915/gt/intel_gt_pm.h
+++ b/drivers/gpu/drm/i915/gt/intel_gt_pm.h
@@ -32,6 +32,11 @@ static inline void intel_gt_pm_put(struct intel_gt *gt)
intel_wakeref_put(&gt->wakeref);
}
+static inline void intel_gt_pm_put_async(struct intel_gt *gt)
+{
+ intel_wakeref_put_async(&gt->wakeref);
+}
+
static inline int intel_gt_pm_wait_for_idle(struct intel_gt *gt)
{
return intel_wakeref_wait_for_idle(&gt->wakeref);
diff --git a/drivers/gpu/drm/i915/gt/intel_gt_requests.c b/drivers/gpu/drm/i915/gt/intel_gt_requests.c
index b73229a84d85..3dc13ecf41bf 100644
--- a/drivers/gpu/drm/i915/gt/intel_gt_requests.c
+++ b/drivers/gpu/drm/i915/gt/intel_gt_requests.c
@@ -4,6 +4,8 @@
* Copyright © 2019 Intel Corporation
*/
+#include <linux/workqueue.h>
+
#include "i915_drv.h" /* for_each_engine() */
#include "i915_request.h"
#include "intel_gt.h"
@@ -29,6 +31,79 @@ static void flush_submission(struct intel_gt *gt)
intel_engine_flush_submission(engine);
}
+static void engine_retire(struct work_struct *work)
+{
+ struct intel_engine_cs *engine =
+ container_of(work, typeof(*engine), retire_work);
+ struct intel_timeline *tl = xchg(&engine->retire, NULL);
+
+ do {
+ struct intel_timeline *next = xchg(&tl->retire, NULL);
+
+ /*
+ * Our goal here is to retire _idle_ timelines as soon as
+ * possible (as they are idle, we do not expect userspace
+ * to be cleaning up anytime soon).
+ *
+ * If the timeline is currently locked, either it is being
+ * retired elsewhere or about to be!
+ */
+ if (mutex_trylock(&tl->mutex)) {
+ retire_requests(tl);
+ mutex_unlock(&tl->mutex);
+ }
+ intel_timeline_put(tl);
+
+ GEM_BUG_ON(!next);
+ tl = ptr_mask_bits(next, 1);
+ } while (tl);
+}
+
+static bool add_retire(struct intel_engine_cs *engine,
+ struct intel_timeline *tl)
+{
+ struct intel_timeline *first;
+
+ /*
+ * We open-code a llist here to include the additional tag [BIT(0)]
+ * so that we know when the timeline is already on a
+ * retirement queue: either this engine or another.
+ *
+ * However, we rely on the fact that a timeline can only be active on a
+ * single engine at any one time, and that add_retire() is called before
+ * the engine releases the timeline and it is transferred to another
+ * engine to retire.
+ */
+
+ if (READ_ONCE(tl->retire)) /* already queued */
+ return false;
+
+ intel_timeline_get(tl);
+ first = READ_ONCE(engine->retire);
+ do
+ tl->retire = ptr_pack_bits(first, 1, 1);
+ while (!try_cmpxchg(&engine->retire, &first, tl));
+
+ return !first;
+}
+
+void intel_engine_add_retire(struct intel_engine_cs *engine,
+ struct intel_timeline *tl)
+{
+ if (add_retire(engine, tl))
+ schedule_work(&engine->retire_work);
+}
+
+void intel_engine_init_retire(struct intel_engine_cs *engine)
+{
+ INIT_WORK(&engine->retire_work, engine_retire);
+}
+
+void intel_engine_fini_retire(struct intel_engine_cs *engine)
+{
+ flush_work(&engine->retire_work);
+ GEM_BUG_ON(engine->retire);
+}
+
long intel_gt_retire_requests_timeout(struct intel_gt *gt, long timeout)
{
struct intel_gt_timelines *timelines = &gt->timelines;
@@ -52,8 +127,8 @@ long intel_gt_retire_requests_timeout(struct intel_gt *gt, long timeout)
}
intel_timeline_get(tl);
- GEM_BUG_ON(!tl->active_count);
- tl->active_count++; /* pin the list element */
+ GEM_BUG_ON(!atomic_read(&tl->active_count));
+ atomic_inc(&tl->active_count); /* pin the list element */
spin_unlock_irqrestore(&timelines->lock, flags);
if (timeout > 0) {
@@ -74,16 +149,16 @@ long intel_gt_retire_requests_timeout(struct intel_gt *gt, long timeout)
/* Resume iteration after dropping lock */
list_safe_reset_next(tl, tn, link);
- if (--tl->active_count)
- active_count += !!rcu_access_pointer(tl->last_request.fence);
- else
+ if (atomic_dec_and_test(&tl->active_count))
list_del(&tl->link);
+ else
+ active_count += !!rcu_access_pointer(tl->last_request.fence);
mutex_unlock(&tl->mutex);
/* Defer the final release to after the spinlock */
if (refcount_dec_and_test(&tl->kref.refcount)) {
- GEM_BUG_ON(tl->active_count);
+ GEM_BUG_ON(atomic_read(&tl->active_count));
list_add(&tl->link, &free);
}
}
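
add_retire() above open-codes a lock-free retirement queue: bit 0 of ->retire tags a timeline as already queued (so even a NULL next pointer reads as non-NULL), and try_cmpxchg() pushes onto the per-engine list without taking a lock. A compact userspace sketch of the same push/drain idea, assuming a single drain worker; the names and C11 atomics here are illustrative only, not the driver's helpers:

#include <stdatomic.h>
#include <stdbool.h>
#include <stddef.h>
#include <stdint.h>

struct tl_like {
        struct tl_like *retire;         /* next pointer, tag in bit 0 */
};

/* Bit 0 marks "on a retirement queue"; TAGGED(NULL) is still non-NULL. */
#define TAGGED(p)       ((struct tl_like *)((uintptr_t)(p) | 1))
#define UNTAG(p)        ((struct tl_like *)((uintptr_t)(p) & ~(uintptr_t)1))

static _Atomic(struct tl_like *) retire_head;

static bool push_retire(struct tl_like *tl)
{
        struct tl_like *first;

        if (tl->retire)                 /* already queued somewhere */
                return false;

        first = atomic_load(&retire_head);
        do {
                tl->retire = TAGGED(first);
        } while (!atomic_compare_exchange_weak(&retire_head, &first, tl));

        return first == NULL;           /* true: caller should kick the worker */
}

static void drain_retire(void)          /* single drain worker assumed */
{
        struct tl_like *tl = atomic_exchange(&retire_head, NULL);

        while (tl) {
                struct tl_like *next = tl->retire;

                tl->retire = NULL;      /* allow re-queueing while we work */
                /* ... retire tl here ... */
                tl = UNTAG(next);
        }
}
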
diff --git a/drivers/gpu/drm/i915/gt/intel_gt_requests.h b/drivers/gpu/drm/i915/gt/intel_gt_requests.h
index bd31cbce47e0..d626fb115386 100644
--- a/drivers/gpu/drm/i915/gt/intel_gt_requests.h
+++ b/drivers/gpu/drm/i915/gt/intel_gt_requests.h
@@ -7,7 +7,9 @@
#ifndef INTEL_GT_REQUESTS_H
#define INTEL_GT_REQUESTS_H
+struct intel_engine_cs;
struct intel_gt;
+struct intel_timeline;
long intel_gt_retire_requests_timeout(struct intel_gt *gt, long timeout);
static inline void intel_gt_retire_requests(struct intel_gt *gt)
@@ -15,6 +17,11 @@ static inline void intel_gt_retire_requests(struct intel_gt *gt)
intel_gt_retire_requests_timeout(gt, 0);
}
+void intel_engine_init_retire(struct intel_engine_cs *engine);
+void intel_engine_add_retire(struct intel_engine_cs *engine,
+ struct intel_timeline *tl);
+void intel_engine_fini_retire(struct intel_engine_cs *engine);
+
int intel_gt_wait_for_idle(struct intel_gt *gt, long timeout);
void intel_gt_init_requests(struct intel_gt *gt);
diff --git a/drivers/gpu/drm/i915/gt/intel_lrc.c b/drivers/gpu/drm/i915/gt/intel_lrc.c
index 51aef2a233cb..9fdefbdc3546 100644
--- a/drivers/gpu/drm/i915/gt/intel_lrc.c
+++ b/drivers/gpu/drm/i915/gt/intel_lrc.c
@@ -142,6 +142,7 @@
#include "intel_engine_pm.h"
#include "intel_gt.h"
#include "intel_gt_pm.h"
+#include "intel_gt_requests.h"
#include "intel_lrc_reg.h"
#include "intel_mocs.h"
#include "intel_reset.h"
@@ -990,6 +991,59 @@ static void intel_engine_context_out(struct intel_engine_cs *engine)
write_sequnlock_irqrestore(&engine->stats.lock, flags);
}
+static void restore_default_state(struct intel_context *ce,
+ struct intel_engine_cs *engine)
+{
+ u32 *regs = ce->lrc_reg_state;
+
+ if (engine->pinned_default_state)
+ memcpy(regs, /* skip restoring the vanilla PPHWSP */
+ engine->pinned_default_state + LRC_STATE_PN * PAGE_SIZE,
+ engine->context_size - PAGE_SIZE);
+
+ execlists_init_reg_state(regs, ce, engine, ce->ring, false);
+}
+
+static void reset_active(struct i915_request *rq,
+ struct intel_engine_cs *engine)
+{
+ struct intel_context * const ce = rq->hw_context;
+ u32 head;
+
+ /*
+ * The executing context has been cancelled. We want to prevent
+ * further execution along this context and propagate the error on
+ * to anything depending on its results.
+ *
+ * In __i915_request_submit(), we apply the -EIO and remove the
+ * requests' payloads for any banned requests. But first, we must
+ * rewind the context back to the start of the incomplete request so
+ * that we do not jump back into the middle of the batch.
+ *
+ * We preserve the breadcrumbs and semaphores of the incomplete
+ * requests so that inter-timeline dependencies (i.e other timelines)
+ * remain correctly ordered. And we defer to __i915_request_submit()
+ * so that all asynchronous waits are correctly handled.
+ */
+ GEM_TRACE("%s(%s): { rq=%llx:%lld }\n",
+ __func__, engine->name, rq->fence.context, rq->fence.seqno);
+
+ /* On resubmission of the active request, payload will be scrubbed */
+ if (i915_request_completed(rq))
+ head = rq->tail;
+ else
+ head = active_request(ce->timeline, rq)->head;
+ ce->ring->head = intel_ring_wrap(ce->ring, head);
+ intel_ring_update_space(ce->ring);
+
+ /* Scrub the context image to prevent replaying the previous batch */
+ restore_default_state(ce, engine);
+ __execlists_update_reg_state(ce, engine);
+
+ /* We've switched away, so this should be a no-op, but intent matters */
+ ce->lrc_desc |= CTX_DESC_FORCE_RESTORE;
+}
+
static inline struct intel_engine_cs *
__execlists_schedule_in(struct i915_request *rq)
{
@@ -998,6 +1052,9 @@ __execlists_schedule_in(struct i915_request *rq)
intel_context_get(ce);
+ if (unlikely(i915_gem_context_is_banned(ce->gem_context)))
+ reset_active(rq, engine);
+
if (ce->tag) {
/* Use a fixed tag for OA and friends */
ce->lrc_desc |= (u64)ce->tag << 32;
@@ -1047,71 +1104,29 @@ static void kick_siblings(struct i915_request *rq, struct intel_context *ce)
tasklet_schedule(&ve->base.execlists.tasklet);
}
-static void restore_default_state(struct intel_context *ce,
- struct intel_engine_cs *engine)
-{
- u32 *regs = ce->lrc_reg_state;
-
- if (engine->pinned_default_state)
- memcpy(regs, /* skip restoring the vanilla PPHWSP */
- engine->pinned_default_state + LRC_STATE_PN * PAGE_SIZE,
- engine->context_size - PAGE_SIZE);
-
- execlists_init_reg_state(regs, ce, engine, ce->ring, false);
-}
-
-static void reset_active(struct i915_request *rq,
- struct intel_engine_cs *engine)
+static inline void
+__execlists_schedule_out(struct i915_request *rq,
+ struct intel_engine_cs * const engine)
{
struct intel_context * const ce = rq->hw_context;
- u32 head;
/*
- * The executing context has been cancelled. We want to prevent
- * further execution along this context and propagate the error on
- * to anything depending on its results.
- *
- * In __i915_request_submit(), we apply the -EIO and remove the
- * requests' payloads for any banned requests. But first, we must
- * rewind the context back to the start of the incomplete request so
- * that we do not jump back into the middle of the batch.
- *
- * We preserve the breadcrumbs and semaphores of the incomplete
- * requests so that inter-timeline dependencies (i.e other timelines)
- * remain correctly ordered. And we defer to __i915_request_submit()
- * so that all asynchronous waits are correctly handled.
+ * NB process_csb() is not under the engine->active.lock and hence
+ * schedule_out can race with schedule_in meaning that we should
+ * refrain from doing non-trivial work here.
*/
- GEM_TRACE("%s(%s): { rq=%llx:%lld }\n",
- __func__, engine->name, rq->fence.context, rq->fence.seqno);
- /* On resubmission of the active request, payload will be scrubbed */
- if (i915_request_completed(rq))
- head = rq->tail;
- else
- head = active_request(ce->timeline, rq)->head;
- ce->ring->head = intel_ring_wrap(ce->ring, head);
- intel_ring_update_space(ce->ring);
-
- /* Scrub the context image to prevent replaying the previous batch */
- restore_default_state(ce, engine);
- __execlists_update_reg_state(ce, engine);
-
- /* We've switched away, so this should be a no-op, but intent matters */
- ce->lrc_desc |= CTX_DESC_FORCE_RESTORE;
-}
-
-static inline void
-__execlists_schedule_out(struct i915_request *rq,
- struct intel_engine_cs * const engine)
-{
- struct intel_context * const ce = rq->hw_context;
+ /*
+ * If we have just completed this context, the engine may now be
+ * idle and we want to re-enter powersaving.
+ */
+ if (list_is_last(&rq->link, &ce->timeline->requests) &&
+ i915_request_completed(rq))
+ intel_engine_add_retire(engine, ce->timeline);
intel_engine_context_out(engine);
execlists_context_status_change(rq, INTEL_CONTEXT_SCHEDULE_OUT);
- intel_gt_pm_put(engine->gt);
-
- if (unlikely(i915_gem_context_is_banned(ce->gem_context)))
- reset_active(rq, engine);
+ intel_gt_pm_put_async(engine->gt);
/*
* If this is part of a virtual engine, its next request may
@@ -1931,16 +1946,17 @@ skip_submit:
static void
cancel_port_requests(struct intel_engine_execlists * const execlists)
{
- struct i915_request * const *port, *rq;
+ struct i915_request * const *port;
- for (port = execlists->pending; (rq = *port); port++)
- execlists_schedule_out(rq);
+ for (port = execlists->pending; *port; port++)
+ execlists_schedule_out(*port);
memset(execlists->pending, 0, sizeof(execlists->pending));
- for (port = execlists->active; (rq = *port); port++)
- execlists_schedule_out(rq);
- execlists->active =
- memset(execlists->inflight, 0, sizeof(execlists->inflight));
+ /* Mark the end of active before we overwrite *active */
+ for (port = xchg(&execlists->active, execlists->pending); *port; port++)
+ execlists_schedule_out(*port);
+ WRITE_ONCE(execlists->active,
+ memset(execlists->inflight, 0, sizeof(execlists->inflight)));
}
static inline void
@@ -2093,23 +2109,27 @@ static void process_csb(struct intel_engine_cs *engine)
else
promote = gen8_csb_parse(execlists, buf + 2 * head);
if (promote) {
+ struct i915_request * const *old = execlists->active;
+
+ /* Point active to the new ELSP; prevent overwriting */
+ WRITE_ONCE(execlists->active, execlists->pending);
+ set_timeslice(engine);
+
if (!inject_preempt_hang(execlists))
ring_set_paused(engine, 0);
/* cancel old inflight, prepare for switch */
- trace_ports(execlists, "preempted", execlists->active);
- while (*execlists->active)
- execlists_schedule_out(*execlists->active++);
+ trace_ports(execlists, "preempted", old);
+ while (*old)
+ execlists_schedule_out(*old++);
/* switch pending to inflight */
GEM_BUG_ON(!assert_pending_valid(execlists, "promote"));
- execlists->active =
- memcpy(execlists->inflight,
- execlists->pending,
- execlists_num_ports(execlists) *
- sizeof(*execlists->pending));
-
- set_timeslice(engine);
+ WRITE_ONCE(execlists->active,
+ memcpy(execlists->inflight,
+ execlists->pending,
+ execlists_num_ports(execlists) *
+ sizeof(*execlists->pending)));
WRITE_ONCE(execlists->pending[0], NULL);
} else {
diff --git a/drivers/gpu/drm/i915/gt/intel_rc6.c b/drivers/gpu/drm/i915/gt/intel_rc6.c
index 5ad4a92a9582..700104b90163 100644
--- a/drivers/gpu/drm/i915/gt/intel_rc6.c
+++ b/drivers/gpu/drm/i915/gt/intel_rc6.c
@@ -178,8 +178,13 @@ static void gen9_rc6_enable(struct intel_rc6 *rc6)
GEN6_RC_CTL_RC6_ENABLE |
rc6_mode);
- set(uncore, GEN9_PG_ENABLE,
- GEN9_RENDER_PG_ENABLE | GEN9_MEDIA_PG_ENABLE);
+ /*
+ * WaRsDisableCoarsePowerGating:skl,cnl
+ * - Render/Media PG need to be disabled with RC6.
+ */
+ if (!NEEDS_WaRsDisableCoarsePowerGating(rc6_to_i915(rc6)))
+ set(uncore, GEN9_PG_ENABLE,
+ GEN9_RENDER_PG_ENABLE | GEN9_MEDIA_PG_ENABLE);
}
static void gen8_rc6_enable(struct intel_rc6 *rc6)
@@ -486,6 +491,66 @@ static void rpm_put(struct intel_rc6 *rc6)
rc6->wakeref = false;
}
+static bool intel_rc6_ctx_corrupted(struct intel_rc6 *rc6)
+{
+ return !intel_uncore_read(rc6_to_uncore(rc6), GEN8_RC6_CTX_INFO);
+}
+
+static void intel_rc6_ctx_wa_init(struct intel_rc6 *rc6)
+{
+ struct drm_i915_private *i915 = rc6_to_i915(rc6);
+
+ if (!NEEDS_RC6_CTX_CORRUPTION_WA(i915))
+ return;
+
+ if (intel_rc6_ctx_corrupted(rc6)) {
+ DRM_INFO("RC6 context corrupted, disabling runtime power management\n");
+ rc6->ctx_corrupted = true;
+ }
+}
+
+/**
+ * intel_rc6_ctx_wa_resume - system resume sequence for the RC6 CTX WA
+ * @rc6: rc6 state
+ *
+ * Perform any steps needed to re-init the RC6 CTX WA after system resume.
+ */
+void intel_rc6_ctx_wa_resume(struct intel_rc6 *rc6)
+{
+ if (rc6->ctx_corrupted && !intel_rc6_ctx_corrupted(rc6)) {
+ DRM_INFO("RC6 context restored, re-enabling runtime power management\n");
+ rc6->ctx_corrupted = false;
+ }
+}
+
+/**
+ * intel_rc6_ctx_wa_check - check for a new RC6 CTX corruption
+ * @rc6: rc6 state
+ *
+ * Check if an RC6 CTX corruption has happened since the last check and if so
+ * disable RC6 and runtime power management.
+*/
+void intel_rc6_ctx_wa_check(struct intel_rc6 *rc6)
+{
+ struct drm_i915_private *i915 = rc6_to_i915(rc6);
+
+ if (!NEEDS_RC6_CTX_CORRUPTION_WA(i915))
+ return;
+
+ if (rc6->ctx_corrupted)
+ return;
+
+ if (!intel_rc6_ctx_corrupted(rc6))
+ return;
+
+ DRM_NOTE("RC6 context corruption, disabling runtime power management\n");
+
+ intel_rc6_disable(rc6);
+ rc6->ctx_corrupted = true;
+
+ return;
+}
+
static void __intel_rc6_disable(struct intel_rc6 *rc6)
{
struct drm_i915_private *i915 = rc6_to_i915(rc6);
@@ -510,6 +575,8 @@ void intel_rc6_init(struct intel_rc6 *rc6)
if (!rc6_supported(rc6))
return;
+ intel_rc6_ctx_wa_init(rc6);
+
if (IS_CHERRYVIEW(i915))
err = chv_rc6_init(rc6);
else if (IS_VALLEYVIEW(i915))
@@ -544,6 +611,9 @@ void intel_rc6_enable(struct intel_rc6 *rc6)
GEM_BUG_ON(rc6->enabled);
+ if (rc6->ctx_corrupted)
+ return;
+
intel_uncore_forcewake_get(uncore, FORCEWAKE_ALL);
if (IS_CHERRYVIEW(i915))
diff --git a/drivers/gpu/drm/i915/gt/intel_rc6.h b/drivers/gpu/drm/i915/gt/intel_rc6.h
index 5e6711f36457..1370f6834a4c 100644
--- a/drivers/gpu/drm/i915/gt/intel_rc6.h
+++ b/drivers/gpu/drm/i915/gt/intel_rc6.h
@@ -22,4 +22,7 @@ void intel_rc6_disable(struct intel_rc6 *rc6);
u64 intel_rc6_residency_ns(struct intel_rc6 *rc6, i915_reg_t reg);
u64 intel_rc6_residency_us(struct intel_rc6 *rc6, i915_reg_t reg);
+void intel_rc6_ctx_wa_check(struct intel_rc6 *rc6);
+void intel_rc6_ctx_wa_resume(struct intel_rc6 *rc6);
+
#endif /* INTEL_RC6_H */
diff --git a/drivers/gpu/drm/i915/gt/intel_rc6_types.h b/drivers/gpu/drm/i915/gt/intel_rc6_types.h
index 214f354d6ae4..89ad5697a8d4 100644
--- a/drivers/gpu/drm/i915/gt/intel_rc6_types.h
+++ b/drivers/gpu/drm/i915/gt/intel_rc6_types.h
@@ -23,6 +23,7 @@ struct intel_rc6 {
bool supported : 1;
bool enabled : 1;
bool wakeref : 1;
+ bool ctx_corrupted : 1;
};
#endif /* INTEL_RC6_TYPES_H */
diff --git a/drivers/gpu/drm/i915/gt/intel_reset.c b/drivers/gpu/drm/i915/gt/intel_reset.c
index f03e000051c1..c97423a76642 100644
--- a/drivers/gpu/drm/i915/gt/intel_reset.c
+++ b/drivers/gpu/drm/i915/gt/intel_reset.c
@@ -1114,7 +1114,7 @@ int intel_engine_reset(struct intel_engine_cs *engine, const char *msg)
out:
intel_engine_cancel_stop_cs(engine);
reset_finish_engine(engine);
- intel_engine_pm_put(engine);
+ intel_engine_pm_put_async(engine);
return ret;
}
diff --git a/drivers/gpu/drm/i915/gt/intel_ring.c b/drivers/gpu/drm/i915/gt/intel_ring.c
index ece20504d240..374b28f13ca0 100644
--- a/drivers/gpu/drm/i915/gt/intel_ring.c
+++ b/drivers/gpu/drm/i915/gt/intel_ring.c
@@ -57,9 +57,10 @@ int intel_ring_pin(struct intel_ring *ring)
i915_vma_make_unshrinkable(vma);
- GEM_BUG_ON(ring->vaddr);
- ring->vaddr = addr;
+ /* Discard any unused bytes beyond that submitted to hw. */
+ intel_ring_reset(ring, ring->emit);
+ ring->vaddr = addr;
return 0;
err_ring:
@@ -85,20 +86,14 @@ void intel_ring_unpin(struct intel_ring *ring)
if (!atomic_dec_and_test(&ring->pin_count))
return;
- /* Discard any unused bytes beyond that submitted to hw. */
- intel_ring_reset(ring, ring->emit);
-
i915_vma_unset_ggtt_write(vma);
if (i915_vma_is_map_and_fenceable(vma))
i915_vma_unpin_iomap(vma);
else
i915_gem_object_unpin_map(vma->obj);
- GEM_BUG_ON(!ring->vaddr);
- ring->vaddr = NULL;
-
- i915_vma_unpin(vma);
i915_vma_make_purgeable(vma);
+ i915_vma_unpin(vma);
}
static struct i915_vma *create_ring_vma(struct i915_ggtt *ggtt, int size)
diff --git a/drivers/gpu/drm/i915/gt/intel_timeline.c b/drivers/gpu/drm/i915/gt/intel_timeline.c
index 14ad10acd548..649798c184fb 100644
--- a/drivers/gpu/drm/i915/gt/intel_timeline.c
+++ b/drivers/gpu/drm/i915/gt/intel_timeline.c
@@ -282,6 +282,7 @@ void intel_timeline_fini(struct intel_timeline *timeline)
{
GEM_BUG_ON(atomic_read(&timeline->pin_count));
GEM_BUG_ON(!list_empty(&timeline->requests));
+ GEM_BUG_ON(timeline->retire);
if (timeline->hwsp_cacheline)
cacheline_free(timeline->hwsp_cacheline);
@@ -339,15 +340,33 @@ void intel_timeline_enter(struct intel_timeline *tl)
struct intel_gt_timelines *timelines = &tl->gt->timelines;
unsigned long flags;
+ /*
+ * Pretend we are serialised by the timeline->mutex.
+ *
+ * While generally true, there are a few exceptions to the rule
+ * for the engine->kernel_context being used to manage power
+ * transitions. As the engine_park may be called from under any
+ * timeline, it uses the power mutex as a global serialisation
+ * lock to prevent any other request entering its timeline.
+ *
+ * The rule is generally tl->mutex, otherwise engine->wakeref.mutex.
+ *
+ * However, intel_gt_retire_requests() does not know which engine
+ * it is retiring along and so cannot partake in the engine-pm
+ * barrier, and there we use the tl->active_count as a means to
+ * pin the timeline in the active_list while the locks are dropped.
+ * Ergo, as that is outside of the engine-pm barrier, we need to
+ * use atomic to manipulate tl->active_count.
+ */
lockdep_assert_held(&tl->mutex);
-
GEM_BUG_ON(!atomic_read(&tl->pin_count));
- if (tl->active_count++)
+
+ if (atomic_add_unless(&tl->active_count, 1, 0))
return;
- GEM_BUG_ON(!tl->active_count); /* overflow? */
spin_lock_irqsave(&timelines->lock, flags);
- list_add(&tl->link, &timelines->active_list);
+ if (!atomic_fetch_inc(&tl->active_count))
+ list_add_tail(&tl->link, &timelines->active_list);
spin_unlock_irqrestore(&timelines->lock, flags);
}
@@ -356,14 +375,16 @@ void intel_timeline_exit(struct intel_timeline *tl)
struct intel_gt_timelines *timelines = &tl->gt->timelines;
unsigned long flags;
+ /* See intel_timeline_enter() */
lockdep_assert_held(&tl->mutex);
- GEM_BUG_ON(!tl->active_count);
- if (--tl->active_count)
+ GEM_BUG_ON(!atomic_read(&tl->active_count));
+ if (atomic_add_unless(&tl->active_count, -1, 1))
return;
spin_lock_irqsave(&timelines->lock, flags);
- list_del(&tl->link);
+ if (atomic_dec_and_test(&tl->active_count))
+ list_del(&tl->link);
spin_unlock_irqrestore(&timelines->lock, flags);
/*
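
The conversion of tl->active_count to an atomic_t above follows the usual "take the global lock only on the 0↔1 transitions" pattern: atomic_add_unless() provides the lock-free fast path, and the timelines->lock spinlock is only taken when the timeline actually joins or leaves the active_list. A self-contained userspace sketch of the same pattern (C11 atomics plus a pthread mutex; all names here are illustrative, not driver API):

#include <pthread.h>
#include <stdatomic.h>
#include <stdbool.h>
#include <stddef.h>

struct node {
	atomic_int active;		/* how many users have "entered" */
	struct node *next, *prev;	/* membership in the active list */
};

struct tracker {
	pthread_mutex_t lock;		/* protects the list, not the counters */
	struct node head;
};

/* add 1 unless the counter is 0; returns true if it incremented */
static bool add_unless_zero(atomic_int *v)
{
	int old = atomic_load(v);

	while (old != 0)
		if (atomic_compare_exchange_weak(v, &old, old + 1))
			return true;
	return false;
}

/* subtract 1 unless the counter is 1; returns true if it decremented */
static bool dec_unless_one(atomic_int *v)
{
	int old = atomic_load(v);

	while (old != 1)
		if (atomic_compare_exchange_weak(v, &old, old - 1))
			return true;
	return false;
}

static void tracker_enter(struct tracker *t, struct node *n)
{
	if (add_unless_zero(&n->active))
		return;				/* fast path: already listed */

	pthread_mutex_lock(&t->lock);
	if (atomic_fetch_add(&n->active, 1) == 0) {	/* 0 -> 1: insert */
		n->next = &t->head;
		n->prev = t->head.prev;
		n->prev->next = n;
		t->head.prev = n;
	}
	pthread_mutex_unlock(&t->lock);
}

static void tracker_exit(struct tracker *t, struct node *n)
{
	if (dec_unless_one(&n->active))
		return;				/* fast path: still in use */

	pthread_mutex_lock(&t->lock);
	if (atomic_fetch_sub(&n->active, 1) == 1) {	/* 1 -> 0: remove */
		n->prev->next = n->next;
		n->next->prev = n->prev;
	}
	pthread_mutex_unlock(&t->lock);
}

int main(void)
{
	struct tracker t = {
		.lock = PTHREAD_MUTEX_INITIALIZER,
		.head = { .next = &t.head, .prev = &t.head },
	};
	struct node n = { 0 };

	tracker_enter(&t, &n);
	tracker_enter(&t, &n);	/* second enter takes only the fast path */
	tracker_exit(&t, &n);
	tracker_exit(&t, &n);	/* last exit removes the node again */
	return 0;
}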
diff --git a/drivers/gpu/drm/i915/gt/intel_timeline_types.h b/drivers/gpu/drm/i915/gt/intel_timeline_types.h
index 98d9ee166379..aaf15cbe1ce1 100644
--- a/drivers/gpu/drm/i915/gt/intel_timeline_types.h
+++ b/drivers/gpu/drm/i915/gt/intel_timeline_types.h
@@ -42,7 +42,7 @@ struct intel_timeline {
* from the intel_context caller plus internal atomicity.
*/
atomic_t pin_count;
- unsigned int active_count;
+ atomic_t active_count;
const u32 *hwsp_seqno;
struct i915_vma *hwsp_ggtt;
@@ -66,6 +66,9 @@ struct intel_timeline {
*/
struct i915_active_fence last_request;
+ /** A chain of completed timelines ready for early retirement. */
+ struct intel_timeline *retire;
+
/**
* We track the most recent seqno that we wait on in every context so
* that we only have to emit a new await and dependency on a more
diff --git a/drivers/gpu/drm/i915/gt/selftest_engine_pm.c b/drivers/gpu/drm/i915/gt/selftest_engine_pm.c
index 20b9c83f43ad..cbf6b0735272 100644
--- a/drivers/gpu/drm/i915/gt/selftest_engine_pm.c
+++ b/drivers/gpu/drm/i915/gt/selftest_engine_pm.c
@@ -51,11 +51,12 @@ static int live_engine_pm(void *arg)
pr_err("intel_engine_pm_get_if_awake(%s) failed under %s\n",
engine->name, p->name);
else
- intel_engine_pm_put(engine);
- intel_engine_pm_put(engine);
+ intel_engine_pm_put_async(engine);
+ intel_engine_pm_put_async(engine);
p->critical_section_end();
- /* engine wakeref is sync (instant) */
+ intel_engine_pm_flush(engine);
+
if (intel_engine_pm_is_awake(engine)) {
pr_err("%s is still awake after flushing pm\n",
engine->name);
diff --git a/drivers/gpu/drm/i915/gt/uc/intel_guc.c b/drivers/gpu/drm/i915/gt/uc/intel_guc.c
index 019ae6486e8d..3ee4a4e7689d 100644
--- a/drivers/gpu/drm/i915/gt/uc/intel_guc.c
+++ b/drivers/gpu/drm/i915/gt/uc/intel_guc.c
@@ -554,6 +554,13 @@ int intel_guc_suspend(struct intel_guc *guc)
};
/*
+ * If GuC communication is enabled but submission is not supported,
+ * we do not need to suspend the GuC.
+ */
+ if (!intel_guc_submission_is_enabled(guc))
+ return 0;
+
+ /*
* The ENTER_S_STATE action queues the save/restore operation in GuC FW
* and then returns, so waiting on the H2G is not enough to guarantee
* GuC is done. When all the processing is done, GuC writes
@@ -610,6 +617,14 @@ int intel_guc_resume(struct intel_guc *guc)
GUC_POWER_D0,
};
+ /*
+ * If GuC communication is enabled but submission is not supported,
+ * we do not need to resume the GuC but we do need to enable the
+ * GuC communication on resume (above).
+ */
+ if (!intel_guc_submission_is_enabled(guc))
+ return 0;
+
return intel_guc_send(guc, action, ARRAY_SIZE(action));
}
diff --git a/drivers/gpu/drm/i915/i915_active.c b/drivers/gpu/drm/i915/i915_active.c
index 207383dda84d..dca15ace88f6 100644
--- a/drivers/gpu/drm/i915/i915_active.c
+++ b/drivers/gpu/drm/i915/i915_active.c
@@ -91,14 +91,15 @@ static void debug_active_init(struct i915_active *ref)
static void debug_active_activate(struct i915_active *ref)
{
- lockdep_assert_held(&ref->mutex);
+ spin_lock_irq(&ref->tree_lock);
if (!atomic_read(&ref->count)) /* before the first inc */
debug_object_activate(ref, &active_debug_desc);
+ spin_unlock_irq(&ref->tree_lock);
}
static void debug_active_deactivate(struct i915_active *ref)
{
- lockdep_assert_held(&ref->mutex);
+ lockdep_assert_held(&ref->tree_lock);
if (!atomic_read(&ref->count)) /* after the last dec */
debug_object_deactivate(ref, &active_debug_desc);
}
@@ -128,29 +129,22 @@ __active_retire(struct i915_active *ref)
{
struct active_node *it, *n;
struct rb_root root;
- bool retire = false;
+ unsigned long flags;
- lockdep_assert_held(&ref->mutex);
GEM_BUG_ON(i915_active_is_idle(ref));
/* return the unused nodes to our slabcache -- flushing the allocator */
- if (atomic_dec_and_test(&ref->count)) {
- debug_active_deactivate(ref);
- root = ref->tree;
- ref->tree = RB_ROOT;
- ref->cache = NULL;
- retire = true;
- }
-
- mutex_unlock(&ref->mutex);
- if (!retire)
+ if (!atomic_dec_and_lock_irqsave(&ref->count, &ref->tree_lock, flags))
return;
GEM_BUG_ON(rcu_access_pointer(ref->excl.fence));
- rbtree_postorder_for_each_entry_safe(it, n, &root, node) {
- GEM_BUG_ON(i915_active_fence_isset(&it->base));
- kmem_cache_free(global.slab_cache, it);
- }
+ debug_active_deactivate(ref);
+
+ root = ref->tree;
+ ref->tree = RB_ROOT;
+ ref->cache = NULL;
+
+ spin_unlock_irqrestore(&ref->tree_lock, flags);
/* After the final retire, the entire struct may be freed */
if (ref->retire)
@@ -158,6 +152,11 @@ __active_retire(struct i915_active *ref)
/* ... except if you wait on it, you must manage your own references! */
wake_up_var(ref);
+
+ rbtree_postorder_for_each_entry_safe(it, n, &root, node) {
+ GEM_BUG_ON(i915_active_fence_isset(&it->base));
+ kmem_cache_free(global.slab_cache, it);
+ }
}
static void
@@ -169,7 +168,6 @@ active_work(struct work_struct *wrk)
if (atomic_add_unless(&ref->count, -1, 1))
return;
- mutex_lock(&ref->mutex);
__active_retire(ref);
}
@@ -180,9 +178,7 @@ active_retire(struct i915_active *ref)
if (atomic_add_unless(&ref->count, -1, 1))
return;
- /* If we are inside interrupt context (fence signaling), defer */
- if (ref->flags & I915_ACTIVE_RETIRE_SLEEPS ||
- !mutex_trylock(&ref->mutex)) {
+ if (ref->flags & I915_ACTIVE_RETIRE_SLEEPS) {
queue_work(system_unbound_wq, &ref->work);
return;
}
@@ -227,7 +223,7 @@ active_instance(struct i915_active *ref, struct intel_timeline *tl)
if (!prealloc)
return NULL;
- mutex_lock(&ref->mutex);
+ spin_lock_irq(&ref->tree_lock);
GEM_BUG_ON(i915_active_is_idle(ref));
parent = NULL;
@@ -257,7 +253,7 @@ active_instance(struct i915_active *ref, struct intel_timeline *tl)
out:
ref->cache = node;
- mutex_unlock(&ref->mutex);
+ spin_unlock_irq(&ref->tree_lock);
BUILD_BUG_ON(offsetof(typeof(*node), base));
return &node->base;
@@ -278,8 +274,10 @@ void __i915_active_init(struct i915_active *ref,
if (bits & I915_ACTIVE_MAY_SLEEP)
ref->flags |= I915_ACTIVE_RETIRE_SLEEPS;
+ spin_lock_init(&ref->tree_lock);
ref->tree = RB_ROOT;
ref->cache = NULL;
+
init_llist_head(&ref->preallocated_barriers);
atomic_set(&ref->count, 0);
__mutex_init(&ref->mutex, "i915_active", key);
@@ -510,7 +508,7 @@ static struct active_node *reuse_idle_barrier(struct i915_active *ref, u64 idx)
if (RB_EMPTY_ROOT(&ref->tree))
return NULL;
- mutex_lock(&ref->mutex);
+ spin_lock_irq(&ref->tree_lock);
GEM_BUG_ON(i915_active_is_idle(ref));
/*
@@ -575,7 +573,7 @@ static struct active_node *reuse_idle_barrier(struct i915_active *ref, u64 idx)
goto match;
}
- mutex_unlock(&ref->mutex);
+ spin_unlock_irq(&ref->tree_lock);
return NULL;
@@ -583,7 +581,7 @@ match:
rb_erase(p, &ref->tree); /* Hide from waits and sibling allocations */
if (p == &ref->cache->node)
ref->cache = NULL;
- mutex_unlock(&ref->mutex);
+ spin_unlock_irq(&ref->tree_lock);
return rb_entry(p, struct active_node, node);
}
@@ -664,6 +662,7 @@ unwind:
void i915_active_acquire_barrier(struct i915_active *ref)
{
struct llist_node *pos, *next;
+ unsigned long flags;
GEM_BUG_ON(i915_active_is_idle(ref));
@@ -673,12 +672,13 @@ void i915_active_acquire_barrier(struct i915_active *ref)
* populated by i915_request_add_active_barriers() to point to the
* request that will eventually release them.
*/
- mutex_lock_nested(&ref->mutex, SINGLE_DEPTH_NESTING);
llist_for_each_safe(pos, next, take_preallocated_barriers(ref)) {
struct active_node *node = barrier_from_ll(pos);
struct intel_engine_cs *engine = barrier_to_engine(node);
struct rb_node **p, *parent;
+ spin_lock_irqsave_nested(&ref->tree_lock, flags,
+ SINGLE_DEPTH_NESTING);
parent = NULL;
p = &ref->tree.rb_node;
while (*p) {
@@ -694,12 +694,12 @@ void i915_active_acquire_barrier(struct i915_active *ref)
}
rb_link_node(&node->node, parent, p);
rb_insert_color(&node->node, &ref->tree);
+ spin_unlock_irqrestore(&ref->tree_lock, flags);
GEM_BUG_ON(!intel_engine_pm_is_awake(engine));
llist_add(barrier_to_ll(node), &engine->barrier_tasks);
intel_engine_pm_put(engine);
}
- mutex_unlock(&ref->mutex);
}
void i915_request_add_active_barriers(struct i915_request *rq)
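
The retire path above now relies on atomic_dec_and_lock_irqsave(): the reference count is dropped lock-free unless this is the final reference, in which case tree_lock is taken before the rbtree is detached, and the detached nodes are only freed after the lock has been dropped and the waiters woken. A rough userspace equivalent of the dec-and-lock primitive, assuming a pthread mutex in place of the irq-safe spinlock (illustrative only):

#include <pthread.h>
#include <stdatomic.h>
#include <stdbool.h>

/*
 * Decrement *count; if it reaches zero, return true with *lock held so the
 * caller can tear down shared state and then unlock.
 */
static bool dec_and_lock(atomic_int *count, pthread_mutex_t *lock)
{
	int old = atomic_load(count);

	/* fast path: drop a reference that is not the last one */
	while (old > 1)
		if (atomic_compare_exchange_weak(count, &old, old - 1))
			return false;

	pthread_mutex_lock(lock);
	if (atomic_fetch_sub(count, 1) == 1)
		return true;		/* last reference: lock stays held */
	pthread_mutex_unlock(lock);
	return false;
}

Note also that the kmem_cache_free loop moves after the unlock and the wake_up_var() in the hunk above, which keeps the lock hold time short.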
diff --git a/drivers/gpu/drm/i915/i915_active_types.h b/drivers/gpu/drm/i915/i915_active_types.h
index d89a74c142c6..96aed0ee700a 100644
--- a/drivers/gpu/drm/i915/i915_active_types.h
+++ b/drivers/gpu/drm/i915/i915_active_types.h
@@ -48,6 +48,7 @@ struct i915_active {
atomic_t count;
struct mutex mutex;
+ spinlock_t tree_lock;
struct active_node *cache;
struct rb_root tree;
diff --git a/drivers/gpu/drm/i915/i915_cmd_parser.c b/drivers/gpu/drm/i915/i915_cmd_parser.c
index 24555102e198..f24096e27bef 100644
--- a/drivers/gpu/drm/i915/i915_cmd_parser.c
+++ b/drivers/gpu/drm/i915/i915_cmd_parser.c
@@ -53,13 +53,11 @@
* granting userspace undue privileges. There are three categories of privilege.
*
* First, commands which are explicitly defined as privileged or which should
- * only be used by the kernel driver. The parser generally rejects such
- * commands, though it may allow some from the drm master process.
+ * only be used by the kernel driver. The parser rejects such commands.
*
* Second, commands which access registers. To support correct/enhanced
* userspace functionality, particularly certain OpenGL extensions, the parser
- * provides a whitelist of registers which userspace may safely access (for both
- * normal and drm master processes).
+ * provides a whitelist of registers which userspace may safely access.
*
* Third, commands which access privileged memory (i.e. GGTT, HWS page, etc).
* The parser always rejects such commands.
@@ -84,9 +82,9 @@
* in the per-engine command tables.
*
* Other command table entries map fairly directly to high level categories
- * mentioned above: rejected, master-only, register whitelist. The parser
- * implements a number of checks, including the privileged memory checks, via a
- * general bitmasking mechanism.
+ * mentioned above: rejected, register whitelist. The parser implements a number
+ * of checks, including the privileged memory checks, via a general bitmasking
+ * mechanism.
*/
/*
@@ -104,8 +102,6 @@ struct drm_i915_cmd_descriptor {
* CMD_DESC_REJECT: The command is never allowed
* CMD_DESC_REGISTER: The command should be checked against the
* register whitelist for the appropriate ring
- * CMD_DESC_MASTER: The command is allowed if the submitting process
- * is the DRM master
*/
u32 flags;
#define CMD_DESC_FIXED (1<<0)
@@ -113,7 +109,6 @@ struct drm_i915_cmd_descriptor {
#define CMD_DESC_REJECT (1<<2)
#define CMD_DESC_REGISTER (1<<3)
#define CMD_DESC_BITMASK (1<<4)
-#define CMD_DESC_MASTER (1<<5)
/*
* The command's unique identification bits and the bitmask to get them.
@@ -194,7 +189,7 @@ struct drm_i915_cmd_table {
#define CMD(op, opm, f, lm, fl, ...) \
{ \
.flags = (fl) | ((f) ? CMD_DESC_FIXED : 0), \
- .cmd = { (op), ~0u << (opm) }, \
+ .cmd = { (op & ~0u << (opm)), ~0u << (opm) }, \
.length = { (lm) }, \
__VA_ARGS__ \
}
@@ -209,14 +204,13 @@ struct drm_i915_cmd_table {
#define R CMD_DESC_REJECT
#define W CMD_DESC_REGISTER
#define B CMD_DESC_BITMASK
-#define M CMD_DESC_MASTER
/* Command Mask Fixed Len Action
---------------------------------------------------------- */
-static const struct drm_i915_cmd_descriptor common_cmds[] = {
+static const struct drm_i915_cmd_descriptor gen7_common_cmds[] = {
CMD( MI_NOOP, SMI, F, 1, S ),
CMD( MI_USER_INTERRUPT, SMI, F, 1, R ),
- CMD( MI_WAIT_FOR_EVENT, SMI, F, 1, M ),
+ CMD( MI_WAIT_FOR_EVENT, SMI, F, 1, R ),
CMD( MI_ARB_CHECK, SMI, F, 1, S ),
CMD( MI_REPORT_HEAD, SMI, F, 1, S ),
CMD( MI_SUSPEND_FLUSH, SMI, F, 1, S ),
@@ -246,7 +240,7 @@ static const struct drm_i915_cmd_descriptor common_cmds[] = {
CMD( MI_BATCH_BUFFER_START, SMI, !F, 0xFF, S ),
};
-static const struct drm_i915_cmd_descriptor render_cmds[] = {
+static const struct drm_i915_cmd_descriptor gen7_render_cmds[] = {
CMD( MI_FLUSH, SMI, F, 1, S ),
CMD( MI_ARB_ON_OFF, SMI, F, 1, R ),
CMD( MI_PREDICATE, SMI, F, 1, S ),
@@ -313,7 +307,7 @@ static const struct drm_i915_cmd_descriptor hsw_render_cmds[] = {
CMD( MI_URB_ATOMIC_ALLOC, SMI, F, 1, S ),
CMD( MI_SET_APPID, SMI, F, 1, S ),
CMD( MI_RS_CONTEXT, SMI, F, 1, S ),
- CMD( MI_LOAD_SCAN_LINES_INCL, SMI, !F, 0x3F, M ),
+ CMD( MI_LOAD_SCAN_LINES_INCL, SMI, !F, 0x3F, R ),
CMD( MI_LOAD_SCAN_LINES_EXCL, SMI, !F, 0x3F, R ),
CMD( MI_LOAD_REGISTER_REG, SMI, !F, 0xFF, W,
.reg = { .offset = 1, .mask = 0x007FFFFC, .step = 1 } ),
@@ -330,7 +324,7 @@ static const struct drm_i915_cmd_descriptor hsw_render_cmds[] = {
CMD( GFX_OP_3DSTATE_BINDING_TABLE_EDIT_PS, S3D, !F, 0x1FF, S ),
};
-static const struct drm_i915_cmd_descriptor video_cmds[] = {
+static const struct drm_i915_cmd_descriptor gen7_video_cmds[] = {
CMD( MI_ARB_ON_OFF, SMI, F, 1, R ),
CMD( MI_SET_APPID, SMI, F, 1, S ),
CMD( MI_STORE_DWORD_IMM, SMI, !F, 0xFF, B,
@@ -374,7 +368,7 @@ static const struct drm_i915_cmd_descriptor video_cmds[] = {
CMD( MFX_WAIT, SMFX, F, 1, S ),
};
-static const struct drm_i915_cmd_descriptor vecs_cmds[] = {
+static const struct drm_i915_cmd_descriptor gen7_vecs_cmds[] = {
CMD( MI_ARB_ON_OFF, SMI, F, 1, R ),
CMD( MI_SET_APPID, SMI, F, 1, S ),
CMD( MI_STORE_DWORD_IMM, SMI, !F, 0xFF, B,
@@ -412,7 +406,7 @@ static const struct drm_i915_cmd_descriptor vecs_cmds[] = {
}}, ),
};
-static const struct drm_i915_cmd_descriptor blt_cmds[] = {
+static const struct drm_i915_cmd_descriptor gen7_blt_cmds[] = {
CMD( MI_DISPLAY_FLIP, SMI, !F, 0xFF, R ),
CMD( MI_STORE_DWORD_IMM, SMI, !F, 0x3FF, B,
.bits = {{
@@ -446,10 +440,64 @@ static const struct drm_i915_cmd_descriptor blt_cmds[] = {
};
static const struct drm_i915_cmd_descriptor hsw_blt_cmds[] = {
- CMD( MI_LOAD_SCAN_LINES_INCL, SMI, !F, 0x3F, M ),
+ CMD( MI_LOAD_SCAN_LINES_INCL, SMI, !F, 0x3F, R ),
CMD( MI_LOAD_SCAN_LINES_EXCL, SMI, !F, 0x3F, R ),
};
+/*
+ * For Gen9 we can still rely on the h/w to enforce cmd security, and only
+ * need to re-enforce the register access checks. We therefore only need to
+ * teach the cmdparser how to find the end of each command, and identify
+ * register accesses. The table doesn't need to reject any commands, and so
+ * the only commands listed here are:
+ * 1) Those that touch registers
+ * 2) Those that do not have the default 8-bit length
+ *
+ * Note that the default MI length mask chosen for this table is 0xFF, not
+ * the 0x3F used on older devices. This is because the vast majority of MI
+ * cmds on Gen9 use a standard 8-bit Length field.
+ * All the Gen9 blitter instructions are standard 0xFF length mask, and
+ * none allow access to non-general registers, so in fact no BLT cmds are
+ * included in the table at all.
+ *
+ */
+static const struct drm_i915_cmd_descriptor gen9_blt_cmds[] = {
+ CMD( MI_NOOP, SMI, F, 1, S ),
+ CMD( MI_USER_INTERRUPT, SMI, F, 1, S ),
+ CMD( MI_WAIT_FOR_EVENT, SMI, F, 1, S ),
+ CMD( MI_FLUSH, SMI, F, 1, S ),
+ CMD( MI_ARB_CHECK, SMI, F, 1, S ),
+ CMD( MI_REPORT_HEAD, SMI, F, 1, S ),
+ CMD( MI_ARB_ON_OFF, SMI, F, 1, S ),
+ CMD( MI_SUSPEND_FLUSH, SMI, F, 1, S ),
+ CMD( MI_LOAD_SCAN_LINES_INCL, SMI, !F, 0x3F, S ),
+ CMD( MI_LOAD_SCAN_LINES_EXCL, SMI, !F, 0x3F, S ),
+ CMD( MI_STORE_DWORD_IMM, SMI, !F, 0x3FF, S ),
+ CMD( MI_LOAD_REGISTER_IMM(1), SMI, !F, 0xFF, W,
+ .reg = { .offset = 1, .mask = 0x007FFFFC, .step = 2 } ),
+ CMD( MI_UPDATE_GTT, SMI, !F, 0x3FF, S ),
+ CMD( MI_STORE_REGISTER_MEM_GEN8, SMI, F, 4, W,
+ .reg = { .offset = 1, .mask = 0x007FFFFC } ),
+ CMD( MI_FLUSH_DW, SMI, !F, 0x3F, S ),
+ CMD( MI_LOAD_REGISTER_MEM_GEN8, SMI, F, 4, W,
+ .reg = { .offset = 1, .mask = 0x007FFFFC } ),
+ CMD( MI_LOAD_REGISTER_REG, SMI, !F, 0xFF, W,
+ .reg = { .offset = 1, .mask = 0x007FFFFC, .step = 1 } ),
+
+ /*
+ * We allow BB_START but apply further checks. We just sanitize the
+ * basic fields here.
+ */
+#define MI_BB_START_OPERAND_MASK GENMASK(SMI-1, 0)
+#define MI_BB_START_OPERAND_EXPECT (MI_BATCH_PPGTT_HSW | 1)
+ CMD( MI_BATCH_BUFFER_START_GEN8, SMI, !F, 0xFF, B,
+ .bits = {{
+ .offset = 0,
+ .mask = MI_BB_START_OPERAND_MASK,
+ .expected = MI_BB_START_OPERAND_EXPECT,
+ }}, ),
+};
+
static const struct drm_i915_cmd_descriptor noop_desc =
CMD(MI_NOOP, SMI, F, 1, S);
@@ -463,40 +511,44 @@ static const struct drm_i915_cmd_descriptor noop_desc =
#undef R
#undef W
#undef B
-#undef M
-static const struct drm_i915_cmd_table gen7_render_cmds[] = {
- { common_cmds, ARRAY_SIZE(common_cmds) },
- { render_cmds, ARRAY_SIZE(render_cmds) },
+static const struct drm_i915_cmd_table gen7_render_cmd_table[] = {
+ { gen7_common_cmds, ARRAY_SIZE(gen7_common_cmds) },
+ { gen7_render_cmds, ARRAY_SIZE(gen7_render_cmds) },
};
-static const struct drm_i915_cmd_table hsw_render_ring_cmds[] = {
- { common_cmds, ARRAY_SIZE(common_cmds) },
- { render_cmds, ARRAY_SIZE(render_cmds) },
+static const struct drm_i915_cmd_table hsw_render_ring_cmd_table[] = {
+ { gen7_common_cmds, ARRAY_SIZE(gen7_common_cmds) },
+ { gen7_render_cmds, ARRAY_SIZE(gen7_render_cmds) },
{ hsw_render_cmds, ARRAY_SIZE(hsw_render_cmds) },
};
-static const struct drm_i915_cmd_table gen7_video_cmds[] = {
- { common_cmds, ARRAY_SIZE(common_cmds) },
- { video_cmds, ARRAY_SIZE(video_cmds) },
+static const struct drm_i915_cmd_table gen7_video_cmd_table[] = {
+ { gen7_common_cmds, ARRAY_SIZE(gen7_common_cmds) },
+ { gen7_video_cmds, ARRAY_SIZE(gen7_video_cmds) },
};
-static const struct drm_i915_cmd_table hsw_vebox_cmds[] = {
- { common_cmds, ARRAY_SIZE(common_cmds) },
- { vecs_cmds, ARRAY_SIZE(vecs_cmds) },
+static const struct drm_i915_cmd_table hsw_vebox_cmd_table[] = {
+ { gen7_common_cmds, ARRAY_SIZE(gen7_common_cmds) },
+ { gen7_vecs_cmds, ARRAY_SIZE(gen7_vecs_cmds) },
};
-static const struct drm_i915_cmd_table gen7_blt_cmds[] = {
- { common_cmds, ARRAY_SIZE(common_cmds) },
- { blt_cmds, ARRAY_SIZE(blt_cmds) },
+static const struct drm_i915_cmd_table gen7_blt_cmd_table[] = {
+ { gen7_common_cmds, ARRAY_SIZE(gen7_common_cmds) },
+ { gen7_blt_cmds, ARRAY_SIZE(gen7_blt_cmds) },
};
-static const struct drm_i915_cmd_table hsw_blt_ring_cmds[] = {
- { common_cmds, ARRAY_SIZE(common_cmds) },
- { blt_cmds, ARRAY_SIZE(blt_cmds) },
+static const struct drm_i915_cmd_table hsw_blt_ring_cmd_table[] = {
+ { gen7_common_cmds, ARRAY_SIZE(gen7_common_cmds) },
+ { gen7_blt_cmds, ARRAY_SIZE(gen7_blt_cmds) },
{ hsw_blt_cmds, ARRAY_SIZE(hsw_blt_cmds) },
};
+static const struct drm_i915_cmd_table gen9_blt_cmd_table[] = {
+ { gen9_blt_cmds, ARRAY_SIZE(gen9_blt_cmds) },
+};
+
+
/*
* Register whitelists, sorted by increasing register offset.
*/
@@ -612,17 +664,27 @@ static const struct drm_i915_reg_descriptor gen7_blt_regs[] = {
REG64_IDX(RING_TIMESTAMP, BLT_RING_BASE),
};
-static const struct drm_i915_reg_descriptor ivb_master_regs[] = {
- REG32(FORCEWAKE_MT),
- REG32(DERRMR),
- REG32(GEN7_PIPE_DE_LOAD_SL(PIPE_A)),
- REG32(GEN7_PIPE_DE_LOAD_SL(PIPE_B)),
- REG32(GEN7_PIPE_DE_LOAD_SL(PIPE_C)),
-};
-
-static const struct drm_i915_reg_descriptor hsw_master_regs[] = {
- REG32(FORCEWAKE_MT),
- REG32(DERRMR),
+static const struct drm_i915_reg_descriptor gen9_blt_regs[] = {
+ REG64_IDX(RING_TIMESTAMP, RENDER_RING_BASE),
+ REG64_IDX(RING_TIMESTAMP, BSD_RING_BASE),
+ REG32(BCS_SWCTRL),
+ REG64_IDX(RING_TIMESTAMP, BLT_RING_BASE),
+ REG64_IDX(BCS_GPR, 0),
+ REG64_IDX(BCS_GPR, 1),
+ REG64_IDX(BCS_GPR, 2),
+ REG64_IDX(BCS_GPR, 3),
+ REG64_IDX(BCS_GPR, 4),
+ REG64_IDX(BCS_GPR, 5),
+ REG64_IDX(BCS_GPR, 6),
+ REG64_IDX(BCS_GPR, 7),
+ REG64_IDX(BCS_GPR, 8),
+ REG64_IDX(BCS_GPR, 9),
+ REG64_IDX(BCS_GPR, 10),
+ REG64_IDX(BCS_GPR, 11),
+ REG64_IDX(BCS_GPR, 12),
+ REG64_IDX(BCS_GPR, 13),
+ REG64_IDX(BCS_GPR, 14),
+ REG64_IDX(BCS_GPR, 15),
};
#undef REG64
@@ -631,28 +693,27 @@ static const struct drm_i915_reg_descriptor hsw_master_regs[] = {
struct drm_i915_reg_table {
const struct drm_i915_reg_descriptor *regs;
int num_regs;
- bool master;
};
static const struct drm_i915_reg_table ivb_render_reg_tables[] = {
- { gen7_render_regs, ARRAY_SIZE(gen7_render_regs), false },
- { ivb_master_regs, ARRAY_SIZE(ivb_master_regs), true },
+ { gen7_render_regs, ARRAY_SIZE(gen7_render_regs) },
};
static const struct drm_i915_reg_table ivb_blt_reg_tables[] = {
- { gen7_blt_regs, ARRAY_SIZE(gen7_blt_regs), false },
- { ivb_master_regs, ARRAY_SIZE(ivb_master_regs), true },
+ { gen7_blt_regs, ARRAY_SIZE(gen7_blt_regs) },
};
static const struct drm_i915_reg_table hsw_render_reg_tables[] = {
- { gen7_render_regs, ARRAY_SIZE(gen7_render_regs), false },
- { hsw_render_regs, ARRAY_SIZE(hsw_render_regs), false },
- { hsw_master_regs, ARRAY_SIZE(hsw_master_regs), true },
+ { gen7_render_regs, ARRAY_SIZE(gen7_render_regs) },
+ { hsw_render_regs, ARRAY_SIZE(hsw_render_regs) },
};
static const struct drm_i915_reg_table hsw_blt_reg_tables[] = {
- { gen7_blt_regs, ARRAY_SIZE(gen7_blt_regs), false },
- { hsw_master_regs, ARRAY_SIZE(hsw_master_regs), true },
+ { gen7_blt_regs, ARRAY_SIZE(gen7_blt_regs) },
+};
+
+static const struct drm_i915_reg_table gen9_blt_reg_tables[] = {
+ { gen9_blt_regs, ARRAY_SIZE(gen9_blt_regs) },
};
static u32 gen7_render_get_cmd_length_mask(u32 cmd_header)
@@ -710,6 +771,17 @@ static u32 gen7_blt_get_cmd_length_mask(u32 cmd_header)
return 0;
}
+static u32 gen9_blt_get_cmd_length_mask(u32 cmd_header)
+{
+ u32 client = cmd_header >> INSTR_CLIENT_SHIFT;
+
+ if (client == INSTR_MI_CLIENT || client == INSTR_BC_CLIENT)
+ return 0xFF;
+
+ DRM_DEBUG_DRIVER("CMD: Abnormal blt cmd length! 0x%08X\n", cmd_header);
+ return 0;
+}
+
static bool validate_cmds_sorted(const struct intel_engine_cs *engine,
const struct drm_i915_cmd_table *cmd_tables,
int cmd_table_count)
@@ -867,18 +939,19 @@ void intel_engine_init_cmd_parser(struct intel_engine_cs *engine)
int cmd_table_count;
int ret;
- if (!IS_GEN(engine->i915, 7))
+ if (!IS_GEN(engine->i915, 7) && !(IS_GEN(engine->i915, 9) &&
+ engine->class == COPY_ENGINE_CLASS))
return;
switch (engine->class) {
case RENDER_CLASS:
if (IS_HASWELL(engine->i915)) {
- cmd_tables = hsw_render_ring_cmds;
+ cmd_tables = hsw_render_ring_cmd_table;
cmd_table_count =
- ARRAY_SIZE(hsw_render_ring_cmds);
+ ARRAY_SIZE(hsw_render_ring_cmd_table);
} else {
- cmd_tables = gen7_render_cmds;
- cmd_table_count = ARRAY_SIZE(gen7_render_cmds);
+ cmd_tables = gen7_render_cmd_table;
+ cmd_table_count = ARRAY_SIZE(gen7_render_cmd_table);
}
if (IS_HASWELL(engine->i915)) {
@@ -888,36 +961,46 @@ void intel_engine_init_cmd_parser(struct intel_engine_cs *engine)
engine->reg_tables = ivb_render_reg_tables;
engine->reg_table_count = ARRAY_SIZE(ivb_render_reg_tables);
}
-
engine->get_cmd_length_mask = gen7_render_get_cmd_length_mask;
break;
case VIDEO_DECODE_CLASS:
- cmd_tables = gen7_video_cmds;
- cmd_table_count = ARRAY_SIZE(gen7_video_cmds);
+ cmd_tables = gen7_video_cmd_table;
+ cmd_table_count = ARRAY_SIZE(gen7_video_cmd_table);
engine->get_cmd_length_mask = gen7_bsd_get_cmd_length_mask;
break;
case COPY_ENGINE_CLASS:
- if (IS_HASWELL(engine->i915)) {
- cmd_tables = hsw_blt_ring_cmds;
- cmd_table_count = ARRAY_SIZE(hsw_blt_ring_cmds);
+ engine->get_cmd_length_mask = gen7_blt_get_cmd_length_mask;
+ if (IS_GEN(engine->i915, 9)) {
+ cmd_tables = gen9_blt_cmd_table;
+ cmd_table_count = ARRAY_SIZE(gen9_blt_cmd_table);
+ engine->get_cmd_length_mask =
+ gen9_blt_get_cmd_length_mask;
+
+ /* BCS Engine unsafe without parser */
+ engine->flags |= I915_ENGINE_REQUIRES_CMD_PARSER;
+ } else if (IS_HASWELL(engine->i915)) {
+ cmd_tables = hsw_blt_ring_cmd_table;
+ cmd_table_count = ARRAY_SIZE(hsw_blt_ring_cmd_table);
} else {
- cmd_tables = gen7_blt_cmds;
- cmd_table_count = ARRAY_SIZE(gen7_blt_cmds);
+ cmd_tables = gen7_blt_cmd_table;
+ cmd_table_count = ARRAY_SIZE(gen7_blt_cmd_table);
}
- if (IS_HASWELL(engine->i915)) {
+ if (IS_GEN(engine->i915, 9)) {
+ engine->reg_tables = gen9_blt_reg_tables;
+ engine->reg_table_count =
+ ARRAY_SIZE(gen9_blt_reg_tables);
+ } else if (IS_HASWELL(engine->i915)) {
engine->reg_tables = hsw_blt_reg_tables;
engine->reg_table_count = ARRAY_SIZE(hsw_blt_reg_tables);
} else {
engine->reg_tables = ivb_blt_reg_tables;
engine->reg_table_count = ARRAY_SIZE(ivb_blt_reg_tables);
}
-
- engine->get_cmd_length_mask = gen7_blt_get_cmd_length_mask;
break;
case VIDEO_ENHANCEMENT_CLASS:
- cmd_tables = hsw_vebox_cmds;
- cmd_table_count = ARRAY_SIZE(hsw_vebox_cmds);
+ cmd_tables = hsw_vebox_cmd_table;
+ cmd_table_count = ARRAY_SIZE(hsw_vebox_cmd_table);
/* VECS can use the same length_mask function as VCS */
engine->get_cmd_length_mask = gen7_bsd_get_cmd_length_mask;
break;
@@ -943,7 +1026,7 @@ void intel_engine_init_cmd_parser(struct intel_engine_cs *engine)
return;
}
- engine->flags |= I915_ENGINE_NEEDS_CMD_PARSER;
+ engine->flags |= I915_ENGINE_USING_CMD_PARSER;
}
/**
@@ -955,7 +1038,7 @@ void intel_engine_init_cmd_parser(struct intel_engine_cs *engine)
*/
void intel_engine_cleanup_cmd_parser(struct intel_engine_cs *engine)
{
- if (!intel_engine_needs_cmd_parser(engine))
+ if (!intel_engine_using_cmd_parser(engine))
return;
fini_hash_table(engine);
@@ -1029,22 +1112,16 @@ __find_reg(const struct drm_i915_reg_descriptor *table, int count, u32 addr)
}
static const struct drm_i915_reg_descriptor *
-find_reg(const struct intel_engine_cs *engine, bool is_master, u32 addr)
+find_reg(const struct intel_engine_cs *engine, u32 addr)
{
const struct drm_i915_reg_table *table = engine->reg_tables;
+ const struct drm_i915_reg_descriptor *reg = NULL;
int count = engine->reg_table_count;
- for (; count > 0; ++table, --count) {
- if (!table->master || is_master) {
- const struct drm_i915_reg_descriptor *reg;
+ for (; !reg && (count > 0); ++table, --count)
+ reg = __find_reg(table->regs, table->num_regs, addr);
- reg = __find_reg(table->regs, table->num_regs, addr);
- if (reg != NULL)
- return reg;
- }
- }
-
- return NULL;
+ return reg;
}
/* Returns a vmap'd pointer to dst_obj, which the caller must unmap */
@@ -1128,8 +1205,7 @@ static u32 *copy_batch(struct drm_i915_gem_object *dst_obj,
static bool check_cmd(const struct intel_engine_cs *engine,
const struct drm_i915_cmd_descriptor *desc,
- const u32 *cmd, u32 length,
- const bool is_master)
+ const u32 *cmd, u32 length)
{
if (desc->flags & CMD_DESC_SKIP)
return true;
@@ -1139,12 +1215,6 @@ static bool check_cmd(const struct intel_engine_cs *engine,
return false;
}
- if ((desc->flags & CMD_DESC_MASTER) && !is_master) {
- DRM_DEBUG_DRIVER("CMD: Rejected master-only command: 0x%08X\n",
- *cmd);
- return false;
- }
-
if (desc->flags & CMD_DESC_REGISTER) {
/*
* Get the distance between individual register offset
@@ -1158,7 +1228,7 @@ static bool check_cmd(const struct intel_engine_cs *engine,
offset += step) {
const u32 reg_addr = cmd[offset] & desc->reg.mask;
const struct drm_i915_reg_descriptor *reg =
- find_reg(engine, is_master, reg_addr);
+ find_reg(engine, reg_addr);
if (!reg) {
DRM_DEBUG_DRIVER("CMD: Rejected register 0x%08X in command: 0x%08X (%s)\n",
@@ -1236,16 +1306,112 @@ static bool check_cmd(const struct intel_engine_cs *engine,
return true;
}
+static int check_bbstart(const struct i915_gem_context *ctx,
+ u32 *cmd, u32 offset, u32 length,
+ u32 batch_len,
+ u64 batch_start,
+ u64 shadow_batch_start)
+{
+ u64 jump_offset, jump_target;
+ u32 target_cmd_offset, target_cmd_index;
+
+ /* For igt compatibility on older platforms */
+ if (CMDPARSER_USES_GGTT(ctx->i915)) {
+ DRM_DEBUG("CMD: Rejecting BB_START for ggtt based submission\n");
+ return -EACCES;
+ }
+
+ if (length != 3) {
+ DRM_DEBUG("CMD: Recursive BB_START with bad length(%u)\n",
+ length);
+ return -EINVAL;
+ }
+
+ jump_target = *(u64*)(cmd+1);
+ jump_offset = jump_target - batch_start;
+
+ /*
+ * Any underflow of jump_target is guaranteed to be outside the range
+ * of a u32, so >= test catches both too large and too small
+ */
+ if (jump_offset >= batch_len) {
+ DRM_DEBUG("CMD: BB_START to 0x%llx jumps out of BB\n",
+ jump_target);
+ return -EINVAL;
+ }
+
+ /*
+ * This cannot overflow a u32 because we already checked jump_offset
+ * is within the BB, and the batch_len is a u32
+ */
+ target_cmd_offset = lower_32_bits(jump_offset);
+ target_cmd_index = target_cmd_offset / sizeof(u32);
+
+ *(u64*)(cmd + 1) = shadow_batch_start + target_cmd_offset;
+
+ if (target_cmd_index == offset)
+ return 0;
+
+ if (ctx->jump_whitelist_cmds <= target_cmd_index) {
+ DRM_DEBUG("CMD: Rejecting BB_START - truncated whitelist array\n");
+ return -EINVAL;
+ } else if (!test_bit(target_cmd_index, ctx->jump_whitelist)) {
+ DRM_DEBUG("CMD: BB_START to 0x%llx not a previously executed cmd\n",
+ jump_target);
+ return -EINVAL;
+ }
+
+ return 0;
+}
+
+static void init_whitelist(struct i915_gem_context *ctx, u32 batch_len)
+{
+ const u32 batch_cmds = DIV_ROUND_UP(batch_len, sizeof(u32));
+ const u32 exact_size = BITS_TO_LONGS(batch_cmds);
+ u32 next_size = BITS_TO_LONGS(roundup_pow_of_two(batch_cmds));
+ unsigned long *next_whitelist;
+
+ if (CMDPARSER_USES_GGTT(ctx->i915))
+ return;
+
+ if (batch_cmds <= ctx->jump_whitelist_cmds) {
+ bitmap_zero(ctx->jump_whitelist, batch_cmds);
+ return;
+ }
+
+again:
+ next_whitelist = kcalloc(next_size, sizeof(long), GFP_KERNEL);
+ if (next_whitelist) {
+ kfree(ctx->jump_whitelist);
+ ctx->jump_whitelist = next_whitelist;
+ ctx->jump_whitelist_cmds =
+ next_size * BITS_PER_BYTE * sizeof(long);
+ return;
+ }
+
+ if (next_size > exact_size) {
+ next_size = exact_size;
+ goto again;
+ }
+
+ DRM_DEBUG("CMD: Failed to extend whitelist. BB_START may be disallowed\n");
+ bitmap_zero(ctx->jump_whitelist, ctx->jump_whitelist_cmds);
+
+ return;
+}
+
#define LENGTH_BIAS 2
/**
* i915_parse_cmds() - parse a submitted batch buffer for privilege violations
+ * @ctx: the context in which the batch is to execute
* @engine: the engine on which the batch is to execute
* @batch_obj: the batch buffer in question
- * @shadow_batch_obj: copy of the batch buffer in question
+ * @batch_start: Canonical base address of batch
* @batch_start_offset: byte offset in the batch at which execution starts
* @batch_len: length of the commands in batch_obj
- * @is_master: is the submitting process the drm master?
+ * @shadow_batch_obj: copy of the batch buffer in question
+ * @shadow_batch_start: Canonical base address of shadow_batch_obj
*
* Parses the specified batch buffer looking for privilege violations as
* described in the overview.
@@ -1253,14 +1419,17 @@ static bool check_cmd(const struct intel_engine_cs *engine,
* Return: non-zero if the parser finds violations or otherwise fails; -EACCES
* if the batch appears legal but should use hardware parsing
*/
-int intel_engine_cmd_parser(struct intel_engine_cs *engine,
+
+int intel_engine_cmd_parser(struct i915_gem_context *ctx,
+ struct intel_engine_cs *engine,
struct drm_i915_gem_object *batch_obj,
- struct drm_i915_gem_object *shadow_batch_obj,
+ u64 batch_start,
u32 batch_start_offset,
u32 batch_len,
- bool is_master)
+ struct drm_i915_gem_object *shadow_batch_obj,
+ u64 shadow_batch_start)
{
- u32 *cmd, *batch_end;
+ u32 *cmd, *batch_end, offset = 0;
struct drm_i915_cmd_descriptor default_desc = noop_desc;
const struct drm_i915_cmd_descriptor *desc = &default_desc;
bool needs_clflush_after = false;
@@ -1274,6 +1443,8 @@ int intel_engine_cmd_parser(struct intel_engine_cs *engine,
return PTR_ERR(cmd);
}
+ init_whitelist(ctx, batch_len);
+
/*
* We use the batch length as size because the shadow object is as
* large or larger and copy_batch() will write MI_NOPs to the extra
@@ -1283,31 +1454,15 @@ int intel_engine_cmd_parser(struct intel_engine_cs *engine,
do {
u32 length;
- if (*cmd == MI_BATCH_BUFFER_END) {
- if (needs_clflush_after) {
- void *ptr = page_mask_bits(shadow_batch_obj->mm.mapping);
- drm_clflush_virt_range(ptr,
- (void *)(cmd + 1) - ptr);
- }
+ if (*cmd == MI_BATCH_BUFFER_END)
break;
- }
desc = find_cmd(engine, *cmd, desc, &default_desc);
if (!desc) {
DRM_DEBUG_DRIVER("CMD: Unrecognized command: 0x%08X\n",
*cmd);
ret = -EINVAL;
- break;
- }
-
- /*
- * If the batch buffer contains a chained batch, return an
- * error that tells the caller to abort and dispatch the
- * workload as a non-secure batch.
- */
- if (desc->cmd.value == MI_BATCH_BUFFER_START) {
- ret = -EACCES;
- break;
+ goto err;
}
if (desc->flags & CMD_DESC_FIXED)
@@ -1321,22 +1476,43 @@ int intel_engine_cmd_parser(struct intel_engine_cs *engine,
length,
batch_end - cmd);
ret = -EINVAL;
- break;
+ goto err;
}
- if (!check_cmd(engine, desc, cmd, length, is_master)) {
+ if (!check_cmd(engine, desc, cmd, length)) {
ret = -EACCES;
+ goto err;
+ }
+
+ if (desc->cmd.value == MI_BATCH_BUFFER_START) {
+ ret = check_bbstart(ctx, cmd, offset, length,
+ batch_len, batch_start,
+ shadow_batch_start);
+
+ if (ret)
+ goto err;
break;
}
+ if (ctx->jump_whitelist_cmds > offset)
+ set_bit(offset, ctx->jump_whitelist);
+
cmd += length;
+ offset += length;
if (cmd >= batch_end) {
DRM_DEBUG_DRIVER("CMD: Got to the end of the buffer w/o a BBE cmd!\n");
ret = -EINVAL;
- break;
+ goto err;
}
} while (1);
+ if (needs_clflush_after) {
+ void *ptr = page_mask_bits(shadow_batch_obj->mm.mapping);
+
+ drm_clflush_virt_range(ptr, (void *)(cmd + 1) - ptr);
+ }
+
+err:
i915_gem_object_unpin_map(shadow_batch_obj);
return ret;
}
@@ -1357,7 +1533,7 @@ int i915_cmd_parser_get_version(struct drm_i915_private *dev_priv)
/* If the command parser is not enabled, report 0 - unsupported */
for_each_uabi_engine(engine, dev_priv) {
- if (intel_engine_needs_cmd_parser(engine)) {
+ if (intel_engine_using_cmd_parser(engine)) {
active = true;
break;
}
@@ -1382,6 +1558,7 @@ int i915_cmd_parser_get_version(struct drm_i915_private *dev_priv)
* the parser enabled.
* 9. Don't whitelist or handle oacontrol specially, as ownership
* for oacontrol state is moving to i915-perf.
+ * 10. Support for Gen9 BCS Parsing
*/
- return 9;
+ return 10;
}
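
check_bbstart() above leans on unsigned arithmetic for its bounds check: jump_offset = jump_target - batch_start wraps on underflow, so a single ">= batch_len" comparison rejects targets both below and beyond the scanned batch before the whitelist bitmap is consulted. A small runnable illustration of just that check (standalone C with hypothetical values, not the driver code):

#include <stdbool.h>
#include <stdint.h>
#include <stdio.h>

/* Does the jump target land inside [batch_start, batch_start + batch_len)? */
static bool jump_in_batch(uint64_t target, uint64_t batch_start,
			  uint32_t batch_len)
{
	uint64_t off = target - batch_start;	/* wraps on underflow */

	return off < batch_len;
}

int main(void)
{
	printf("%d\n", jump_in_batch(0x1010, 0x1000, 0x100)); /* 1: inside */
	printf("%d\n", jump_in_batch(0x0ff0, 0x1000, 0x100)); /* 0: underflow caught */
	printf("%d\n", jump_in_batch(0x1200, 0x1000, 0x100)); /* 0: past the end */
	return 0;
}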
diff --git a/drivers/gpu/drm/i915/i915_drv.c b/drivers/gpu/drm/i915/i915_drv.c
index ccb5b566795f..87e05ca3646a 100644
--- a/drivers/gpu/drm/i915/i915_drv.c
+++ b/drivers/gpu/drm/i915/i915_drv.c
@@ -63,6 +63,7 @@
#include "gem/i915_gem_ioctls.h"
#include "gt/intel_gt.h"
#include "gt/intel_gt_pm.h"
+#include "gt/intel_rc6.h"
#include "i915_debugfs.h"
#include "i915_drv.h"
@@ -1819,6 +1820,8 @@ static int i915_drm_resume(struct drm_device *dev)
disable_rpm_wakeref_asserts(&dev_priv->runtime_pm);
+ intel_rc6_ctx_wa_resume(&dev_priv->gt.rc6);
+
intel_gt_sanitize(&dev_priv->gt, true);
ret = i915_ggtt_enable_hw(dev_priv);
diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h
index 0567fe67e8c5..e29bc137e7ba 100644
--- a/drivers/gpu/drm/i915/i915_drv.h
+++ b/drivers/gpu/drm/i915/i915_drv.h
@@ -1614,9 +1614,16 @@ IS_SUBPLATFORM(const struct drm_i915_private *i915,
#define VEBOX_MASK(dev_priv) \
ENGINE_INSTANCES_MASK(dev_priv, VECS0, I915_MAX_VECS)
+/*
+ * The Gen7 cmdparser copies the scanned buffer to the ggtt for execution.
+ * All later gens can run the final buffer from the ppgtt.
+ */
+#define CMDPARSER_USES_GGTT(dev_priv) IS_GEN(dev_priv, 7)
+
#define HAS_LLC(dev_priv) (INTEL_INFO(dev_priv)->has_llc)
#define HAS_SNOOP(dev_priv) (INTEL_INFO(dev_priv)->has_snoop)
#define HAS_EDRAM(dev_priv) ((dev_priv)->edram_size_mb)
+#define HAS_SECURE_BATCHES(dev_priv) (INTEL_GEN(dev_priv) < 6)
#define HAS_WT(dev_priv) ((IS_HASWELL(dev_priv) || \
IS_BROADWELL(dev_priv)) && HAS_EDRAM(dev_priv))
@@ -1649,10 +1656,12 @@ IS_SUBPLATFORM(const struct drm_i915_private *i915,
/* Early gen2 have a totally busted CS tlb and require pinned batches. */
#define HAS_BROKEN_CS_TLB(dev_priv) (IS_I830(dev_priv) || IS_I845G(dev_priv))
+#define NEEDS_RC6_CTX_CORRUPTION_WA(dev_priv) \
+ (IS_BROADWELL(dev_priv) || IS_GEN(dev_priv, 9))
+
/* WaRsDisableCoarsePowerGating:skl,cnl */
#define NEEDS_WaRsDisableCoarsePowerGating(dev_priv) \
- (IS_CANNONLAKE(dev_priv) || \
- IS_SKL_GT3(dev_priv) || IS_SKL_GT4(dev_priv))
+ (IS_CANNONLAKE(dev_priv) || IS_GEN(dev_priv, 9))
#define HAS_GMBUS_IRQ(dev_priv) (INTEL_GEN(dev_priv) >= 4)
#define HAS_GMBUS_BURST_READ(dev_priv) (INTEL_GEN(dev_priv) >= 10 || \
@@ -1834,6 +1843,14 @@ int i915_gem_object_unbind(struct drm_i915_gem_object *obj,
unsigned long flags);
#define I915_GEM_OBJECT_UNBIND_ACTIVE BIT(0)
+struct i915_vma * __must_check
+i915_gem_object_pin(struct drm_i915_gem_object *obj,
+ struct i915_address_space *vm,
+ const struct i915_ggtt_view *view,
+ u64 size,
+ u64 alignment,
+ u64 flags);
+
void i915_gem_runtime_suspend(struct drm_i915_private *dev_priv);
static inline int __must_check
@@ -1939,12 +1956,14 @@ const char *i915_cache_level_str(struct drm_i915_private *i915, int type);
int i915_cmd_parser_get_version(struct drm_i915_private *dev_priv);
void intel_engine_init_cmd_parser(struct intel_engine_cs *engine);
void intel_engine_cleanup_cmd_parser(struct intel_engine_cs *engine);
-int intel_engine_cmd_parser(struct intel_engine_cs *engine,
+int intel_engine_cmd_parser(struct i915_gem_context *cxt,
+ struct intel_engine_cs *engine,
struct drm_i915_gem_object *batch_obj,
- struct drm_i915_gem_object *shadow_batch_obj,
+ u64 user_batch_start,
u32 batch_start_offset,
u32 batch_len,
- bool is_master);
+ struct drm_i915_gem_object *shadow_batch_obj,
+ u64 shadow_batch_start);
/* intel_device_info.c */
static inline struct intel_device_info *
@@ -2026,4 +2045,10 @@ i915_coherent_map_type(struct drm_i915_private *i915)
return HAS_LLC(i915) ? I915_MAP_WB : I915_MAP_WC;
}
+static inline bool intel_guc_submission_is_enabled(struct intel_guc *guc)
+{
+ return intel_guc_is_submission_supported(guc) &&
+ intel_guc_is_running(guc);
+}
+
#endif
diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c
index ac23322fb382..b9eb6b3149b7 100644
--- a/drivers/gpu/drm/i915/i915_gem.c
+++ b/drivers/gpu/drm/i915/i915_gem.c
@@ -893,6 +893,20 @@ i915_gem_object_ggtt_pin(struct drm_i915_gem_object *obj,
{
struct drm_i915_private *dev_priv = to_i915(obj->base.dev);
struct i915_address_space *vm = &dev_priv->ggtt.vm;
+
+ return i915_gem_object_pin(obj, vm, view, size, alignment,
+ flags | PIN_GLOBAL);
+}
+
+struct i915_vma *
+i915_gem_object_pin(struct drm_i915_gem_object *obj,
+ struct i915_address_space *vm,
+ const struct i915_ggtt_view *view,
+ u64 size,
+ u64 alignment,
+ u64 flags)
+{
+ struct drm_i915_private *dev_priv = to_i915(obj->base.dev);
struct i915_vma *vma;
int ret;
@@ -958,7 +972,7 @@ i915_gem_object_ggtt_pin(struct drm_i915_gem_object *obj,
return ERR_PTR(ret);
}
- ret = i915_vma_pin(vma, size, alignment, flags | PIN_GLOBAL);
+ ret = i915_vma_pin(vma, size, alignment, flags);
if (ret)
return ERR_PTR(ret);
diff --git a/drivers/gpu/drm/i915/i915_gem_gtt.c b/drivers/gpu/drm/i915/i915_gem_gtt.c
index 88179202c556..6239a9adbf14 100644
--- a/drivers/gpu/drm/i915/i915_gem_gtt.c
+++ b/drivers/gpu/drm/i915/i915_gem_gtt.c
@@ -2609,8 +2609,6 @@ static int init_aliasing_ppgtt(struct i915_ggtt *ggtt)
GEM_BUG_ON(ggtt->vm.vma_ops.unbind_vma != ggtt_unbind_vma);
ggtt->vm.vma_ops.unbind_vma = aliasing_gtt_unbind_vma;
- ppgtt->vm.total = ggtt->vm.total;
-
return 0;
err_ppgtt:
diff --git a/drivers/gpu/drm/i915/i915_getparam.c b/drivers/gpu/drm/i915/i915_getparam.c
index ad33fbe90a28..cf8a8c3ef047 100644
--- a/drivers/gpu/drm/i915/i915_getparam.c
+++ b/drivers/gpu/drm/i915/i915_getparam.c
@@ -63,7 +63,7 @@ int i915_getparam_ioctl(struct drm_device *dev, void *data,
value = !!(i915->caps.scheduler & I915_SCHEDULER_CAP_SEMAPHORES);
break;
case I915_PARAM_HAS_SECURE_BATCHES:
- value = capable(CAP_SYS_ADMIN);
+ value = HAS_SECURE_BATCHES(i915) && capable(CAP_SYS_ADMIN);
break;
case I915_PARAM_CMD_PARSER_VERSION:
value = i915_cmd_parser_get_version(i915);
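
With this change, I915_PARAM_HAS_SECURE_BATCHES reports true only where the hardware still honours secure batches (HAS_SECURE_BATCHES, i.e. pre-gen6) and the caller has CAP_SYS_ADMIN. A sketch of how userspace might probe the parameter (assumes libdrm headers, built with `cc $(pkg-config --cflags --libs libdrm)`; the render node path is an assumption):

#include <fcntl.h>
#include <stdio.h>
#include <unistd.h>

#include <xf86drm.h>
#include <i915_drm.h>

int main(void)
{
	/* any i915 DRM fd works; a render node is just one example */
	int fd = open("/dev/dri/renderD128", O_RDWR);
	int value = 0;
	struct drm_i915_getparam gp = {
		.param = I915_PARAM_HAS_SECURE_BATCHES,
		.value = &value,
	};

	if (fd < 0)
		return 1;

	if (drmIoctl(fd, DRM_IOCTL_I915_GETPARAM, &gp) == 0)
		printf("secure batches available: %d\n", value);

	close(fd);
	return 0;
}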
diff --git a/drivers/gpu/drm/i915/i915_gpu_error.c b/drivers/gpu/drm/i915/i915_gpu_error.c
index e8b67f5e521d..3c85cb0ee99f 100644
--- a/drivers/gpu/drm/i915/i915_gpu_error.c
+++ b/drivers/gpu/drm/i915/i915_gpu_error.c
@@ -1029,9 +1029,9 @@ i915_error_object_create(struct drm_i915_private *i915,
for_each_sgt_daddr(dma, iter, vma->pages) {
void __iomem *s;
- s = io_mapping_map_atomic_wc(&mem->iomap, dma);
+ s = io_mapping_map_wc(&mem->iomap, dma, PAGE_SIZE);
ret = compress_page(compress, (void __force *)s, dst);
- io_mapping_unmap_atomic(s);
+ io_mapping_unmap(s);
if (ret)
break;
}
@@ -1043,9 +1043,9 @@ i915_error_object_create(struct drm_i915_private *i915,
drm_clflush_pages(&page, 1);
- s = kmap_atomic(page);
+ s = kmap(page);
ret = compress_page(compress, s, dst);
- kunmap_atomic(s);
+ kunmap(s);
drm_clflush_pages(&page, 1);
diff --git a/drivers/gpu/drm/i915/i915_perf.c b/drivers/gpu/drm/i915/i915_perf.c
index a8c2318d3d5e..65d7c2e599de 100644
--- a/drivers/gpu/drm/i915/i915_perf.c
+++ b/drivers/gpu/drm/i915/i915_perf.c
@@ -1870,7 +1870,7 @@ alloc_oa_config_buffer(struct i915_perf_stream *stream,
config_length += num_lri_dwords(oa_config->mux_regs_len);
config_length += num_lri_dwords(oa_config->b_counter_regs_len);
config_length += num_lri_dwords(oa_config->flex_regs_len);
- config_length++; /* MI_BATCH_BUFFER_END */
+ config_length += 3; /* MI_BATCH_BUFFER_START */
config_length = ALIGN(sizeof(u32) * config_length, I915_GTT_PAGE_SIZE);
obj = i915_gem_object_create_shmem(stream->perf->i915, config_length);
@@ -1895,7 +1895,12 @@ alloc_oa_config_buffer(struct i915_perf_stream *stream,
oa_config->flex_regs,
oa_config->flex_regs_len);
- *cs++ = MI_BATCH_BUFFER_END;
+ /* Jump into the active wait. */
+ *cs++ = (INTEL_GEN(stream->perf->i915) < 8 ?
+ MI_BATCH_BUFFER_START :
+ MI_BATCH_BUFFER_START_GEN8);
+ *cs++ = i915_ggtt_offset(stream->noa_wait);
+ *cs++ = 0;
i915_gem_object_flush_map(obj);
i915_gem_object_unpin_map(obj);
@@ -3307,15 +3312,6 @@ i915_perf_open_ioctl_locked(struct i915_perf *perf,
}
}
- if (props->hold_preemption) {
- if (!props->single_context) {
- DRM_DEBUG("preemption disable with no context\n");
- ret = -EINVAL;
- goto err;
- }
- privileged_op = true;
- }
-
/*
* On Haswell the OA unit supports clock gating off for a specific
* context and in this mode there's no visibility of metrics for the
@@ -3335,12 +3331,21 @@ i915_perf_open_ioctl_locked(struct i915_perf *perf,
* doesn't request global stream access (i.e. query based sampling
* using MI_RECORD_PERF_COUNT.
*/
- if (IS_HASWELL(perf->i915) && specific_ctx && !props->hold_preemption)
+ if (IS_HASWELL(perf->i915) && specific_ctx)
privileged_op = false;
else if (IS_GEN(perf->i915, 12) && specific_ctx &&
(props->sample_flags & SAMPLE_OA_REPORT) == 0)
privileged_op = false;
+ if (props->hold_preemption) {
+ if (!props->single_context) {
+ DRM_DEBUG("preemption disable with no context\n");
+ ret = -EINVAL;
+ goto err;
+ }
+ privileged_op = true;
+ }
+
/* Similar to perf's kernel.perf_paranoid_cpu sysctl option
* we check a dev.i915.perf_stream_paranoid sysctl option
* to determine if it's ok to access system wide OA counters
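
The OA config buffer above no longer ends in MI_BATCH_BUFFER_END; it chains into the noa_wait buffer with a gen8-style MI_BATCH_BUFFER_START, which is why config_length grows by three dwords: opcode plus a 64-bit target address split into low and high dwords. A tiny sketch of that three-dword layout (the opcode constant and offset here are placeholders, not the real encoding):

#include <stdint.h>
#include <stdio.h>

#define FAKE_MI_BATCH_BUFFER_START_GEN8 0xdeadbeefu	/* placeholder opcode */

int main(void)
{
	uint64_t target = 0x12345000;	/* hypothetical GGTT offset of noa_wait */
	uint32_t cs[3];

	cs[0] = FAKE_MI_BATCH_BUFFER_START_GEN8;
	cs[1] = (uint32_t)target;		/* address, low dword */
	cs[2] = (uint32_t)(target >> 32);	/* address, high dword (0 here) */

	printf("%08x %08x %08x\n", cs[0], cs[1], cs[2]);
	return 0;
}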
diff --git a/drivers/gpu/drm/i915/i915_pmu.c b/drivers/gpu/drm/i915/i915_pmu.c
index 05395015d1f2..2814218c5ba1 100644
--- a/drivers/gpu/drm/i915/i915_pmu.c
+++ b/drivers/gpu/drm/i915/i915_pmu.c
@@ -190,7 +190,7 @@ static u64 get_rc6(struct intel_gt *gt)
val = 0;
if (intel_gt_pm_get_if_awake(gt)) {
val = __get_rc6(gt);
- intel_gt_pm_put(gt);
+ intel_gt_pm_put_async(gt);
}
spin_lock_irqsave(&pmu->lock, flags);
@@ -343,7 +343,7 @@ engines_sample(struct intel_gt *gt, unsigned int period_ns)
skip:
spin_unlock_irqrestore(&engine->uncore->lock, flags);
- intel_engine_pm_put(engine);
+ intel_engine_pm_put_async(engine);
}
}
@@ -368,7 +368,7 @@ frequency_sample(struct intel_gt *gt, unsigned int period_ns)
if (intel_gt_pm_get_if_awake(gt)) {
val = intel_uncore_read_notrace(uncore, GEN6_RPSTAT1);
val = intel_get_cagf(rps, val);
- intel_gt_pm_put(gt);
+ intel_gt_pm_put_async(gt);
}
add_sample_mult(&pmu->sample[__I915_SAMPLE_FREQ_ACT],
@@ -878,8 +878,8 @@ create_event_attributes(struct i915_pmu *pmu)
const char *name;
const char *unit;
} events[] = {
- __event(I915_PMU_ACTUAL_FREQUENCY, "actual-frequency", "MHz"),
- __event(I915_PMU_REQUESTED_FREQUENCY, "requested-frequency", "MHz"),
+ __event(I915_PMU_ACTUAL_FREQUENCY, "actual-frequency", "M"),
+ __event(I915_PMU_REQUESTED_FREQUENCY, "requested-frequency", "M"),
__event(I915_PMU_INTERRUPTS, "interrupts", NULL),
__event(I915_PMU_RC6_RESIDENCY, "rc6-residency", "ns"),
};
diff --git a/drivers/gpu/drm/i915/i915_query.c b/drivers/gpu/drm/i915/i915_query.c
index c27cfef9281c..ef25ce6e395e 100644
--- a/drivers/gpu/drm/i915/i915_query.c
+++ b/drivers/gpu/drm/i915/i915_query.c
@@ -103,15 +103,18 @@ query_engine_info(struct drm_i915_private *i915,
struct drm_i915_engine_info __user *info_ptr;
struct drm_i915_query_engine_info query;
struct drm_i915_engine_info info = { };
+ unsigned int num_uabi_engines = 0;
struct intel_engine_cs *engine;
int len, ret;
if (query_item->flags)
return -EINVAL;
+ for_each_uabi_engine(engine, i915)
+ num_uabi_engines++;
+
len = sizeof(struct drm_i915_query_engine_info) +
- RUNTIME_INFO(i915)->num_engines *
- sizeof(struct drm_i915_engine_info);
+ num_uabi_engines * sizeof(struct drm_i915_engine_info);
ret = copy_query_item(&query, sizeof(query), len, query_item);
if (ret != 0)
diff --git a/drivers/gpu/drm/i915/i915_reg.h b/drivers/gpu/drm/i915/i915_reg.h
index 53c280c4e741..73079b503724 100644
--- a/drivers/gpu/drm/i915/i915_reg.h
+++ b/drivers/gpu/drm/i915/i915_reg.h
@@ -474,6 +474,8 @@ static inline bool i915_mmio_reg_valid(i915_reg_t reg)
#define ECOCHK_PPGTT_WT_HSW (0x2 << 3)
#define ECOCHK_PPGTT_WB_HSW (0x3 << 3)
+#define GEN8_RC6_CTX_INFO _MMIO(0x8504)
+
#define GAC_ECO_BITS _MMIO(0x14090)
#define ECOBITS_SNB_BIT (1 << 13)
#define ECOBITS_PPGTT_CACHE64B (3 << 8)
@@ -560,6 +562,10 @@ static inline bool i915_mmio_reg_valid(i915_reg_t reg)
*/
#define BCS_SWCTRL _MMIO(0x22200)
+/* There are 16 GPR registers */
+#define BCS_GPR(n) _MMIO(0x22600 + (n) * 8)
+#define BCS_GPR_UDW(n) _MMIO(0x22600 + (n) * 8 + 4)
+
#define GPGPU_THREADS_DISPATCHED _MMIO(0x2290)
#define GPGPU_THREADS_DISPATCHED_UDW _MMIO(0x2290 + 4)
#define HS_INVOCATION_COUNT _MMIO(0x2300)
@@ -7353,6 +7359,10 @@ enum {
#define DMC_DEBUG3 _MMIO(0x101090)
+/* Display Internal Timeout Register */
+#define RM_TIMEOUT _MMIO(0x42060)
+#define MMIO_TIMEOUT_US(us) ((us) << 0)
+
/* interrupts */
#define DE_MASTER_IRQ_CONTROL (1 << 31)
#define DE_SPRITEB_FLIP_DONE (1 << 29)
@@ -9661,7 +9671,7 @@ enum skl_power_gate {
#define TRANS_DDI_EDP_INPUT_A_ONOFF (4 << 12)
#define TRANS_DDI_EDP_INPUT_B_ONOFF (5 << 12)
#define TRANS_DDI_EDP_INPUT_C_ONOFF (6 << 12)
-#define TRANS_DDI_MST_TRANSPORT_SELECT_MASK REG_GENMASK(12, 10)
+#define TRANS_DDI_MST_TRANSPORT_SELECT_MASK REG_GENMASK(11, 10)
#define TRANS_DDI_MST_TRANSPORT_SELECT(trans) \
REG_FIELD_PREP(TRANS_DDI_MST_TRANSPORT_SELECT_MASK, trans)
#define TRANS_DDI_HDCP_SIGNALLING (1 << 9)
diff --git a/drivers/gpu/drm/i915/intel_pch.c b/drivers/gpu/drm/i915/intel_pch.c
index 000ba43e2c02..8fd92b9130a7 100644
--- a/drivers/gpu/drm/i915/intel_pch.c
+++ b/drivers/gpu/drm/i915/intel_pch.c
@@ -62,7 +62,6 @@ intel_pch_type(const struct drm_i915_private *dev_priv, unsigned short id)
/* KBP is SPT compatible */
return PCH_SPT;
case INTEL_PCH_CNP_DEVICE_ID_TYPE:
- case INTEL_PCH_CNP2_DEVICE_ID_TYPE:
DRM_DEBUG_KMS("Found Cannon Lake PCH (CNP)\n");
WARN_ON(!IS_CANNONLAKE(dev_priv) && !IS_COFFEELAKE(dev_priv));
return PCH_CNP;
@@ -76,6 +75,11 @@ intel_pch_type(const struct drm_i915_private *dev_priv, unsigned short id)
WARN_ON(!IS_COFFEELAKE(dev_priv));
/* CometPoint is CNP Compatible */
return PCH_CNP;
+ case INTEL_PCH_CMP_V_DEVICE_ID_TYPE:
+ DRM_DEBUG_KMS("Found Comet Lake V PCH (CMP-V)\n");
+ WARN_ON(!IS_COFFEELAKE(dev_priv));
+ /* Comet Lake V PCH is based on KBP, which is SPT compatible */
+ return PCH_SPT;
case INTEL_PCH_ICP_DEVICE_ID_TYPE:
DRM_DEBUG_KMS("Found Ice Lake PCH\n");
WARN_ON(!IS_ICELAKE(dev_priv));
diff --git a/drivers/gpu/drm/i915/intel_pch.h b/drivers/gpu/drm/i915/intel_pch.h
index 1115c6a0522c..d26c25dd8d54 100644
--- a/drivers/gpu/drm/i915/intel_pch.h
+++ b/drivers/gpu/drm/i915/intel_pch.h
@@ -40,10 +40,10 @@ enum intel_pch {
#define INTEL_PCH_SPT_LP_DEVICE_ID_TYPE 0x9D00
#define INTEL_PCH_KBP_DEVICE_ID_TYPE 0xA280
#define INTEL_PCH_CNP_DEVICE_ID_TYPE 0xA300
-#define INTEL_PCH_CNP2_DEVICE_ID_TYPE 0xA380
#define INTEL_PCH_CNP_LP_DEVICE_ID_TYPE 0x9D80
#define INTEL_PCH_CMP_DEVICE_ID_TYPE 0x0280
#define INTEL_PCH_CMP2_DEVICE_ID_TYPE 0x0680
+#define INTEL_PCH_CMP_V_DEVICE_ID_TYPE 0xA380
#define INTEL_PCH_ICP_DEVICE_ID_TYPE 0x3480
#define INTEL_PCH_MCC_DEVICE_ID_TYPE 0x4B00
#define INTEL_PCH_TGP_DEVICE_ID_TYPE 0xA080
diff --git a/drivers/gpu/drm/i915/intel_pm.c b/drivers/gpu/drm/i915/intel_pm.c
index 5d2b460d3ee5..809bff955b5a 100644
--- a/drivers/gpu/drm/i915/intel_pm.c
+++ b/drivers/gpu/drm/i915/intel_pm.c
@@ -107,6 +107,14 @@ static void bxt_init_clock_gating(struct drm_i915_private *dev_priv)
*/
I915_WRITE(GEN9_CLKGATE_DIS_0, I915_READ(GEN9_CLKGATE_DIS_0) |
PWM1_GATING_DIS | PWM2_GATING_DIS);
+
+ /*
+ * Lower the display internal timeout.
+ * This is needed to avoid any hard hangs when DSI port PLL
+ * is off and an MMIO access is attempted by any privileged
+ * application, using batch buffers or any other means.
+ */
+ I915_WRITE(RM_TIMEOUT, MMIO_TIMEOUT_US(950));
}
static void glk_init_clock_gating(struct drm_i915_private *dev_priv)
diff --git a/drivers/gpu/drm/i915/intel_wakeref.c b/drivers/gpu/drm/i915/intel_wakeref.c
index 868cc78048d0..59aa1b6f1827 100644
--- a/drivers/gpu/drm/i915/intel_wakeref.c
+++ b/drivers/gpu/drm/i915/intel_wakeref.c
@@ -54,7 +54,8 @@ int __intel_wakeref_get_first(struct intel_wakeref *wf)
static void ____intel_wakeref_put_last(struct intel_wakeref *wf)
{
- if (!atomic_dec_and_test(&wf->count))
+ INTEL_WAKEREF_BUG_ON(atomic_read(&wf->count) <= 0);
+ if (unlikely(!atomic_dec_and_test(&wf->count)))
goto unlock;
/* ops->put() must reschedule its own release on error/deferral */
@@ -67,13 +68,12 @@ unlock:
mutex_unlock(&wf->mutex);
}
-void __intel_wakeref_put_last(struct intel_wakeref *wf)
+void __intel_wakeref_put_last(struct intel_wakeref *wf, unsigned long flags)
{
INTEL_WAKEREF_BUG_ON(work_pending(&wf->work));
/* Assume we are not in process context and so cannot sleep. */
- if (wf->ops->flags & INTEL_WAKEREF_PUT_ASYNC ||
- !mutex_trylock(&wf->mutex)) {
+ if (flags & INTEL_WAKEREF_PUT_ASYNC || !mutex_trylock(&wf->mutex)) {
schedule_work(&wf->work);
return;
}
@@ -109,8 +109,17 @@ void __intel_wakeref_init(struct intel_wakeref *wf,
int intel_wakeref_wait_for_idle(struct intel_wakeref *wf)
{
- return wait_var_event_killable(&wf->wakeref,
- !intel_wakeref_is_active(wf));
+ int err;
+
+ might_sleep();
+
+ err = wait_var_event_killable(&wf->wakeref,
+ !intel_wakeref_is_active(wf));
+ if (err)
+ return err;
+
+ intel_wakeref_unlock_wait(wf);
+ return 0;
}
static void wakeref_auto_timeout(struct timer_list *t)
diff --git a/drivers/gpu/drm/i915/intel_wakeref.h b/drivers/gpu/drm/i915/intel_wakeref.h
index 5f0c972a80fb..da6e8fd506e6 100644
--- a/drivers/gpu/drm/i915/intel_wakeref.h
+++ b/drivers/gpu/drm/i915/intel_wakeref.h
@@ -9,6 +9,7 @@
#include <linux/atomic.h>
#include <linux/bits.h>
+#include <linux/lockdep.h>
#include <linux/mutex.h>
#include <linux/refcount.h>
#include <linux/stackdepot.h>
@@ -29,9 +30,6 @@ typedef depot_stack_handle_t intel_wakeref_t;
struct intel_wakeref_ops {
int (*get)(struct intel_wakeref *wf);
int (*put)(struct intel_wakeref *wf);
-
- unsigned long flags;
-#define INTEL_WAKEREF_PUT_ASYNC BIT(0)
};
struct intel_wakeref {
@@ -57,7 +55,7 @@ void __intel_wakeref_init(struct intel_wakeref *wf,
} while (0)
int __intel_wakeref_get_first(struct intel_wakeref *wf);
-void __intel_wakeref_put_last(struct intel_wakeref *wf);
+void __intel_wakeref_put_last(struct intel_wakeref *wf, unsigned long flags);
/**
* intel_wakeref_get: Acquire the wakeref
@@ -100,10 +98,9 @@ intel_wakeref_get_if_active(struct intel_wakeref *wf)
}
/**
- * intel_wakeref_put: Release the wakeref
- * @i915: the drm_i915_private device
+ * __intel_wakeref_put: Release the wakeref
* @wf: the wakeref
- * @fn: callback for releasing the wakeref, called only on final release.
+ * @flags: control flags
*
* Release our hold on the wakeref. When there are no more users,
* the runtime pm wakeref will be released after the @fn callback is called
@@ -116,11 +113,25 @@ intel_wakeref_get_if_active(struct intel_wakeref *wf)
* code otherwise.
*/
static inline void
-intel_wakeref_put(struct intel_wakeref *wf)
+__intel_wakeref_put(struct intel_wakeref *wf, unsigned long flags)
+#define INTEL_WAKEREF_PUT_ASYNC BIT(0)
{
INTEL_WAKEREF_BUG_ON(atomic_read(&wf->count) <= 0);
if (unlikely(!atomic_add_unless(&wf->count, -1, 1)))
- __intel_wakeref_put_last(wf);
+ __intel_wakeref_put_last(wf, flags);
+}
+
+static inline void
+intel_wakeref_put(struct intel_wakeref *wf)
+{
+ might_sleep();
+ __intel_wakeref_put(wf, 0);
+}
+
+static inline void
+intel_wakeref_put_async(struct intel_wakeref *wf)
+{
+ __intel_wakeref_put(wf, INTEL_WAKEREF_PUT_ASYNC);
}
/**
@@ -152,6 +163,21 @@ intel_wakeref_unlock(struct intel_wakeref *wf)
}
/**
+ * intel_wakeref_unlock_wait: Wait until the active callback is complete
+ * @wf: the wakeref
+ *
+ * Waits until the active callback (under the @wf->mutex or on another CPU)
+ * is complete.
+ */
+static inline void
+intel_wakeref_unlock_wait(struct intel_wakeref *wf)
+{
+ mutex_lock(&wf->mutex);
+ mutex_unlock(&wf->mutex);
+ flush_work(&wf->work);
+}
+
+/**
* intel_wakeref_is_active: Query whether the wakeref is currently held
* @wf: the wakeref
*
@@ -170,6 +196,7 @@ intel_wakeref_is_active(const struct intel_wakeref *wf)
static inline void
__intel_wakeref_defer_park(struct intel_wakeref *wf)
{
+ lockdep_assert_held(&wf->mutex);
INTEL_WAKEREF_BUG_ON(atomic_read(&wf->count));
atomic_set_release(&wf->count, 1);
}
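
intel_wakeref_unlock_wait() uses the lock/unlock pair purely as a barrier: by the time the mutex has been acquired and dropped, any concurrent holder of the wakeref mutex (such as the parking callback) has finished, and the flush_work() then catches a release that intel_wakeref_put_async() handed off to the worker. The same lock-as-barrier idiom in plain pthreads (illustrative only):

#include <pthread.h>

/*
 * Wait for whoever currently holds @lock to leave its critical section.
 * Acquiring and immediately releasing the mutex is enough: the acquire
 * cannot succeed until the previous holder has unlocked.
 */
static void wait_for_critical_section(pthread_mutex_t *lock)
{
	pthread_mutex_lock(lock);
	pthread_mutex_unlock(lock);
}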