Diffstat (limited to 'drivers/gpu/drm/i915/gt')
-rw-r--r--  drivers/gpu/drm/i915/gt/Makefile | 2
-rw-r--r--  drivers/gpu/drm/i915/gt/Makefile.header-test | 16
-rw-r--r--  drivers/gpu/drm/i915/gt/debugfs_engines.c | 36
-rw-r--r--  drivers/gpu/drm/i915/gt/debugfs_engines.h | 14
-rw-r--r--  drivers/gpu/drm/i915/gt/debugfs_gt.c | 42
-rw-r--r--  drivers/gpu/drm/i915/gt/debugfs_gt.h | 39
-rw-r--r--  drivers/gpu/drm/i915/gt/debugfs_gt_pm.c | 601
-rw-r--r--  drivers/gpu/drm/i915/gt/debugfs_gt_pm.h | 14
-rw-r--r--  drivers/gpu/drm/i915/gt/gen6_ppgtt.c | 483
-rw-r--r--  drivers/gpu/drm/i915/gt/gen6_ppgtt.h | 76
-rw-r--r--  drivers/gpu/drm/i915/gt/gen6_renderstate.c | 315
-rw-r--r--  drivers/gpu/drm/i915/gt/gen7_renderstate.c | 279
-rw-r--r--  drivers/gpu/drm/i915/gt/gen8_ppgtt.c | 724
-rw-r--r--  drivers/gpu/drm/i915/gt/gen8_ppgtt.h | 13
-rw-r--r--  drivers/gpu/drm/i915/gt/gen8_renderstate.c | 983
-rw-r--r--  drivers/gpu/drm/i915/gt/gen9_renderstate.c | 999
-rw-r--r--  drivers/gpu/drm/i915/gt/intel_breadcrumbs.c | 116
-rw-r--r--  drivers/gpu/drm/i915/gt/intel_context.c | 285
-rw-r--r--  drivers/gpu/drm/i915/gt/intel_context.h | 119
-rw-r--r--  drivers/gpu/drm/i915/gt/intel_context_types.h | 24
-rw-r--r--  drivers/gpu/drm/i915/gt/intel_engine.h | 332
-rw-r--r--  drivers/gpu/drm/i915/gt/intel_engine_cs.c | 854
-rw-r--r--  drivers/gpu/drm/i915/gt/intel_engine_heartbeat.c | 242
-rw-r--r--  drivers/gpu/drm/i915/gt/intel_engine_heartbeat.h | 23
-rw-r--r--  drivers/gpu/drm/i915/gt/intel_engine_pm.c | 213
-rw-r--r--  drivers/gpu/drm/i915/gt/intel_engine_pm.h | 51
-rw-r--r--  drivers/gpu/drm/i915/gt/intel_engine_pool.c | 190
-rw-r--r--  drivers/gpu/drm/i915/gt/intel_engine_pool.h | 34
-rw-r--r--  drivers/gpu/drm/i915/gt/intel_engine_pool_types.h | 29
-rw-r--r--  drivers/gpu/drm/i915/gt/intel_engine_types.h | 268
-rw-r--r--  drivers/gpu/drm/i915/gt/intel_engine_user.c | 299
-rw-r--r--  drivers/gpu/drm/i915/gt/intel_engine_user.h | 25
-rw-r--r--  drivers/gpu/drm/i915/gt/intel_ggtt.c | 1486
-rw-r--r--  drivers/gpu/drm/i915/gt/intel_gpu_commands.h | 84
-rw-r--r--  drivers/gpu/drm/i915/gt/intel_gt.c | 658
-rw-r--r--  drivers/gpu/drm/i915/gt/intel_gt.h | 71
-rw-r--r--  drivers/gpu/drm/i915/gt/intel_gt_irq.c | 456
-rw-r--r--  drivers/gpu/drm/i915/gt/intel_gt_irq.h | 44
-rw-r--r--  drivers/gpu/drm/i915/gt/intel_gt_pm.c | 311
-rw-r--r--  drivers/gpu/drm/i915/gt/intel_gt_pm.h | 59
-rw-r--r--  drivers/gpu/drm/i915/gt/intel_gt_pm_irq.c | 109
-rw-r--r--  drivers/gpu/drm/i915/gt/intel_gt_pm_irq.h | 22
-rw-r--r--  drivers/gpu/drm/i915/gt/intel_gt_requests.c | 225
-rw-r--r--  drivers/gpu/drm/i915/gt/intel_gt_requests.h | 32
-rw-r--r--  drivers/gpu/drm/i915/gt/intel_gt_types.h | 119
-rw-r--r--  drivers/gpu/drm/i915/gt/intel_gtt.c | 598
-rw-r--r--  drivers/gpu/drm/i915/gt/intel_gtt.h | 587
-rw-r--r--  drivers/gpu/drm/i915/gt/intel_hangcheck.c | 347
-rw-r--r--  drivers/gpu/drm/i915/gt/intel_llc.c | 161
-rw-r--r--  drivers/gpu/drm/i915/gt/intel_llc.h | 15
-rw-r--r--  drivers/gpu/drm/i915/gt/intel_llc_types.h | 13
-rw-r--r--  drivers/gpu/drm/i915/gt/intel_lrc.c | 3463
-rw-r--r--  drivers/gpu/drm/i915/gt/intel_lrc.h | 46
-rw-r--r--  drivers/gpu/drm/i915/gt/intel_lrc_reg.h | 67
-rw-r--r--  drivers/gpu/drm/i915/gt/intel_mocs.c | 426
-rw-r--r--  drivers/gpu/drm/i915/gt/intel_mocs.h | 6
-rw-r--r--  drivers/gpu/drm/i915/gt/intel_ppgtt.c | 218
-rw-r--r--  drivers/gpu/drm/i915/gt/intel_rc6.c | 776
-rw-r--r--  drivers/gpu/drm/i915/gt/intel_rc6.h | 28
-rw-r--r--  drivers/gpu/drm/i915/gt/intel_rc6_types.h | 31
-rw-r--r--  drivers/gpu/drm/i915/gt/intel_renderstate.c | 250
-rw-r--r--  drivers/gpu/drm/i915/gt/intel_renderstate.h | 66
-rw-r--r--  drivers/gpu/drm/i915/gt/intel_reset.c | 875
-rw-r--r--  drivers/gpu/drm/i915/gt/intel_reset.h | 83
-rw-r--r--  drivers/gpu/drm/i915/gt/intel_reset_types.h | 56
-rw-r--r--  drivers/gpu/drm/i915/gt/intel_ring.c | 318
-rw-r--r--  drivers/gpu/drm/i915/gt/intel_ring.h | 131
-rw-r--r--  drivers/gpu/drm/i915/gt/intel_ring_submission.c (renamed from drivers/gpu/drm/i915/gt/intel_ringbuffer.c) | 873
-rw-r--r--  drivers/gpu/drm/i915/gt/intel_ring_types.h | 51
-rw-r--r--  drivers/gpu/drm/i915/gt/intel_rps.c | 1903
-rw-r--r--  drivers/gpu/drm/i915/gt/intel_rps.h | 39
-rw-r--r--  drivers/gpu/drm/i915/gt/intel_rps_types.h | 93
-rw-r--r--  drivers/gpu/drm/i915/gt/intel_sseu.c | 39
-rw-r--r--  drivers/gpu/drm/i915/gt/intel_sseu.h | 37
-rw-r--r--  drivers/gpu/drm/i915/gt/intel_timeline.c | 573
-rw-r--r--  drivers/gpu/drm/i915/gt/intel_timeline.h | 94
-rw-r--r--  drivers/gpu/drm/i915/gt/intel_timeline_types.h | 100
-rw-r--r--  drivers/gpu/drm/i915/gt/intel_workarounds.c | 369
-rw-r--r--  drivers/gpu/drm/i915/gt/intel_workarounds.h | 6
-rw-r--r--  drivers/gpu/drm/i915/gt/intel_workarounds_types.h | 1
-rw-r--r--  drivers/gpu/drm/i915/gt/mock_engine.c | 161
-rw-r--r--  drivers/gpu/drm/i915/gt/selftest_context.c | 443
-rw-r--r--  drivers/gpu/drm/i915/gt/selftest_engine.c | 28
-rw-r--r--  drivers/gpu/drm/i915/gt/selftest_engine.h | 14
-rw-r--r--  drivers/gpu/drm/i915/gt/selftest_engine_cs.c | 386
-rw-r--r--  drivers/gpu/drm/i915/gt/selftest_engine_heartbeat.c | 374
-rw-r--r--  drivers/gpu/drm/i915/gt/selftest_engine_pm.c | 84
-rw-r--r--  drivers/gpu/drm/i915/gt/selftest_gt_pm.c | 79
-rw-r--r--  drivers/gpu/drm/i915/gt/selftest_hangcheck.c | 703
-rw-r--r--  drivers/gpu/drm/i915/gt/selftest_llc.c | 80
-rw-r--r--  drivers/gpu/drm/i915/gt/selftest_llc.h | 14
-rw-r--r--  drivers/gpu/drm/i915/gt/selftest_lrc.c | 2925
-rw-r--r--  drivers/gpu/drm/i915/gt/selftest_mocs.c | 419
-rw-r--r--  drivers/gpu/drm/i915/gt/selftest_rc6.c | 203
-rw-r--r--  drivers/gpu/drm/i915/gt/selftest_rc6.h | 13
-rw-r--r--  drivers/gpu/drm/i915/gt/selftest_reset.c | 133
-rw-r--r--  drivers/gpu/drm/i915/gt/selftest_timeline.c | 836
-rw-r--r--  drivers/gpu/drm/i915/gt/selftest_workarounds.c | 466
-rw-r--r--  drivers/gpu/drm/i915/gt/selftests/mock_timeline.c | 29
-rw-r--r--  drivers/gpu/drm/i915/gt/selftests/mock_timeline.h | 17
-rw-r--r--  drivers/gpu/drm/i915/gt/uc/intel_guc.c | 727
-rw-r--r--  drivers/gpu/drm/i915/gt/uc/intel_guc.h | 186
-rw-r--r--  drivers/gpu/drm/i915/gt/uc/intel_guc_ads.c | 172
-rw-r--r--  drivers/gpu/drm/i915/gt/uc/intel_guc_ads.h | 15
-rw-r--r--  drivers/gpu/drm/i915/gt/uc/intel_guc_ct.c | 822
-rw-r--r--  drivers/gpu/drm/i915/gt/uc/intel_guc_ct.h | 77
-rw-r--r--  drivers/gpu/drm/i915/gt/uc/intel_guc_fw.c | 166
-rw-r--r--  drivers/gpu/drm/i915/gt/uc/intel_guc_fw.h | 14
-rw-r--r--  drivers/gpu/drm/i915/gt/uc/intel_guc_fwif.h | 603
-rw-r--r--  drivers/gpu/drm/i915/gt/uc/intel_guc_log.c | 674
-rw-r--r--  drivers/gpu/drm/i915/gt/uc/intel_guc_log.h | 82
-rw-r--r--  drivers/gpu/drm/i915/gt/uc/intel_guc_reg.h | 146
-rw-r--r--  drivers/gpu/drm/i915/gt/uc/intel_guc_submission.c | 682
-rw-r--r--  drivers/gpu/drm/i915/gt/uc/intel_guc_submission.h | 23
-rw-r--r--  drivers/gpu/drm/i915/gt/uc/intel_huc.c | 219
-rw-r--r--  drivers/gpu/drm/i915/gt/uc/intel_huc.h | 54
-rw-r--r--  drivers/gpu/drm/i915/gt/uc/intel_huc_fw.c | 43
-rw-r--r--  drivers/gpu/drm/i915/gt/uc/intel_huc_fw.h | 14
-rw-r--r--  drivers/gpu/drm/i915/gt/uc/intel_uc.c | 620
-rw-r--r--  drivers/gpu/drm/i915/gt/uc/intel_uc.h | 89
-rw-r--r--  drivers/gpu/drm/i915/gt/uc/intel_uc_fw.c | 604
-rw-r--r--  drivers/gpu/drm/i915/gt/uc/intel_uc_fw.h | 240
-rw-r--r--  drivers/gpu/drm/i915/gt/uc/intel_uc_fw_abi.h | 77
123 files changed, 32910 insertions, 5001 deletions
diff --git a/drivers/gpu/drm/i915/gt/Makefile b/drivers/gpu/drm/i915/gt/Makefile
deleted file mode 100644
index 1c75b5c9790c..000000000000
--- a/drivers/gpu/drm/i915/gt/Makefile
+++ /dev/null
@@ -1,2 +0,0 @@
-# Extra header tests
-include $(src)/Makefile.header-test
diff --git a/drivers/gpu/drm/i915/gt/Makefile.header-test b/drivers/gpu/drm/i915/gt/Makefile.header-test
deleted file mode 100644
index 61e06cbb4b32..000000000000
--- a/drivers/gpu/drm/i915/gt/Makefile.header-test
+++ /dev/null
@@ -1,16 +0,0 @@
-# SPDX-License-Identifier: MIT
-# Copyright © 2019 Intel Corporation
-
-# Test the headers are compilable as standalone units
-header_test := $(notdir $(wildcard $(src)/*.h))
-
-quiet_cmd_header_test = HDRTEST $@
- cmd_header_test = echo "\#include \"$(<F)\"" > $@
-
-header_test_%.c: %.h
- $(call cmd,header_test)
-
-extra-$(CONFIG_DRM_I915_WERROR) += \
- $(foreach h,$(header_test),$(patsubst %.h,header_test_%.o,$(h)))
-
-clean-files += $(foreach h,$(header_test),$(patsubst %.h,header_test_%.c,$(h)))
diff --git a/drivers/gpu/drm/i915/gt/debugfs_engines.c b/drivers/gpu/drm/i915/gt/debugfs_engines.c
new file mode 100644
index 000000000000..6a5e9ab20b94
--- /dev/null
+++ b/drivers/gpu/drm/i915/gt/debugfs_engines.c
@@ -0,0 +1,36 @@
+// SPDX-License-Identifier: MIT
+
+/*
+ * Copyright © 2019 Intel Corporation
+ */
+
+#include <drm/drm_print.h>
+
+#include "debugfs_engines.h"
+#include "debugfs_gt.h"
+#include "i915_drv.h" /* for_each_engine! */
+#include "intel_engine.h"
+
+static int engines_show(struct seq_file *m, void *data)
+{
+ struct intel_gt *gt = m->private;
+ struct intel_engine_cs *engine;
+ enum intel_engine_id id;
+ struct drm_printer p;
+
+ p = drm_seq_file_printer(m);
+ for_each_engine(engine, gt, id)
+ intel_engine_dump(engine, &p, "%s\n", engine->name);
+
+ return 0;
+}
+DEFINE_GT_DEBUGFS_ATTRIBUTE(engines);
+
+void debugfs_engines_register(struct intel_gt *gt, struct dentry *root)
+{
+ static const struct debugfs_gt_file files[] = {
+ { "engines", &engines_fops },
+ };
+
+ debugfs_gt_register_files(gt, root, files, ARRAY_SIZE(files));
+}
diff --git a/drivers/gpu/drm/i915/gt/debugfs_engines.h b/drivers/gpu/drm/i915/gt/debugfs_engines.h
new file mode 100644
index 000000000000..f69257eaa1cc
--- /dev/null
+++ b/drivers/gpu/drm/i915/gt/debugfs_engines.h
@@ -0,0 +1,14 @@
+/* SPDX-License-Identifier: MIT */
+/*
+ * Copyright © 2019 Intel Corporation
+ */
+
+#ifndef DEBUGFS_ENGINES_H
+#define DEBUGFS_ENGINES_H
+
+struct intel_gt;
+struct dentry;
+
+void debugfs_engines_register(struct intel_gt *gt, struct dentry *root);
+
+#endif /* DEBUGFS_ENGINES_H */
diff --git a/drivers/gpu/drm/i915/gt/debugfs_gt.c b/drivers/gpu/drm/i915/gt/debugfs_gt.c
new file mode 100644
index 000000000000..75255aaacaed
--- /dev/null
+++ b/drivers/gpu/drm/i915/gt/debugfs_gt.c
@@ -0,0 +1,42 @@
+// SPDX-License-Identifier: MIT
+
+/*
+ * Copyright © 2019 Intel Corporation
+ */
+
+#include <linux/debugfs.h>
+
+#include "debugfs_engines.h"
+#include "debugfs_gt.h"
+#include "debugfs_gt_pm.h"
+#include "i915_drv.h"
+
+void debugfs_gt_register(struct intel_gt *gt)
+{
+ struct dentry *root;
+
+ if (!gt->i915->drm.primary->debugfs_root)
+ return;
+
+ root = debugfs_create_dir("gt", gt->i915->drm.primary->debugfs_root);
+ if (IS_ERR(root))
+ return;
+
+ debugfs_engines_register(gt, root);
+ debugfs_gt_pm_register(gt, root);
+}
+
+void debugfs_gt_register_files(struct intel_gt *gt,
+ struct dentry *root,
+ const struct debugfs_gt_file *files,
+ unsigned long count)
+{
+ while (count--) {
+ if (!files->eval || files->eval(gt))
+ debugfs_create_file(files->name,
+ 0444, root, gt,
+ files->fops);
+
+ files++;
+ }
+}
diff --git a/drivers/gpu/drm/i915/gt/debugfs_gt.h b/drivers/gpu/drm/i915/gt/debugfs_gt.h
new file mode 100644
index 000000000000..4ea0f06cda8f
--- /dev/null
+++ b/drivers/gpu/drm/i915/gt/debugfs_gt.h
@@ -0,0 +1,39 @@
+/* SPDX-License-Identifier: MIT */
+/*
+ * Copyright © 2019 Intel Corporation
+ */
+
+#ifndef DEBUGFS_GT_H
+#define DEBUGFS_GT_H
+
+#include <linux/file.h>
+
+struct intel_gt;
+
+#define DEFINE_GT_DEBUGFS_ATTRIBUTE(__name) \
+ static int __name ## _open(struct inode *inode, struct file *file) \
+{ \
+ return single_open(file, __name ## _show, inode->i_private); \
+} \
+static const struct file_operations __name ## _fops = { \
+ .owner = THIS_MODULE, \
+ .open = __name ## _open, \
+ .read = seq_read, \
+ .llseek = seq_lseek, \
+ .release = single_release, \
+}
+
+void debugfs_gt_register(struct intel_gt *gt);
+
+struct debugfs_gt_file {
+ const char *name;
+ const struct file_operations *fops;
+ bool (*eval)(const struct intel_gt *gt);
+};
+
+void debugfs_gt_register_files(struct intel_gt *gt,
+ struct dentry *root,
+ const struct debugfs_gt_file *files,
+ unsigned long count);
+
+#endif /* DEBUGFS_GT_H */
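
(A minimal sketch of how the two helpers above combine, for a hypothetical
"foo" file; foo_show(), foo_eval() and debugfs_foo_register() are
illustrative names, not part of this patch:)

	static int foo_show(struct seq_file *m, void *data)
	{
		struct intel_gt *gt = m->private;

		seq_printf(m, "awake? %s\n", yesno(gt->awake));
		return 0;
	}
	DEFINE_GT_DEBUGFS_ATTRIBUTE(foo); /* generates foo_open() and foo_fops */

	static bool foo_eval(const struct intel_gt *gt)
	{
		return HAS_RPS(gt->i915); /* register only where supported */
	}

	void debugfs_foo_register(struct intel_gt *gt, struct dentry *root)
	{
		static const struct debugfs_gt_file files[] = {
			{ "foo", &foo_fops, foo_eval },
		};

		debugfs_gt_register_files(gt, root, files, ARRAY_SIZE(files));
	}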
diff --git a/drivers/gpu/drm/i915/gt/debugfs_gt_pm.c b/drivers/gpu/drm/i915/gt/debugfs_gt_pm.c
new file mode 100644
index 000000000000..059c9e5c002e
--- /dev/null
+++ b/drivers/gpu/drm/i915/gt/debugfs_gt_pm.c
@@ -0,0 +1,601 @@
+// SPDX-License-Identifier: MIT
+
+/*
+ * Copyright © 2019 Intel Corporation
+ */
+
+#include <linux/seq_file.h>
+
+#include "debugfs_gt.h"
+#include "debugfs_gt_pm.h"
+#include "i915_drv.h"
+#include "intel_gt.h"
+#include "intel_llc.h"
+#include "intel_rc6.h"
+#include "intel_rps.h"
+#include "intel_runtime_pm.h"
+#include "intel_sideband.h"
+#include "intel_uncore.h"
+
+static int fw_domains_show(struct seq_file *m, void *data)
+{
+ struct intel_gt *gt = m->private;
+ struct intel_uncore *uncore = gt->uncore;
+ struct intel_uncore_forcewake_domain *fw_domain;
+ unsigned int tmp;
+
+ seq_printf(m, "user.bypass_count = %u\n",
+ uncore->user_forcewake_count);
+
+ for_each_fw_domain(fw_domain, uncore, tmp)
+ seq_printf(m, "%s.wake_count = %u\n",
+ intel_uncore_forcewake_domain_to_str(fw_domain->id),
+ READ_ONCE(fw_domain->wake_count));
+
+ return 0;
+}
+DEFINE_GT_DEBUGFS_ATTRIBUTE(fw_domains);
+
+static void print_rc6_res(struct seq_file *m,
+ const char *title,
+ const i915_reg_t reg)
+{
+ struct intel_gt *gt = m->private;
+ intel_wakeref_t wakeref;
+
+ with_intel_runtime_pm(gt->uncore->rpm, wakeref)
+ seq_printf(m, "%s %u (%llu us)\n", title,
+ intel_uncore_read(gt->uncore, reg),
+ intel_rc6_residency_us(&gt->rc6, reg));
+}
+
+static int vlv_drpc(struct seq_file *m)
+{
+ struct intel_gt *gt = m->private;
+ struct intel_uncore *uncore = gt->uncore;
+ u32 rcctl1, pw_status;
+
+ pw_status = intel_uncore_read(uncore, VLV_GTLC_PW_STATUS);
+ rcctl1 = intel_uncore_read(uncore, GEN6_RC_CONTROL);
+
+ seq_printf(m, "RC6 Enabled: %s\n",
+ yesno(rcctl1 & (GEN7_RC_CTL_TO_MODE |
+ GEN6_RC_CTL_EI_MODE(1))));
+ seq_printf(m, "Render Power Well: %s\n",
+ (pw_status & VLV_GTLC_PW_RENDER_STATUS_MASK) ? "Up" : "Down");
+ seq_printf(m, "Media Power Well: %s\n",
+ (pw_status & VLV_GTLC_PW_MEDIA_STATUS_MASK) ? "Up" : "Down");
+
+ print_rc6_res(m, "Render RC6 residency since boot:", VLV_GT_RENDER_RC6);
+ print_rc6_res(m, "Media RC6 residency since boot:", VLV_GT_MEDIA_RC6);
+
+ return fw_domains_show(m, NULL);
+}
+
+static int gen6_drpc(struct seq_file *m)
+{
+ struct intel_gt *gt = m->private;
+ struct drm_i915_private *i915 = gt->i915;
+ struct intel_uncore *uncore = gt->uncore;
+ u32 gt_core_status, rcctl1, rc6vids = 0;
+ u32 gen9_powergate_enable = 0, gen9_powergate_status = 0;
+
+ gt_core_status = intel_uncore_read_fw(uncore, GEN6_GT_CORE_STATUS);
+
+ rcctl1 = intel_uncore_read(uncore, GEN6_RC_CONTROL);
+ if (INTEL_GEN(i915) >= 9) {
+ gen9_powergate_enable =
+ intel_uncore_read(uncore, GEN9_PG_ENABLE);
+ gen9_powergate_status =
+ intel_uncore_read(uncore, GEN9_PWRGT_DOMAIN_STATUS);
+ }
+
+ if (INTEL_GEN(i915) <= 7)
+ sandybridge_pcode_read(i915, GEN6_PCODE_READ_RC6VIDS,
+ &rc6vids, NULL);
+
+ seq_printf(m, "RC1e Enabled: %s\n",
+ yesno(rcctl1 & GEN6_RC_CTL_RC1e_ENABLE));
+ seq_printf(m, "RC6 Enabled: %s\n",
+ yesno(rcctl1 & GEN6_RC_CTL_RC6_ENABLE));
+ if (INTEL_GEN(i915) >= 9) {
+ seq_printf(m, "Render Well Gating Enabled: %s\n",
+ yesno(gen9_powergate_enable & GEN9_RENDER_PG_ENABLE));
+ seq_printf(m, "Media Well Gating Enabled: %s\n",
+ yesno(gen9_powergate_enable & GEN9_MEDIA_PG_ENABLE));
+ }
+ seq_printf(m, "Deep RC6 Enabled: %s\n",
+ yesno(rcctl1 & GEN6_RC_CTL_RC6p_ENABLE));
+ seq_printf(m, "Deepest RC6 Enabled: %s\n",
+ yesno(rcctl1 & GEN6_RC_CTL_RC6pp_ENABLE));
+ seq_puts(m, "Current RC state: ");
+ switch (gt_core_status & GEN6_RCn_MASK) {
+ case GEN6_RC0:
+ if (gt_core_status & GEN6_CORE_CPD_STATE_MASK)
+ seq_puts(m, "Core Power Down\n");
+ else
+ seq_puts(m, "on\n");
+ break;
+ case GEN6_RC3:
+ seq_puts(m, "RC3\n");
+ break;
+ case GEN6_RC6:
+ seq_puts(m, "RC6\n");
+ break;
+ case GEN6_RC7:
+ seq_puts(m, "RC7\n");
+ break;
+ default:
+ seq_puts(m, "Unknown\n");
+ break;
+ }
+
+ seq_printf(m, "Core Power Down: %s\n",
+ yesno(gt_core_status & GEN6_CORE_CPD_STATE_MASK));
+ if (INTEL_GEN(i915) >= 9) {
+ seq_printf(m, "Render Power Well: %s\n",
+ (gen9_powergate_status &
+ GEN9_PWRGT_RENDER_STATUS_MASK) ? "Up" : "Down");
+ seq_printf(m, "Media Power Well: %s\n",
+ (gen9_powergate_status &
+ GEN9_PWRGT_MEDIA_STATUS_MASK) ? "Up" : "Down");
+ }
+
+ /* Not exactly sure what this is */
+ print_rc6_res(m, "RC6 \"Locked to RPn\" residency since boot:",
+ GEN6_GT_GFX_RC6_LOCKED);
+ print_rc6_res(m, "RC6 residency since boot:", GEN6_GT_GFX_RC6);
+ print_rc6_res(m, "RC6+ residency since boot:", GEN6_GT_GFX_RC6p);
+ print_rc6_res(m, "RC6++ residency since boot:", GEN6_GT_GFX_RC6pp);
+
+ if (INTEL_GEN(i915) <= 7) {
+ seq_printf(m, "RC6 voltage: %dmV\n",
+ GEN6_DECODE_RC6_VID(((rc6vids >> 0) & 0xff)));
+ seq_printf(m, "RC6+ voltage: %dmV\n",
+ GEN6_DECODE_RC6_VID(((rc6vids >> 8) & 0xff)));
+ seq_printf(m, "RC6++ voltage: %dmV\n",
+ GEN6_DECODE_RC6_VID(((rc6vids >> 16) & 0xff)));
+ }
+
+ return fw_domains_show(m, NULL);
+}
+
+static int ilk_drpc(struct seq_file *m)
+{
+ struct intel_gt *gt = m->private;
+ struct intel_uncore *uncore = gt->uncore;
+ u32 rgvmodectl, rstdbyctl;
+ u16 crstandvid;
+
+ rgvmodectl = intel_uncore_read(uncore, MEMMODECTL);
+ rstdbyctl = intel_uncore_read(uncore, RSTDBYCTL);
+ crstandvid = intel_uncore_read16(uncore, CRSTANDVID);
+
+ seq_printf(m, "HD boost: %s\n", yesno(rgvmodectl & MEMMODE_BOOST_EN));
+ seq_printf(m, "Boost freq: %d\n",
+ (rgvmodectl & MEMMODE_BOOST_FREQ_MASK) >>
+ MEMMODE_BOOST_FREQ_SHIFT);
+ seq_printf(m, "HW control enabled: %s\n",
+ yesno(rgvmodectl & MEMMODE_HWIDLE_EN));
+ seq_printf(m, "SW control enabled: %s\n",
+ yesno(rgvmodectl & MEMMODE_SWMODE_EN));
+ seq_printf(m, "Gated voltage change: %s\n",
+ yesno(rgvmodectl & MEMMODE_RCLK_GATE));
+ seq_printf(m, "Starting frequency: P%d\n",
+ (rgvmodectl & MEMMODE_FSTART_MASK) >> MEMMODE_FSTART_SHIFT);
+ seq_printf(m, "Max P-state: P%d\n",
+ (rgvmodectl & MEMMODE_FMAX_MASK) >> MEMMODE_FMAX_SHIFT);
+ seq_printf(m, "Min P-state: P%d\n", (rgvmodectl & MEMMODE_FMIN_MASK));
+ seq_printf(m, "RS1 VID: %d\n", (crstandvid & 0x3f));
+ seq_printf(m, "RS2 VID: %d\n", ((crstandvid >> 8) & 0x3f));
+ seq_printf(m, "Render standby enabled: %s\n",
+ yesno(!(rstdbyctl & RCX_SW_EXIT)));
+ seq_puts(m, "Current RS state: ");
+ switch (rstdbyctl & RSX_STATUS_MASK) {
+ case RSX_STATUS_ON:
+ seq_puts(m, "on\n");
+ break;
+ case RSX_STATUS_RC1:
+ seq_puts(m, "RC1\n");
+ break;
+ case RSX_STATUS_RC1E:
+ seq_puts(m, "RC1E\n");
+ break;
+ case RSX_STATUS_RS1:
+ seq_puts(m, "RS1\n");
+ break;
+ case RSX_STATUS_RS2:
+ seq_puts(m, "RS2 (RC6)\n");
+ break;
+ case RSX_STATUS_RS3:
+ seq_puts(m, "RC3 (RC6+)\n");
+ break;
+ default:
+ seq_puts(m, "unknown\n");
+ break;
+ }
+
+ return 0;
+}
+
+static int drpc_show(struct seq_file *m, void *unused)
+{
+ struct intel_gt *gt = m->private;
+ struct drm_i915_private *i915 = gt->i915;
+ intel_wakeref_t wakeref;
+ int err = -ENODEV;
+
+ with_intel_runtime_pm(gt->uncore->rpm, wakeref) {
+ if (IS_VALLEYVIEW(i915) || IS_CHERRYVIEW(i915))
+ err = vlv_drpc(m);
+ else if (INTEL_GEN(i915) >= 6)
+ err = gen6_drpc(m);
+ else
+ err = ilk_drpc(m);
+ }
+
+ return err;
+}
+DEFINE_GT_DEBUGFS_ATTRIBUTE(drpc);
+
+static int frequency_show(struct seq_file *m, void *unused)
+{
+ struct intel_gt *gt = m->private;
+ struct drm_i915_private *i915 = gt->i915;
+ struct intel_uncore *uncore = gt->uncore;
+ struct intel_rps *rps = &gt->rps;
+ intel_wakeref_t wakeref;
+
+ wakeref = intel_runtime_pm_get(uncore->rpm);
+
+ if (IS_GEN(i915, 5)) {
+ u16 rgvswctl = intel_uncore_read16(uncore, MEMSWCTL);
+ u16 rgvstat = intel_uncore_read16(uncore, MEMSTAT_ILK);
+
+ seq_printf(m, "Requested P-state: %d\n", (rgvswctl >> 8) & 0xf);
+ seq_printf(m, "Requested VID: %d\n", rgvswctl & 0x3f);
+ seq_printf(m, "Current VID: %d\n", (rgvstat & MEMSTAT_VID_MASK) >>
+ MEMSTAT_VID_SHIFT);
+ seq_printf(m, "Current P-state: %d\n",
+ (rgvstat & MEMSTAT_PSTATE_MASK) >> MEMSTAT_PSTATE_SHIFT);
+ } else if (IS_VALLEYVIEW(i915) || IS_CHERRYVIEW(i915)) {
+ u32 rpmodectl, freq_sts;
+
+ rpmodectl = intel_uncore_read(uncore, GEN6_RP_CONTROL);
+ seq_printf(m, "Video Turbo Mode: %s\n",
+ yesno(rpmodectl & GEN6_RP_MEDIA_TURBO));
+ seq_printf(m, "HW control enabled: %s\n",
+ yesno(rpmodectl & GEN6_RP_ENABLE));
+ seq_printf(m, "SW control enabled: %s\n",
+ yesno((rpmodectl & GEN6_RP_MEDIA_MODE_MASK) ==
+ GEN6_RP_MEDIA_SW_MODE));
+
+ vlv_punit_get(i915);
+ freq_sts = vlv_punit_read(i915, PUNIT_REG_GPU_FREQ_STS);
+ vlv_punit_put(i915);
+
+ seq_printf(m, "PUNIT_REG_GPU_FREQ_STS: 0x%08x\n", freq_sts);
+ seq_printf(m, "DDR freq: %d MHz\n", i915->mem_freq);
+
+ seq_printf(m, "actual GPU freq: %d MHz\n",
+ intel_gpu_freq(rps, (freq_sts >> 8) & 0xff));
+
+ seq_printf(m, "current GPU freq: %d MHz\n",
+ intel_gpu_freq(rps, rps->cur_freq));
+
+ seq_printf(m, "max GPU freq: %d MHz\n",
+ intel_gpu_freq(rps, rps->max_freq));
+
+ seq_printf(m, "min GPU freq: %d MHz\n",
+ intel_gpu_freq(rps, rps->min_freq));
+
+ seq_printf(m, "idle GPU freq: %d MHz\n",
+ intel_gpu_freq(rps, rps->idle_freq));
+
+ seq_printf(m, "efficient (RPe) frequency: %d MHz\n",
+ intel_gpu_freq(rps, rps->efficient_freq));
+ } else if (INTEL_GEN(i915) >= 6) {
+ u32 rp_state_limits;
+ u32 gt_perf_status;
+ u32 rp_state_cap;
+ u32 rpmodectl, rpinclimit, rpdeclimit;
+ u32 rpstat, cagf, reqf;
+ u32 rpupei, rpcurup, rpprevup;
+ u32 rpdownei, rpcurdown, rpprevdown;
+ u32 pm_ier, pm_imr, pm_isr, pm_iir, pm_mask;
+ int max_freq;
+
+ rp_state_limits = intel_uncore_read(uncore, GEN6_RP_STATE_LIMITS);
+ if (IS_GEN9_LP(i915)) {
+ rp_state_cap = intel_uncore_read(uncore, BXT_RP_STATE_CAP);
+ gt_perf_status = intel_uncore_read(uncore, BXT_GT_PERF_STATUS);
+ } else {
+ rp_state_cap = intel_uncore_read(uncore, GEN6_RP_STATE_CAP);
+ gt_perf_status = intel_uncore_read(uncore, GEN6_GT_PERF_STATUS);
+ }
+
+ /* RPSTAT1 is in the GT power well */
+ intel_uncore_forcewake_get(uncore, FORCEWAKE_ALL);
+
+ reqf = intel_uncore_read(uncore, GEN6_RPNSWREQ);
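+ /*
+ * The software-requested frequency field sits at a different bit
+ * offset in RPNSWREQ depending on generation, so extract it before
+ * converting to MHz.
+ */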
+ if (INTEL_GEN(i915) >= 9) {
+ reqf >>= 23;
+ } else {
+ reqf &= ~GEN6_TURBO_DISABLE;
+ if (IS_HASWELL(i915) || IS_BROADWELL(i915))
+ reqf >>= 24;
+ else
+ reqf >>= 25;
+ }
+ reqf = intel_gpu_freq(rps, reqf);
+
+ rpmodectl = intel_uncore_read(uncore, GEN6_RP_CONTROL);
+ rpinclimit = intel_uncore_read(uncore, GEN6_RP_UP_THRESHOLD);
+ rpdeclimit = intel_uncore_read(uncore, GEN6_RP_DOWN_THRESHOLD);
+
+ rpstat = intel_uncore_read(uncore, GEN6_RPSTAT1);
+ rpupei = intel_uncore_read(uncore, GEN6_RP_CUR_UP_EI) & GEN6_CURICONT_MASK;
+ rpcurup = intel_uncore_read(uncore, GEN6_RP_CUR_UP) & GEN6_CURBSYTAVG_MASK;
+ rpprevup = intel_uncore_read(uncore, GEN6_RP_PREV_UP) & GEN6_CURBSYTAVG_MASK;
+ rpdownei = intel_uncore_read(uncore, GEN6_RP_CUR_DOWN_EI) & GEN6_CURIAVG_MASK;
+ rpcurdown = intel_uncore_read(uncore, GEN6_RP_CUR_DOWN) & GEN6_CURBSYTAVG_MASK;
+ rpprevdown = intel_uncore_read(uncore, GEN6_RP_PREV_DOWN) & GEN6_CURBSYTAVG_MASK;
+ cagf = intel_rps_read_actual_frequency(rps);
+
+ intel_uncore_forcewake_put(uncore, FORCEWAKE_ALL);
+
+ if (INTEL_GEN(i915) >= 11) {
+ pm_ier = intel_uncore_read(uncore, GEN11_GPM_WGBOXPERF_INTR_ENABLE);
+ pm_imr = intel_uncore_read(uncore, GEN11_GPM_WGBOXPERF_INTR_MASK);
+ /*
+ * The equivalent to the PM ISR & IIR cannot be read
+ * without affecting the current state of the system
+ */
+ pm_isr = 0;
+ pm_iir = 0;
+ } else if (INTEL_GEN(i915) >= 8) {
+ pm_ier = intel_uncore_read(uncore, GEN8_GT_IER(2));
+ pm_imr = intel_uncore_read(uncore, GEN8_GT_IMR(2));
+ pm_isr = intel_uncore_read(uncore, GEN8_GT_ISR(2));
+ pm_iir = intel_uncore_read(uncore, GEN8_GT_IIR(2));
+ } else {
+ pm_ier = intel_uncore_read(uncore, GEN6_PMIER);
+ pm_imr = intel_uncore_read(uncore, GEN6_PMIMR);
+ pm_isr = intel_uncore_read(uncore, GEN6_PMISR);
+ pm_iir = intel_uncore_read(uncore, GEN6_PMIIR);
+ }
+ pm_mask = intel_uncore_read(uncore, GEN6_PMINTRMSK);
+
+ seq_printf(m, "Video Turbo Mode: %s\n",
+ yesno(rpmodectl & GEN6_RP_MEDIA_TURBO));
+ seq_printf(m, "HW control enabled: %s\n",
+ yesno(rpmodectl & GEN6_RP_ENABLE));
+ seq_printf(m, "SW control enabled: %s\n",
+ yesno((rpmodectl & GEN6_RP_MEDIA_MODE_MASK) ==
+ GEN6_RP_MEDIA_SW_MODE));
+
+ seq_printf(m, "PM IER=0x%08x IMR=0x%08x, MASK=0x%08x\n",
+ pm_ier, pm_imr, pm_mask);
+ if (INTEL_GEN(i915) <= 10)
+ seq_printf(m, "PM ISR=0x%08x IIR=0x%08x\n",
+ pm_isr, pm_iir);
+ seq_printf(m, "pm_intrmsk_mbz: 0x%08x\n",
+ rps->pm_intrmsk_mbz);
+ seq_printf(m, "GT_PERF_STATUS: 0x%08x\n", gt_perf_status);
+ seq_printf(m, "Render p-state ratio: %d\n",
+ (gt_perf_status & (INTEL_GEN(i915) >= 9 ? 0x1ff00 : 0xff00)) >> 8);
+ seq_printf(m, "Render p-state VID: %d\n",
+ gt_perf_status & 0xff);
+ seq_printf(m, "Render p-state limit: %d\n",
+ rp_state_limits & 0xff);
+ seq_printf(m, "RPSTAT1: 0x%08x\n", rpstat);
+ seq_printf(m, "RPMODECTL: 0x%08x\n", rpmodectl);
+ seq_printf(m, "RPINCLIMIT: 0x%08x\n", rpinclimit);
+ seq_printf(m, "RPDECLIMIT: 0x%08x\n", rpdeclimit);
+ seq_printf(m, "RPNSWREQ: %dMHz\n", reqf);
+ seq_printf(m, "CAGF: %dMHz\n", cagf);
+ seq_printf(m, "RP CUR UP EI: %d (%dus)\n",
+ rpupei, GT_PM_INTERVAL_TO_US(i915, rpupei));
+ seq_printf(m, "RP CUR UP: %d (%dus)\n",
+ rpcurup, GT_PM_INTERVAL_TO_US(i915, rpcurup));
+ seq_printf(m, "RP PREV UP: %d (%dus)\n",
+ rpprevup, GT_PM_INTERVAL_TO_US(i915, rpprevup));
+ seq_printf(m, "Up threshold: %d%%\n",
+ rps->power.up_threshold);
+
+ seq_printf(m, "RP CUR DOWN EI: %d (%dus)\n",
+ rpdownei, GT_PM_INTERVAL_TO_US(i915, rpdownei));
+ seq_printf(m, "RP CUR DOWN: %d (%dus)\n",
+ rpcurdown, GT_PM_INTERVAL_TO_US(i915, rpcurdown));
+ seq_printf(m, "RP PREV DOWN: %d (%dus)\n",
+ rpprevdown, GT_PM_INTERVAL_TO_US(i915, rpprevdown));
+ seq_printf(m, "Down threshold: %d%%\n",
+ rps->power.down_threshold);
+
+ max_freq = (IS_GEN9_LP(i915) ? rp_state_cap >> 0 :
+ rp_state_cap >> 16) & 0xff;
+ max_freq *= (IS_GEN9_BC(i915) ||
+ INTEL_GEN(i915) >= 10 ? GEN9_FREQ_SCALER : 1);
+ seq_printf(m, "Lowest (RPN) frequency: %dMHz\n",
+ intel_gpu_freq(rps, max_freq));
+
+ max_freq = (rp_state_cap & 0xff00) >> 8;
+ max_freq *= (IS_GEN9_BC(i915) ||
+ INTEL_GEN(i915) >= 10 ? GEN9_FREQ_SCALER : 1);
+ seq_printf(m, "Nominal (RP1) frequency: %dMHz\n",
+ intel_gpu_freq(rps, max_freq));
+
+ max_freq = (IS_GEN9_LP(i915) ? rp_state_cap >> 16 :
+ rp_state_cap >> 0) & 0xff;
+ max_freq *= (IS_GEN9_BC(i915) ||
+ INTEL_GEN(i915) >= 10 ? GEN9_FREQ_SCALER : 1);
+ seq_printf(m, "Max non-overclocked (RP0) frequency: %dMHz\n",
+ intel_gpu_freq(rps, max_freq));
+ seq_printf(m, "Max overclocked frequency: %dMHz\n",
+ intel_gpu_freq(rps, rps->max_freq));
+
+ seq_printf(m, "Current freq: %d MHz\n",
+ intel_gpu_freq(rps, rps->cur_freq));
+ seq_printf(m, "Actual freq: %d MHz\n", cagf);
+ seq_printf(m, "Idle freq: %d MHz\n",
+ intel_gpu_freq(rps, rps->idle_freq));
+ seq_printf(m, "Min freq: %d MHz\n",
+ intel_gpu_freq(rps, rps->min_freq));
+ seq_printf(m, "Boost freq: %d MHz\n",
+ intel_gpu_freq(rps, rps->boost_freq));
+ seq_printf(m, "Max freq: %d MHz\n",
+ intel_gpu_freq(rps, rps->max_freq));
+ seq_printf(m,
+ "efficient (RPe) frequency: %d MHz\n",
+ intel_gpu_freq(rps, rps->efficient_freq));
+ } else {
+ seq_puts(m, "no P-state info available\n");
+ }
+
+ seq_printf(m, "Current CD clock frequency: %d kHz\n", i915->cdclk.hw.cdclk);
+ seq_printf(m, "Max CD clock frequency: %d kHz\n", i915->max_cdclk_freq);
+ seq_printf(m, "Max pixel clock frequency: %d kHz\n", i915->max_dotclk_freq);
+
+ intel_runtime_pm_put(uncore->rpm, wakeref);
+
+ return 0;
+}
+DEFINE_GT_DEBUGFS_ATTRIBUTE(frequency);
+
+static int llc_show(struct seq_file *m, void *data)
+{
+ struct intel_gt *gt = m->private;
+ struct drm_i915_private *i915 = gt->i915;
+ const bool edram = INTEL_GEN(i915) > 8;
+ struct intel_rps *rps = &gt->rps;
+ unsigned int max_gpu_freq, min_gpu_freq;
+ intel_wakeref_t wakeref;
+ int gpu_freq, ia_freq;
+
+ seq_printf(m, "LLC: %s\n", yesno(HAS_LLC(i915)));
+ seq_printf(m, "%s: %uMB\n", edram ? "eDRAM" : "eLLC",
+ i915->edram_size_mb);
+
+ min_gpu_freq = rps->min_freq;
+ max_gpu_freq = rps->max_freq;
+ if (IS_GEN9_BC(i915) || INTEL_GEN(i915) >= 10) {
+ /* Convert GT frequency to 50 MHz units */
+ min_gpu_freq /= GEN9_FREQ_SCALER;
+ max_gpu_freq /= GEN9_FREQ_SCALER;
+ }
+
+ seq_puts(m, "GPU freq (MHz)\tEffective CPU freq (MHz)\tEffective Ring freq (MHz)\n");
+
+ wakeref = intel_runtime_pm_get(gt->uncore->rpm);
+ for (gpu_freq = min_gpu_freq; gpu_freq <= max_gpu_freq; gpu_freq++) {
+ ia_freq = gpu_freq;
+ sandybridge_pcode_read(i915,
+ GEN6_PCODE_READ_MIN_FREQ_TABLE,
+ &ia_freq, NULL);
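+ /*
+ * The pcode reply packs two values into ia_freq: bits 7:0 carry the
+ * effective CPU frequency and bits 15:8 the effective ring frequency,
+ * both in 100 MHz units (hence the "* 100" in the columns below).
+ */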
+ seq_printf(m, "%d\t\t%d\t\t\t\t%d\n",
+ intel_gpu_freq(rps,
+ (gpu_freq *
+ (IS_GEN9_BC(i915) ||
+ INTEL_GEN(i915) >= 10 ?
+ GEN9_FREQ_SCALER : 1))),
+ ((ia_freq >> 0) & 0xff) * 100,
+ ((ia_freq >> 8) & 0xff) * 100);
+ }
+ intel_runtime_pm_put(gt->uncore->rpm, wakeref);
+
+ return 0;
+}
+
+static bool llc_eval(const struct intel_gt *gt)
+{
+ return HAS_LLC(gt->i915);
+}
+
+DEFINE_GT_DEBUGFS_ATTRIBUTE(llc);
+
+static const char *rps_power_to_str(unsigned int power)
+{
+ static const char * const strings[] = {
+ [LOW_POWER] = "low power",
+ [BETWEEN] = "mixed",
+ [HIGH_POWER] = "high power",
+ };
+
+ if (power >= ARRAY_SIZE(strings) || !strings[power])
+ return "unknown";
+
+ return strings[power];
+}
+
+static int rps_boost_show(struct seq_file *m, void *data)
+{
+ struct intel_gt *gt = m->private;
+ struct drm_i915_private *i915 = gt->i915;
+ struct intel_rps *rps = &gt->rps;
+
+ seq_printf(m, "RPS enabled? %d\n", rps->enabled);
+ seq_printf(m, "GPU busy? %s\n", yesno(gt->awake));
+ seq_printf(m, "Boosts outstanding? %d\n",
+ atomic_read(&rps->num_waiters));
+ seq_printf(m, "Interactive? %d\n", READ_ONCE(rps->power.interactive));
+ seq_printf(m, "Frequency requested %d, actual %d\n",
+ intel_gpu_freq(rps, rps->cur_freq),
+ intel_rps_read_actual_frequency(rps));
+ seq_printf(m, " min hard:%d, soft:%d; max soft:%d, hard:%d\n",
+ intel_gpu_freq(rps, rps->min_freq),
+ intel_gpu_freq(rps, rps->min_freq_softlimit),
+ intel_gpu_freq(rps, rps->max_freq_softlimit),
+ intel_gpu_freq(rps, rps->max_freq));
+ seq_printf(m, " idle:%d, efficient:%d, boost:%d\n",
+ intel_gpu_freq(rps, rps->idle_freq),
+ intel_gpu_freq(rps, rps->efficient_freq),
+ intel_gpu_freq(rps, rps->boost_freq));
+
+ seq_printf(m, "Wait boosts: %d\n", atomic_read(&rps->boosts));
+
+ if (INTEL_GEN(i915) >= 6 && rps->enabled && gt->awake) {
+ struct intel_uncore *uncore = gt->uncore;
+ u32 rpup, rpupei;
+ u32 rpdown, rpdownei;
+
+ intel_uncore_forcewake_get(uncore, FORCEWAKE_ALL);
+ rpup = intel_uncore_read_fw(uncore, GEN6_RP_CUR_UP) & GEN6_RP_EI_MASK;
+ rpupei = intel_uncore_read_fw(uncore, GEN6_RP_CUR_UP_EI) & GEN6_RP_EI_MASK;
+ rpdown = intel_uncore_read_fw(uncore, GEN6_RP_CUR_DOWN) & GEN6_RP_EI_MASK;
+ rpdownei = intel_uncore_read_fw(uncore, GEN6_RP_CUR_DOWN_EI) & GEN6_RP_EI_MASK;
+ intel_uncore_forcewake_put(uncore, FORCEWAKE_ALL);
+
+ seq_printf(m, "\nRPS Autotuning (current \"%s\" window):\n",
+ rps_power_to_str(rps->power.mode));
+ seq_printf(m, " Avg. up: %d%% [above threshold? %d%%]\n",
+ rpup && rpupei ? 100 * rpup / rpupei : 0,
+ rps->power.up_threshold);
+ seq_printf(m, " Avg. down: %d%% [below threshold? %d%%]\n",
+ rpdown && rpdownei ? 100 * rpdown / rpdownei : 0,
+ rps->power.down_threshold);
+ } else {
+ seq_puts(m, "\nRPS Autotuning inactive\n");
+ }
+
+ return 0;
+}
+
+static bool rps_eval(const struct intel_gt *gt)
+{
+ return HAS_RPS(gt->i915);
+}
+
+DEFINE_GT_DEBUGFS_ATTRIBUTE(rps_boost);
+
+void debugfs_gt_pm_register(struct intel_gt *gt, struct dentry *root)
+{
+ static const struct debugfs_gt_file files[] = {
+ { "drpc", &drpc_fops, NULL },
+ { "frequency", &frequency_fops, NULL },
+ { "forcewake", &fw_domains_fops, NULL },
+ { "llc", &llc_fops, llc_eval },
+ { "rps_boost", &rps_boost_fops, rps_eval },
+ };
+
+ debugfs_gt_register_files(gt, root, files, ARRAY_SIZE(files));
+}
diff --git a/drivers/gpu/drm/i915/gt/debugfs_gt_pm.h b/drivers/gpu/drm/i915/gt/debugfs_gt_pm.h
new file mode 100644
index 000000000000..4cf5f5c9da7d
--- /dev/null
+++ b/drivers/gpu/drm/i915/gt/debugfs_gt_pm.h
@@ -0,0 +1,14 @@
+/* SPDX-License-Identifier: MIT */
+/*
+ * Copyright © 2019 Intel Corporation
+ */
+
+#ifndef DEBUGFS_GT_PM_H
+#define DEBUGFS_GT_PM_H
+
+struct intel_gt;
+struct dentry;
+
+void debugfs_gt_pm_register(struct intel_gt *gt, struct dentry *root);
+
+#endif /* DEBUGFS_GT_PM_H */
diff --git a/drivers/gpu/drm/i915/gt/gen6_ppgtt.c b/drivers/gpu/drm/i915/gt/gen6_ppgtt.c
new file mode 100644
index 000000000000..f4fec7eb4064
--- /dev/null
+++ b/drivers/gpu/drm/i915/gt/gen6_ppgtt.c
@@ -0,0 +1,483 @@
+// SPDX-License-Identifier: MIT
+/*
+ * Copyright © 2020 Intel Corporation
+ */
+
+#include <linux/log2.h>
+
+#include "gen6_ppgtt.h"
+#include "i915_scatterlist.h"
+#include "i915_trace.h"
+#include "i915_vgpu.h"
+#include "intel_gt.h"
+
+/* Write pde (index) from the page directory @pd to the page table @pt */
+static inline void gen6_write_pde(const struct gen6_ppgtt *ppgtt,
+ const unsigned int pde,
+ const struct i915_page_table *pt)
+{
+ /* Caller needs to make sure the write completes if necessary */
+ iowrite32(GEN6_PDE_ADDR_ENCODE(px_dma(pt)) | GEN6_PDE_VALID,
+ ppgtt->pd_addr + pde);
+}
+
+void gen7_ppgtt_enable(struct intel_gt *gt)
+{
+ struct drm_i915_private *i915 = gt->i915;
+ struct intel_uncore *uncore = gt->uncore;
+ struct intel_engine_cs *engine;
+ enum intel_engine_id id;
+ u32 ecochk;
+
+ intel_uncore_rmw(uncore, GAC_ECO_BITS, 0, ECOBITS_PPGTT_CACHE64B);
+
+ ecochk = intel_uncore_read(uncore, GAM_ECOCHK);
+ if (IS_HASWELL(i915)) {
+ ecochk |= ECOCHK_PPGTT_WB_HSW;
+ } else {
+ ecochk |= ECOCHK_PPGTT_LLC_IVB;
+ ecochk &= ~ECOCHK_PPGTT_GFDT_IVB;
+ }
+ intel_uncore_write(uncore, GAM_ECOCHK, ecochk);
+
+ for_each_engine(engine, gt, id) {
+ /* GFX_MODE is per-ring on gen7+ */
+ ENGINE_WRITE(engine,
+ RING_MODE_GEN7,
+ _MASKED_BIT_ENABLE(GFX_PPGTT_ENABLE));
+ }
+}
+
+void gen6_ppgtt_enable(struct intel_gt *gt)
+{
+ struct intel_uncore *uncore = gt->uncore;
+
+ intel_uncore_rmw(uncore,
+ GAC_ECO_BITS,
+ 0,
+ ECOBITS_SNB_BIT | ECOBITS_PPGTT_CACHE64B);
+
+ intel_uncore_rmw(uncore,
+ GAB_CTL,
+ 0,
+ GAB_CTL_CONT_AFTER_PAGEFAULT);
+
+ intel_uncore_rmw(uncore,
+ GAM_ECOCHK,
+ 0,
+ ECOCHK_SNB_BIT | ECOCHK_PPGTT_CACHE64B);
+
+ if (HAS_PPGTT(uncore->i915)) /* may be disabled for VT-d */
+ intel_uncore_write(uncore,
+ GFX_MODE,
+ _MASKED_BIT_ENABLE(GFX_PPGTT_ENABLE));
+}
+
+/* PPGTT support for Sandybridge/Gen6 and later */
+static void gen6_ppgtt_clear_range(struct i915_address_space *vm,
+ u64 start, u64 length)
+{
+ struct gen6_ppgtt * const ppgtt = to_gen6_ppgtt(i915_vm_to_ppgtt(vm));
+ const unsigned int first_entry = start / I915_GTT_PAGE_SIZE;
+ const gen6_pte_t scratch_pte = vm->scratch[0].encode;
+ unsigned int pde = first_entry / GEN6_PTES;
+ unsigned int pte = first_entry % GEN6_PTES;
+ unsigned int num_entries = length / I915_GTT_PAGE_SIZE;
+
+ while (num_entries) {
+ struct i915_page_table * const pt =
+ i915_pt_entry(ppgtt->base.pd, pde++);
+ const unsigned int count = min(num_entries, GEN6_PTES - pte);
+ gen6_pte_t *vaddr;
+
+ GEM_BUG_ON(px_base(pt) == px_base(&vm->scratch[1]));
+
+ num_entries -= count;
+
+ GEM_BUG_ON(count > atomic_read(&pt->used));
+ if (!atomic_sub_return(count, &pt->used))
+ ppgtt->scan_for_unused_pt = true;
+
+ /*
+ * Note that the hw doesn't support removing PDEs on the fly
+ * (they are cached inside the context with no means to
+ * invalidate the cache), so we can only reset the PTE
+ * entries back to scratch.
+ */
+
+ vaddr = kmap_atomic_px(pt);
+ memset32(vaddr + pte, scratch_pte, count);
+ kunmap_atomic(vaddr);
+
+ pte = 0;
+ }
+}
+
+static void gen6_ppgtt_insert_entries(struct i915_address_space *vm,
+ struct i915_vma *vma,
+ enum i915_cache_level cache_level,
+ u32 flags)
+{
+ struct i915_ppgtt *ppgtt = i915_vm_to_ppgtt(vm);
+ struct i915_page_directory * const pd = ppgtt->pd;
+ unsigned int first_entry = vma->node.start / I915_GTT_PAGE_SIZE;
+ unsigned int act_pt = first_entry / GEN6_PTES;
+ unsigned int act_pte = first_entry % GEN6_PTES;
+ const u32 pte_encode = vm->pte_encode(0, cache_level, flags);
+ struct sgt_dma iter = sgt_dma(vma);
+ gen6_pte_t *vaddr;
+
+ GEM_BUG_ON(pd->entry[act_pt] == &vm->scratch[1]);
+
+ vaddr = kmap_atomic_px(i915_pt_entry(pd, act_pt));
+ do {
+ GEM_BUG_ON(iter.sg->length < I915_GTT_PAGE_SIZE);
+ vaddr[act_pte] = pte_encode | GEN6_PTE_ADDR_ENCODE(iter.dma);
+
+ iter.dma += I915_GTT_PAGE_SIZE;
+ if (iter.dma == iter.max) {
+ iter.sg = __sg_next(iter.sg);
+ if (!iter.sg)
+ break;
+
+ iter.dma = sg_dma_address(iter.sg);
+ iter.max = iter.dma + iter.sg->length;
+ }
+
+ if (++act_pte == GEN6_PTES) {
+ kunmap_atomic(vaddr);
+ vaddr = kmap_atomic_px(i915_pt_entry(pd, ++act_pt));
+ act_pte = 0;
+ }
+ } while (1);
+ kunmap_atomic(vaddr);
+
+ vma->page_sizes.gtt = I915_GTT_PAGE_SIZE;
+}
+
+static void gen6_flush_pd(struct gen6_ppgtt *ppgtt, u64 start, u64 end)
+{
+ struct i915_page_directory * const pd = ppgtt->base.pd;
+ struct i915_page_table *pt;
+ unsigned int pde;
+
+ start = round_down(start, SZ_64K);
+ end = round_up(end, SZ_64K) - start;
+
+ mutex_lock(&ppgtt->flush);
+
+ gen6_for_each_pde(pt, pd, start, end, pde)
+ gen6_write_pde(ppgtt, pde, pt);
+
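+ /*
+ * Ensure the PDE writes above have landed: a full barrier plus a
+ * posting read of the last PDE written, before invalidating the GGTT.
+ */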
+ mb();
+ ioread32(ppgtt->pd_addr + pde - 1);
+ gen6_ggtt_invalidate(ppgtt->base.vm.gt->ggtt);
+ mb();
+
+ mutex_unlock(&ppgtt->flush);
+}
+
+static int gen6_alloc_va_range(struct i915_address_space *vm,
+ u64 start, u64 length)
+{
+ struct gen6_ppgtt *ppgtt = to_gen6_ppgtt(i915_vm_to_ppgtt(vm));
+ struct i915_page_directory * const pd = ppgtt->base.pd;
+ struct i915_page_table *pt, *alloc = NULL;
+ intel_wakeref_t wakeref;
+ u64 from = start;
+ unsigned int pde;
+ int ret = 0;
+
+ wakeref = intel_runtime_pm_get(&vm->i915->runtime_pm);
+
+ spin_lock(&pd->lock);
+ gen6_for_each_pde(pt, pd, start, length, pde) {
+ const unsigned int count = gen6_pte_count(start, length);
+
+ if (px_base(pt) == px_base(&vm->scratch[1])) {
+ spin_unlock(&pd->lock);
+
+ pt = fetch_and_zero(&alloc);
+ if (!pt)
+ pt = alloc_pt(vm);
+ if (IS_ERR(pt)) {
+ ret = PTR_ERR(pt);
+ goto unwind_out;
+ }
+
+ fill32_px(pt, vm->scratch[0].encode);
+
+ spin_lock(&pd->lock);
+ if (pd->entry[pde] == &vm->scratch[1]) {
+ pd->entry[pde] = pt;
+ } else {
+ alloc = pt;
+ pt = pd->entry[pde];
+ }
+ }
+
+ atomic_add(count, &pt->used);
+ }
+ spin_unlock(&pd->lock);
+
+ if (i915_vma_is_bound(ppgtt->vma, I915_VMA_GLOBAL_BIND))
+ gen6_flush_pd(ppgtt, from, start);
+
+ goto out;
+
+unwind_out:
+ gen6_ppgtt_clear_range(vm, from, start - from);
+out:
+ if (alloc)
+ free_px(vm, alloc);
+ intel_runtime_pm_put(&vm->i915->runtime_pm, wakeref);
+ return ret;
+}
+
+static int gen6_ppgtt_init_scratch(struct gen6_ppgtt *ppgtt)
+{
+ struct i915_address_space * const vm = &ppgtt->base.vm;
+ struct i915_page_directory * const pd = ppgtt->base.pd;
+ int ret;
+
+ ret = setup_scratch_page(vm, __GFP_HIGHMEM);
+ if (ret)
+ return ret;
+
+ vm->scratch[0].encode =
+ vm->pte_encode(px_dma(&vm->scratch[0]),
+ I915_CACHE_NONE, PTE_READ_ONLY);
+
+ if (unlikely(setup_page_dma(vm, px_base(&vm->scratch[1])))) {
+ cleanup_scratch_page(vm);
+ return -ENOMEM;
+ }
+
+ fill32_px(&vm->scratch[1], vm->scratch[0].encode);
+ memset_p(pd->entry, &vm->scratch[1], I915_PDES);
+
+ return 0;
+}
+
+static void gen6_ppgtt_free_pd(struct gen6_ppgtt *ppgtt)
+{
+ struct i915_page_directory * const pd = ppgtt->base.pd;
+ struct i915_page_dma * const scratch =
+ px_base(&ppgtt->base.vm.scratch[1]);
+ struct i915_page_table *pt;
+ u32 pde;
+
+ gen6_for_all_pdes(pt, pd, pde)
+ if (px_base(pt) != scratch)
+ free_px(&ppgtt->base.vm, pt);
+}
+
+static void gen6_ppgtt_cleanup(struct i915_address_space *vm)
+{
+ struct gen6_ppgtt *ppgtt = to_gen6_ppgtt(i915_vm_to_ppgtt(vm));
+
+ __i915_vma_put(ppgtt->vma);
+
+ gen6_ppgtt_free_pd(ppgtt);
+ free_scratch(vm);
+
+ mutex_destroy(&ppgtt->flush);
+ mutex_destroy(&ppgtt->pin_mutex);
+ kfree(ppgtt->base.pd);
+}
+
+static int pd_vma_set_pages(struct i915_vma *vma)
+{
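+ /*
+ * The page directory has no backing pages of its own; an error
+ * pointer acts as a non-NULL "no pages" sentinel here.
+ */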
+ vma->pages = ERR_PTR(-ENODEV);
+ return 0;
+}
+
+static void pd_vma_clear_pages(struct i915_vma *vma)
+{
+ GEM_BUG_ON(!vma->pages);
+
+ vma->pages = NULL;
+}
+
+static int pd_vma_bind(struct i915_vma *vma,
+ enum i915_cache_level cache_level,
+ u32 unused)
+{
+ struct i915_ggtt *ggtt = i915_vm_to_ggtt(vma->vm);
+ struct gen6_ppgtt *ppgtt = vma->private;
+ u32 ggtt_offset = i915_ggtt_offset(vma) / I915_GTT_PAGE_SIZE;
+
+ px_base(ppgtt->base.pd)->ggtt_offset = ggtt_offset * sizeof(gen6_pte_t);
+ ppgtt->pd_addr = (gen6_pte_t __iomem *)ggtt->gsm + ggtt_offset;
+
+ gen6_flush_pd(ppgtt, 0, ppgtt->base.vm.total);
+ return 0;
+}
+
+static void pd_vma_unbind(struct i915_vma *vma)
+{
+ struct gen6_ppgtt *ppgtt = vma->private;
+ struct i915_page_directory * const pd = ppgtt->base.pd;
+ struct i915_page_dma * const scratch =
+ px_base(&ppgtt->base.vm.scratch[1]);
+ struct i915_page_table *pt;
+ unsigned int pde;
+
+ if (!ppgtt->scan_for_unused_pt)
+ return;
+
+ /* Free all no longer used page tables */
+ gen6_for_all_pdes(pt, ppgtt->base.pd, pde) {
+ if (px_base(pt) == scratch || atomic_read(&pt->used))
+ continue;
+
+ free_px(&ppgtt->base.vm, pt);
+ pd->entry[pde] = scratch;
+ }
+
+ ppgtt->scan_for_unused_pt = false;
+}
+
+static const struct i915_vma_ops pd_vma_ops = {
+ .set_pages = pd_vma_set_pages,
+ .clear_pages = pd_vma_clear_pages,
+ .bind_vma = pd_vma_bind,
+ .unbind_vma = pd_vma_unbind,
+};
+
+static struct i915_vma *pd_vma_create(struct gen6_ppgtt *ppgtt, int size)
+{
+ struct i915_ggtt *ggtt = ppgtt->base.vm.gt->ggtt;
+ struct i915_vma *vma;
+
+ GEM_BUG_ON(!IS_ALIGNED(size, I915_GTT_PAGE_SIZE));
+ GEM_BUG_ON(size > ggtt->vm.total);
+
+ vma = i915_vma_alloc();
+ if (!vma)
+ return ERR_PTR(-ENOMEM);
+
+ i915_active_init(&vma->active, NULL, NULL);
+
+ kref_init(&vma->ref);
+ mutex_init(&vma->pages_mutex);
+ vma->vm = i915_vm_get(&ggtt->vm);
+ vma->ops = &pd_vma_ops;
+ vma->private = ppgtt;
+
+ vma->size = size;
+ vma->fence_size = size;
+ atomic_set(&vma->flags, I915_VMA_GGTT);
+ vma->ggtt_view.type = I915_GGTT_VIEW_ROTATED; /* prevent fencing */
+
+ INIT_LIST_HEAD(&vma->obj_link);
+ INIT_LIST_HEAD(&vma->closed_link);
+
+ return vma;
+}
+
+int gen6_ppgtt_pin(struct i915_ppgtt *base)
+{
+ struct gen6_ppgtt *ppgtt = to_gen6_ppgtt(base);
+ int err;
+
+ GEM_BUG_ON(!atomic_read(&ppgtt->base.vm.open));
+
+ /*
+ * Workaround the limited maximum vma->pin_count and the aliasing_ppgtt
+ * which will be pinned into every active context.
+ * (When vma->pin_count becomes atomic, I expect we will naturally
+ * need a larger, unpacked, type and kill this redundancy.)
+ */
+ if (atomic_add_unless(&ppgtt->pin_count, 1, 0))
+ return 0;
+
+ if (mutex_lock_interruptible(&ppgtt->pin_mutex))
+ return -EINTR;
+
+ /*
+ * PPGTT PDEs reside in the GGTT and consist of 512 entries. The
+ * allocator works in address space sizes, so it's multiplied by page
+ * size. We allocate at the top of the GTT to avoid fragmentation.
+ */
+ err = 0;
+ if (!atomic_read(&ppgtt->pin_count))
+ err = i915_ggtt_pin(ppgtt->vma, GEN6_PD_ALIGN, PIN_HIGH);
+ if (!err)
+ atomic_inc(&ppgtt->pin_count);
+ mutex_unlock(&ppgtt->pin_mutex);
+
+ return err;
+}
+
+void gen6_ppgtt_unpin(struct i915_ppgtt *base)
+{
+ struct gen6_ppgtt *ppgtt = to_gen6_ppgtt(base);
+
+ GEM_BUG_ON(!atomic_read(&ppgtt->pin_count));
+ if (atomic_dec_and_test(&ppgtt->pin_count))
+ i915_vma_unpin(ppgtt->vma);
+}
+
+void gen6_ppgtt_unpin_all(struct i915_ppgtt *base)
+{
+ struct gen6_ppgtt *ppgtt = to_gen6_ppgtt(base);
+
+ if (!atomic_read(&ppgtt->pin_count))
+ return;
+
+ i915_vma_unpin(ppgtt->vma);
+ atomic_set(&ppgtt->pin_count, 0);
+}
+
+struct i915_ppgtt *gen6_ppgtt_create(struct intel_gt *gt)
+{
+ struct i915_ggtt * const ggtt = gt->ggtt;
+ struct gen6_ppgtt *ppgtt;
+ int err;
+
+ ppgtt = kzalloc(sizeof(*ppgtt), GFP_KERNEL);
+ if (!ppgtt)
+ return ERR_PTR(-ENOMEM);
+
+ mutex_init(&ppgtt->flush);
+ mutex_init(&ppgtt->pin_mutex);
+
+ ppgtt_init(&ppgtt->base, gt);
+ ppgtt->base.vm.top = 1;
+
+ ppgtt->base.vm.bind_async_flags = I915_VMA_LOCAL_BIND;
+ ppgtt->base.vm.allocate_va_range = gen6_alloc_va_range;
+ ppgtt->base.vm.clear_range = gen6_ppgtt_clear_range;
+ ppgtt->base.vm.insert_entries = gen6_ppgtt_insert_entries;
+ ppgtt->base.vm.cleanup = gen6_ppgtt_cleanup;
+
+ ppgtt->base.vm.pte_encode = ggtt->vm.pte_encode;
+
+ ppgtt->base.pd = __alloc_pd(sizeof(*ppgtt->base.pd));
+ if (!ppgtt->base.pd) {
+ err = -ENOMEM;
+ goto err_free;
+ }
+
+ err = gen6_ppgtt_init_scratch(ppgtt);
+ if (err)
+ goto err_pd;
+
+ ppgtt->vma = pd_vma_create(ppgtt, GEN6_PD_SIZE);
+ if (IS_ERR(ppgtt->vma)) {
+ err = PTR_ERR(ppgtt->vma);
+ goto err_scratch;
+ }
+
+ return &ppgtt->base;
+
+err_scratch:
+ free_scratch(&ppgtt->base.vm);
+err_pd:
+ kfree(ppgtt->base.pd);
+err_free:
+ mutex_destroy(&ppgtt->pin_mutex);
+ kfree(ppgtt);
+ return ERR_PTR(err);
+}
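
(The pin-count handling in gen6_ppgtt_pin() above is a common lockless
fast-path idiom; a stripped-down sketch, where "obj", its fields and
obj_pin_slow() are hypothetical names:)

	/* Fast path: take another reference only if one is already held. */
	if (atomic_add_unless(&obj->pin_count, 1, 0))
		return 0;

	/* Slow path: serialize the first pin against concurrent callers. */
	if (mutex_lock_interruptible(&obj->pin_mutex))
		return -EINTR;
	err = 0;
	if (!atomic_read(&obj->pin_count))
		err = obj_pin_slow(obj);
	if (!err)
		atomic_inc(&obj->pin_count);
	mutex_unlock(&obj->pin_mutex);
	return err;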
diff --git a/drivers/gpu/drm/i915/gt/gen6_ppgtt.h b/drivers/gpu/drm/i915/gt/gen6_ppgtt.h
new file mode 100644
index 000000000000..72e481806c96
--- /dev/null
+++ b/drivers/gpu/drm/i915/gt/gen6_ppgtt.h
@@ -0,0 +1,76 @@
+/* SPDX-License-Identifier: MIT */
+/*
+ * Copyright © 2020 Intel Corporation
+ */
+
+#ifndef __GEN6_PPGTT_H__
+#define __GEN6_PPGTT_H__
+
+#include "intel_gtt.h"
+
+struct gen6_ppgtt {
+ struct i915_ppgtt base;
+
+ struct mutex flush;
+ struct i915_vma *vma;
+ gen6_pte_t __iomem *pd_addr;
+
+ atomic_t pin_count;
+ struct mutex pin_mutex;
+
+ bool scan_for_unused_pt;
+};
+
+static inline u32 gen6_pte_index(u32 addr)
+{
+ return i915_pte_index(addr, GEN6_PDE_SHIFT);
+}
+
+static inline u32 gen6_pte_count(u32 addr, u32 length)
+{
+ return i915_pte_count(addr, length, GEN6_PDE_SHIFT);
+}
+
+static inline u32 gen6_pde_index(u32 addr)
+{
+ return i915_pde_index(addr, GEN6_PDE_SHIFT);
+}
+
+#define __to_gen6_ppgtt(base) container_of(base, struct gen6_ppgtt, base)
+
+static inline struct gen6_ppgtt *to_gen6_ppgtt(struct i915_ppgtt *base)
+{
+ BUILD_BUG_ON(offsetof(struct gen6_ppgtt, base));
+ return __to_gen6_ppgtt(base);
+}
+
+/*
+ * gen6_for_each_pde() iterates over every pde from start until start+length.
+ * If start and start+length are not perfectly divisible, the macro will round
+ * down and up as needed. Start=0 and length=2G effectively iterates over
+ * every PDE in the system. The macro modifies ALL its parameters except 'pd',
+ * so each of the other parameters should preferably be a simple variable, or
+ * at most an lvalue with no side-effects!
+ */
+#define gen6_for_each_pde(pt, pd, start, length, iter) \
+ for (iter = gen6_pde_index(start); \
+ length > 0 && iter < I915_PDES && \
+ (pt = i915_pt_entry(pd, iter), true); \
+ ({ u32 temp = ALIGN(start+1, 1 << GEN6_PDE_SHIFT); \
+ temp = min(temp - start, length); \
+ start += temp, length -= temp; }), ++iter)
+
+#define gen6_for_all_pdes(pt, pd, iter) \
+ for (iter = 0; \
+ iter < I915_PDES && \
+ (pt = i915_pt_entry(pd, iter), true); \
+ ++iter)
+
+int gen6_ppgtt_pin(struct i915_ppgtt *base);
+void gen6_ppgtt_unpin(struct i915_ppgtt *base);
+void gen6_ppgtt_unpin_all(struct i915_ppgtt *base);
+void gen6_ppgtt_enable(struct intel_gt *gt);
+void gen7_ppgtt_enable(struct intel_gt *gt);
+struct i915_ppgtt *gen6_ppgtt_create(struct intel_gt *gt);
+
+#endif
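
(To make the index arithmetic above concrete, assuming the usual gen6
constants from intel_gtt.h (GEN6_PDE_SHIFT = 22, I915_PDES = 512,
GEN6_PTES = 1024): each PDE maps 1 << 22 = 4 MiB, so 512 PDEs cover the
full 2 GiB gen6 PPGTT, matching the "length=2G" note on gen6_for_each_pde().
For an address such as 0x00c01000, gen6_pde_index() yields
0x00c01000 >> 22 = 3 and gen6_pte_index() yields (0x00c01000 >> 12) & 1023 = 1.)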
diff --git a/drivers/gpu/drm/i915/gt/gen6_renderstate.c b/drivers/gpu/drm/i915/gt/gen6_renderstate.c
new file mode 100644
index 000000000000..11c8e7b3dd7c
--- /dev/null
+++ b/drivers/gpu/drm/i915/gt/gen6_renderstate.c
@@ -0,0 +1,315 @@
+/*
+ * Copyright © 2014 Intel Corporation
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
+ * DEALINGS IN THE SOFTWARE.
+ *
+ * Generated by: intel-gpu-tools-1.8-220-g01153e7
+ */
+
+#include "intel_renderstate.h"
+
+static const u32 gen6_null_state_relocs[] = {
+ 0x00000020,
+ 0x00000024,
+ 0x0000002c,
+ 0x000001e0,
+ 0x000001e4,
+ -1,
+};
+
+static const u32 gen6_null_state_batch[] = {
+ 0x69040000,
+ 0x790d0001,
+ 0x00000000,
+ 0x00000000,
+ 0x78180000,
+ 0x00000001,
+ 0x61010008,
+ 0x00000000,
+ 0x00000001, /* reloc */
+ 0x00000001, /* reloc */
+ 0x00000000,
+ 0x00000001, /* reloc */
+ 0x00000000,
+ 0x00000001,
+ 0x00000000,
+ 0x00000001,
+ 0x61020000,
+ 0x00000000,
+ 0x78050001,
+ 0x00000018,
+ 0x00000000,
+ 0x780d1002,
+ 0x00000000,
+ 0x00000000,
+ 0x00000420,
+ 0x78150003,
+ 0x00000000,
+ 0x00000000,
+ 0x00000000,
+ 0x00000000,
+ 0x78100004,
+ 0x00000000,
+ 0x00000000,
+ 0x00000000,
+ 0x00000000,
+ 0x00000000,
+ 0x78160003,
+ 0x00000000,
+ 0x00000000,
+ 0x00000000,
+ 0x00000000,
+ 0x78110005,
+ 0x00000000,
+ 0x00000000,
+ 0x00000000,
+ 0x00000000,
+ 0x00000000,
+ 0x00000000,
+ 0x78120002,
+ 0x00000000,
+ 0x00000000,
+ 0x00000000,
+ 0x78170003,
+ 0x00000000,
+ 0x00000000,
+ 0x00000000,
+ 0x00000000,
+ 0x79050005,
+ 0xe0040000,
+ 0x00000000,
+ 0x00000000,
+ 0x00000000,
+ 0x00000000,
+ 0x00000000,
+ 0x79100000,
+ 0x00000000,
+ 0x79000002,
+ 0xffffffff,
+ 0x00000000,
+ 0x00000000,
+ 0x780e0002,
+ 0x00000441,
+ 0x00000401,
+ 0x00000401,
+ 0x78021002,
+ 0x00000000,
+ 0x00000000,
+ 0x00000400,
+ 0x78130012,
+ 0x00400810,
+ 0x00000000,
+ 0x20000000,
+ 0x04000000,
+ 0x00000000,
+ 0x00000000,
+ 0x00000000,
+ 0x00000000,
+ 0x00000000,
+ 0x00000000,
+ 0x00000000,
+ 0x00000000,
+ 0x00000000,
+ 0x00000000,
+ 0x00000000,
+ 0x00000000,
+ 0x00000000,
+ 0x00000000,
+ 0x00000000,
+ 0x78140007,
+ 0x00000280,
+ 0x08080000,
+ 0x00000000,
+ 0x00060000,
+ 0x4e080002,
+ 0x00100400,
+ 0x00000000,
+ 0x00000000,
+ 0x78090005,
+ 0x02000000,
+ 0x22220000,
+ 0x02f60000,
+ 0x11330000,
+ 0x02850004,
+ 0x11220000,
+ 0x78011002,
+ 0x00000000,
+ 0x00000000,
+ 0x00000200,
+ 0x78080003,
+ 0x00002000,
+ 0x00000448, /* reloc */
+ 0x00000448, /* reloc */
+ 0x00000000,
+ 0x05000000, /* cmds end */
+ 0x00000000,
+ 0x00000000,
+ 0x00000000,
+ 0x00000000,
+ 0x00000220, /* state start */
+ 0x00000240,
+ 0x00000000,
+ 0x00000000,
+ 0x00000000,
+ 0x00000000,
+ 0x00000000,
+ 0x00000000,
+ 0x00000000,
+ 0x00000000,
+ 0x00000000,
+ 0x00000000,
+ 0x00000000,
+ 0x00000000,
+ 0x00000000,
+ 0x00000000,
+ 0x00000000,
+ 0x00000000,
+ 0x00000000,
+ 0x00000000,
+ 0x00000000,
+ 0x00000000,
+ 0x00000000,
+ 0x00000000,
+ 0x00000000,
+ 0x00000000,
+ 0x00000000,
+ 0x00000000,
+ 0x00000000,
+ 0x00000000,
+ 0x00000000,
+ 0x00000000,
+ 0x0060005a,
+ 0x204077be,
+ 0x000000c0,
+ 0x008d0040,
+ 0x0060005a,
+ 0x206077be,
+ 0x000000c0,
+ 0x008d0080,
+ 0x0060005a,
+ 0x208077be,
+ 0x000000d0,
+ 0x008d0040,
+ 0x0060005a,
+ 0x20a077be,
+ 0x000000d0,
+ 0x008d0080,
+ 0x00000201,
+ 0x20080061,
+ 0x00000000,
+ 0x00000000,
+ 0x00600001,
+ 0x20200022,
+ 0x008d0000,
+ 0x00000000,
+ 0x02800031,
+ 0x21c01cc9,
+ 0x00000020,
+ 0x0a8a0001,
+ 0x00600001,
+ 0x204003be,
+ 0x008d01c0,
+ 0x00000000,
+ 0x00600001,
+ 0x206003be,
+ 0x008d01e0,
+ 0x00000000,
+ 0x00600001,
+ 0x208003be,
+ 0x008d0200,
+ 0x00000000,
+ 0x00600001,
+ 0x20a003be,
+ 0x008d0220,
+ 0x00000000,
+ 0x00600001,
+ 0x20c003be,
+ 0x008d0240,
+ 0x00000000,
+ 0x00600001,
+ 0x20e003be,
+ 0x008d0260,
+ 0x00000000,
+ 0x00600001,
+ 0x210003be,
+ 0x008d0280,
+ 0x00000000,
+ 0x00600001,
+ 0x212003be,
+ 0x008d02a0,
+ 0x00000000,
+ 0x05800031,
+ 0x24001cc8,
+ 0x00000040,
+ 0x90019000,
+ 0x0000007e,
+ 0x00000000,
+ 0x00000000,
+ 0x00000000,
+ 0x0000007e,
+ 0x00000000,
+ 0x00000000,
+ 0x00000000,
+ 0x0000007e,
+ 0x00000000,
+ 0x00000000,
+ 0x00000000,
+ 0x0000007e,
+ 0x00000000,
+ 0x00000000,
+ 0x00000000,
+ 0x0000007e,
+ 0x00000000,
+ 0x00000000,
+ 0x00000000,
+ 0x0000007e,
+ 0x00000000,
+ 0x00000000,
+ 0x00000000,
+ 0x0000007e,
+ 0x00000000,
+ 0x00000000,
+ 0x00000000,
+ 0x0000007e,
+ 0x00000000,
+ 0x00000000,
+ 0x00000000,
+ 0x30000000,
+ 0x00000124,
+ 0x00000000,
+ 0x00000000,
+ 0x00000000,
+ 0x00000000,
+ 0x00000000,
+ 0x00000000,
+ 0xf99a130c,
+ 0x799a130c,
+ 0x00000000,
+ 0x00000000,
+ 0x00000000,
+ 0x00000000,
+ 0x00000000,
+ 0x00000000,
+ 0x80000031,
+ 0x00000003,
+ 0x00000000, /* state end */
+};
+
+RO_RENDERSTATE(6);
diff --git a/drivers/gpu/drm/i915/gt/gen7_renderstate.c b/drivers/gpu/drm/i915/gt/gen7_renderstate.c
new file mode 100644
index 000000000000..655180646152
--- /dev/null
+++ b/drivers/gpu/drm/i915/gt/gen7_renderstate.c
@@ -0,0 +1,279 @@
+/*
+ * Copyright © 2014 Intel Corporation
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
+ * DEALINGS IN THE SOFTWARE.
+ *
+ * Generated by: intel-gpu-tools-1.8-220-g01153e7
+ */
+
+#include "intel_renderstate.h"
+
+static const u32 gen7_null_state_relocs[] = {
+ 0x0000000c,
+ 0x00000010,
+ 0x00000018,
+ 0x000001ec,
+ -1,
+};
+
+static const u32 gen7_null_state_batch[] = {
+ 0x69040000,
+ 0x61010008,
+ 0x00000000,
+ 0x00000001, /* reloc */
+ 0x00000001, /* reloc */
+ 0x00000000,
+ 0x00000001, /* reloc */
+ 0x00000000,
+ 0x00000001,
+ 0x00000000,
+ 0x00000001,
+ 0x790d0002,
+ 0x00000000,
+ 0x00000000,
+ 0x00000000,
+ 0x78180000,
+ 0x00000001,
+ 0x79160000,
+ 0x00000008,
+ 0x78300000,
+ 0x02010040,
+ 0x78310000,
+ 0x04000000,
+ 0x78320000,
+ 0x04000000,
+ 0x78330000,
+ 0x02000000,
+ 0x78100004,
+ 0x00000000,
+ 0x00000000,
+ 0x00000000,
+ 0x00000000,
+ 0x00000000,
+ 0x781b0005,
+ 0x00000000,
+ 0x00000000,
+ 0x00000000,
+ 0x00000000,
+ 0x00000000,
+ 0x00000000,
+ 0x781c0002,
+ 0x00000000,
+ 0x00000000,
+ 0x00000000,
+ 0x781d0004,
+ 0x00000000,
+ 0x00000000,
+ 0x00000000,
+ 0x00000000,
+ 0x00000000,
+ 0x78110005,
+ 0x00000000,
+ 0x00000000,
+ 0x00000000,
+ 0x00000000,
+ 0x00000000,
+ 0x00000000,
+ 0x78120002,
+ 0x00000000,
+ 0x00000000,
+ 0x00000000,
+ 0x78210000,
+ 0x00000000,
+ 0x78130005,
+ 0x00000000,
+ 0x20000000,
+ 0x04000000,
+ 0x00000000,
+ 0x00000000,
+ 0x00000000,
+ 0x78140001,
+ 0x20000800,
+ 0x00000000,
+ 0x781e0001,
+ 0x00000000,
+ 0x00000000,
+ 0x78050005,
+ 0xe0040000,
+ 0x00000000,
+ 0x00000000,
+ 0x00000000,
+ 0x00000000,
+ 0x00000000,
+ 0x78040001,
+ 0x00000000,
+ 0x00000000,
+ 0x78240000,
+ 0x00000240,
+ 0x78230000,
+ 0x00000260,
+ 0x782f0000,
+ 0x00000280,
+ 0x781f000c,
+ 0x00400810,
+ 0x00000000,
+ 0x00000000,
+ 0x00000000,
+ 0x00000000,
+ 0x00000000,
+ 0x00000000,
+ 0x00000000,
+ 0x00000000,
+ 0x00000000,
+ 0x00000000,
+ 0x00000000,
+ 0x00000000,
+ 0x78200006,
+ 0x000002c0,
+ 0x08080000,
+ 0x00000000,
+ 0x28000402,
+ 0x00060000,
+ 0x00000000,
+ 0x00000000,
+ 0x78090005,
+ 0x02000000,
+ 0x22220000,
+ 0x02f60000,
+ 0x11230000,
+ 0x02f60004,
+ 0x11230000,
+ 0x78080003,
+ 0x00006008,
+ 0x00000340, /* reloc */
+ 0xffffffff,
+ 0x00000000,
+ 0x782a0000,
+ 0x00000360,
+ 0x79000002,
+ 0xffffffff,
+ 0x00000000,
+ 0x00000000,
+ 0x7b000005,
+ 0x0000000f,
+ 0x00000003,
+ 0x00000000,
+ 0x00000001,
+ 0x00000000,
+ 0x00000000,
+ 0x05000000, /* cmds end */
+ 0x00000000,
+ 0x00000000,
+ 0x00000000,
+ 0x00000000,
+ 0x00000031, /* state start */
+ 0x00000003,
+ 0x00000000,
+ 0x00000000,
+ 0x00000000,
+ 0x00000000,
+ 0x00000000,
+ 0x00000000,
+ 0xf99a130c,
+ 0x799a130c,
+ 0x00000000,
+ 0x00000000,
+ 0x00000000,
+ 0x00000000,
+ 0x00000000,
+ 0x00000000,
+ 0x00000000,
+ 0x00000000,
+ 0x00000000,
+ 0x00000492,
+ 0x00000000,
+ 0x00000000,
+ 0x00000000,
+ 0x00000000,
+ 0x00000000,
+ 0x00000000,
+ 0x00000000,
+ 0x00000000,
+ 0x00000000,
+ 0x00000000,
+ 0x00000000,
+ 0x00000000,
+ 0x0080005a,
+ 0x2e2077bd,
+ 0x000000c0,
+ 0x008d0040,
+ 0x0080005a,
+ 0x2e6077bd,
+ 0x000000d0,
+ 0x008d0040,
+ 0x02800031,
+ 0x21801fa9,
+ 0x008d0e20,
+ 0x08840001,
+ 0x00800001,
+ 0x2e2003bd,
+ 0x008d0180,
+ 0x00000000,
+ 0x00800001,
+ 0x2e6003bd,
+ 0x008d01c0,
+ 0x00000000,
+ 0x00800001,
+ 0x2ea003bd,
+ 0x008d0200,
+ 0x00000000,
+ 0x00800001,
+ 0x2ee003bd,
+ 0x008d0240,
+ 0x00000000,
+ 0x05800031,
+ 0x20001fa8,
+ 0x008d0e20,
+ 0x90031000,
+ 0x00000000,
+ 0x00000000,
+ 0x00000000,
+ 0x00000000,
+ 0x00000000,
+ 0x00000000,
+ 0x00000000,
+ 0x00000000,
+ 0x00000380,
+ 0x000003a0,
+ 0x00000000,
+ 0x00000000,
+ 0x00000000,
+ 0x00000000,
+ 0x00000000,
+ 0x00000000,
+ 0x00000000,
+ 0x00000000,
+ 0x00000000,
+ 0x00000000,
+ 0x00000000,
+ 0x00000000,
+ 0x00000000,
+ 0x00000000,
+ 0x00000000,
+ 0x00000000,
+ 0x00000000,
+ 0x00000000,
+ 0x00000000,
+ 0x00000000,
+ 0x00000000,
+ 0x00000000, /* state end */
+};
+
+RO_RENDERSTATE(7);
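
For reference: the RO_RENDERSTATE() macro consumed above comes from
intel_renderstate.h, which this patch does not modify. To the best of my
reading it expands to a read-only descriptor tying the two generated tables
together, roughly:

	#define RO_RENDERSTATE(_g)					\
		const struct intel_renderstate_rodata gen ## _g ## _null_state = { \
			.relocs = gen ## _g ## _null_state_relocs,	\
			.batch = gen ## _g ## _null_state_batch,	\
			.batch_items = ARRAY_SIZE(gen ## _g ## _null_state_batch), \
		}

so each genN_renderstate.c file exports a single genN_null_state blob plus
its relocation offsets, and nothing else.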
diff --git a/drivers/gpu/drm/i915/gt/gen8_ppgtt.c b/drivers/gpu/drm/i915/gt/gen8_ppgtt.c
new file mode 100644
index 000000000000..4d1de2d97d5c
--- /dev/null
+++ b/drivers/gpu/drm/i915/gt/gen8_ppgtt.c
@@ -0,0 +1,724 @@
+// SPDX-License-Identifier: MIT
+/*
+ * Copyright © 2020 Intel Corporation
+ */
+
+#include <linux/log2.h>
+
+#include "gen8_ppgtt.h"
+#include "i915_scatterlist.h"
+#include "i915_trace.h"
+#include "i915_vgpu.h"
+#include "intel_gt.h"
+#include "intel_gtt.h"
+
+static u64 gen8_pde_encode(const dma_addr_t addr,
+ const enum i915_cache_level level)
+{
+ u64 pde = addr | _PAGE_PRESENT | _PAGE_RW;
+
+ if (level != I915_CACHE_NONE)
+ pde |= PPAT_CACHED_PDE;
+ else
+ pde |= PPAT_UNCACHED;
+
+ return pde;
+}
+
+static void gen8_ppgtt_notify_vgt(struct i915_ppgtt *ppgtt, bool create)
+{
+ struct drm_i915_private *i915 = ppgtt->vm.i915;
+ struct intel_uncore *uncore = ppgtt->vm.gt->uncore;
+ enum vgt_g2v_type msg;
+ int i;
+
+ if (create)
+ atomic_inc(px_used(ppgtt->pd)); /* never remove */
+ else
+ atomic_dec(px_used(ppgtt->pd));
+
+ mutex_lock(&i915->vgpu.lock);
+
+ if (i915_vm_is_4lvl(&ppgtt->vm)) {
+ const u64 daddr = px_dma(ppgtt->pd);
+
+ intel_uncore_write(uncore,
+ vgtif_reg(pdp[0].lo), lower_32_bits(daddr));
+ intel_uncore_write(uncore,
+ vgtif_reg(pdp[0].hi), upper_32_bits(daddr));
+
+ msg = create ?
+ VGT_G2V_PPGTT_L4_PAGE_TABLE_CREATE :
+ VGT_G2V_PPGTT_L4_PAGE_TABLE_DESTROY;
+ } else {
+ for (i = 0; i < GEN8_3LVL_PDPES; i++) {
+ const u64 daddr = i915_page_dir_dma_addr(ppgtt, i);
+
+ intel_uncore_write(uncore,
+ vgtif_reg(pdp[i].lo),
+ lower_32_bits(daddr));
+ intel_uncore_write(uncore,
+ vgtif_reg(pdp[i].hi),
+ upper_32_bits(daddr));
+ }
+
+ msg = create ?
+ VGT_G2V_PPGTT_L3_PAGE_TABLE_CREATE :
+ VGT_G2V_PPGTT_L3_PAGE_TABLE_DESTROY;
+ }
+
+ /* g2v_notify atomically (via hv trap) consumes the message packet. */
+ intel_uncore_write(uncore, vgtif_reg(g2v_notify), msg);
+
+ mutex_unlock(&i915->vgpu.lock);
+}
+
+/* Index shifts into the pagetable are offset by GEN8_PTE_SHIFT [12] */
+#define GEN8_PAGE_SIZE (SZ_4K) /* page and page-directory sizes are the same */
+#define GEN8_PTE_SHIFT (ilog2(GEN8_PAGE_SIZE))
+#define GEN8_PDES (GEN8_PAGE_SIZE / sizeof(u64))
+#define gen8_pd_shift(lvl) ((lvl) * ilog2(GEN8_PDES))
+#define gen8_pd_index(i, lvl) i915_pde_index((i), gen8_pd_shift(lvl))
+#define __gen8_pte_shift(lvl) (GEN8_PTE_SHIFT + gen8_pd_shift(lvl))
+#define __gen8_pte_index(a, lvl) i915_pde_index((a), __gen8_pte_shift(lvl))
+
+#define as_pd(x) container_of((x), typeof(struct i915_page_directory), pt)
+
+static inline unsigned int
+gen8_pd_range(u64 start, u64 end, int lvl, unsigned int *idx)
+{
+ const int shift = gen8_pd_shift(lvl);
+ const u64 mask = ~0ull << gen8_pd_shift(lvl + 1);
+
+ GEM_BUG_ON(start >= end);
+ end += ~mask >> gen8_pd_shift(1);
+
+ *idx = i915_pde_index(start, shift);
+ if ((start ^ end) & mask)
+ return GEN8_PDES - *idx;
+ else
+ return i915_pde_index(end, shift) - *idx;
+}
+
+static inline bool gen8_pd_contains(u64 start, u64 end, int lvl)
+{
+ const u64 mask = ~0ull << gen8_pd_shift(lvl + 1);
+
+ GEM_BUG_ON(start >= end);
+ return (start ^ end) & mask && (start & ~mask) == 0;
+}
+
+static inline unsigned int gen8_pt_count(u64 start, u64 end)
+{
+ GEM_BUG_ON(start >= end);
+ if ((start ^ end) >> gen8_pd_shift(1))
+ return GEN8_PDES - (start & (GEN8_PDES - 1));
+ else
+ return end - start;
+}
+
+static inline unsigned int
+gen8_pd_top_count(const struct i915_address_space *vm)
+{
+ unsigned int shift = __gen8_pte_shift(vm->top);
+ return (vm->total + (1ull << shift) - 1) >> shift;
+}
+
+static inline struct i915_page_directory *
+gen8_pdp_for_page_index(struct i915_address_space * const vm, const u64 idx)
+{
+ struct i915_ppgtt * const ppgtt = i915_vm_to_ppgtt(vm);
+
+ if (vm->top == 2)
+ return ppgtt->pd;
+ else
+ return i915_pd_entry(ppgtt->pd, gen8_pd_index(idx, vm->top));
+}
+
+static inline struct i915_page_directory *
+gen8_pdp_for_page_address(struct i915_address_space * const vm, const u64 addr)
+{
+ return gen8_pdp_for_page_index(vm, addr >> GEN8_PTE_SHIFT);
+}
+
+static void __gen8_ppgtt_cleanup(struct i915_address_space *vm,
+ struct i915_page_directory *pd,
+ int count, int lvl)
+{
+ if (lvl) {
+ void **pde = pd->entry;
+
+ do {
+ if (!*pde)
+ continue;
+
+ __gen8_ppgtt_cleanup(vm, *pde, GEN8_PDES, lvl - 1);
+ } while (pde++, --count);
+ }
+
+ free_px(vm, pd);
+}
+
+static void gen8_ppgtt_cleanup(struct i915_address_space *vm)
+{
+ struct i915_ppgtt *ppgtt = i915_vm_to_ppgtt(vm);
+
+ if (intel_vgpu_active(vm->i915))
+ gen8_ppgtt_notify_vgt(ppgtt, false);
+
+ __gen8_ppgtt_cleanup(vm, ppgtt->pd, gen8_pd_top_count(vm), vm->top);
+ free_scratch(vm);
+}
+
+static u64 __gen8_ppgtt_clear(struct i915_address_space * const vm,
+ struct i915_page_directory * const pd,
+ u64 start, const u64 end, int lvl)
+{
+ const struct i915_page_scratch * const scratch = &vm->scratch[lvl];
+ unsigned int idx, len;
+
+ GEM_BUG_ON(end > vm->total >> GEN8_PTE_SHIFT);
+
+ len = gen8_pd_range(start, end, lvl--, &idx);
+ DBG("%s(%p):{ lvl:%d, start:%llx, end:%llx, idx:%d, len:%d, used:%d }\n",
+ __func__, vm, lvl + 1, start, end,
+ idx, len, atomic_read(px_used(pd)));
+ GEM_BUG_ON(!len || len >= atomic_read(px_used(pd)));
+
+ do {
+ struct i915_page_table *pt = pd->entry[idx];
+
+ if (atomic_fetch_inc(&pt->used) >> gen8_pd_shift(1) &&
+ gen8_pd_contains(start, end, lvl)) {
+ DBG("%s(%p):{ lvl:%d, idx:%d, start:%llx, end:%llx } removing pd\n",
+ __func__, vm, lvl + 1, idx, start, end);
+ clear_pd_entry(pd, idx, scratch);
+ __gen8_ppgtt_cleanup(vm, as_pd(pt), I915_PDES, lvl);
+ start += (u64)I915_PDES << gen8_pd_shift(lvl);
+ continue;
+ }
+
+ if (lvl) {
+ start = __gen8_ppgtt_clear(vm, as_pd(pt),
+ start, end, lvl);
+ } else {
+ unsigned int count;
+ u64 *vaddr;
+
+ count = gen8_pt_count(start, end);
+ DBG("%s(%p):{ lvl:%d, start:%llx, end:%llx, idx:%d, len:%d, used:%d } removing pte\n",
+ __func__, vm, lvl, start, end,
+ gen8_pd_index(start, 0), count,
+ atomic_read(&pt->used));
+ GEM_BUG_ON(!count || count >= atomic_read(&pt->used));
+
+ vaddr = kmap_atomic_px(pt);
+ memset64(vaddr + gen8_pd_index(start, 0),
+ vm->scratch[0].encode,
+ count);
+ kunmap_atomic(vaddr);
+
+ atomic_sub(count, &pt->used);
+ start += count;
+ }
+
+ if (release_pd_entry(pd, idx, pt, scratch))
+ free_px(vm, pt);
+ } while (idx++, --len);
+
+ return start;
+}
+
+static void gen8_ppgtt_clear(struct i915_address_space *vm,
+ u64 start, u64 length)
+{
+ GEM_BUG_ON(!IS_ALIGNED(start, BIT_ULL(GEN8_PTE_SHIFT)));
+ GEM_BUG_ON(!IS_ALIGNED(length, BIT_ULL(GEN8_PTE_SHIFT)));
+ GEM_BUG_ON(range_overflows(start, length, vm->total));
+
+ start >>= GEN8_PTE_SHIFT;
+ length >>= GEN8_PTE_SHIFT;
+ GEM_BUG_ON(length == 0);
+
+ __gen8_ppgtt_clear(vm, i915_vm_to_ppgtt(vm)->pd,
+ start, start + length, vm->top);
+}
+
+static int __gen8_ppgtt_alloc(struct i915_address_space * const vm,
+ struct i915_page_directory * const pd,
+ u64 * const start, const u64 end, int lvl)
+{
+ const struct i915_page_scratch * const scratch = &vm->scratch[lvl];
+ struct i915_page_table *alloc = NULL;
+ unsigned int idx, len;
+ int ret = 0;
+
+ GEM_BUG_ON(end > vm->total >> GEN8_PTE_SHIFT);
+
+ len = gen8_pd_range(*start, end, lvl--, &idx);
+ DBG("%s(%p):{ lvl:%d, start:%llx, end:%llx, idx:%d, len:%d, used:%d }\n",
+ __func__, vm, lvl + 1, *start, end,
+ idx, len, atomic_read(px_used(pd)));
+ GEM_BUG_ON(!len || (idx + len - 1) >> gen8_pd_shift(1));
+
+ spin_lock(&pd->lock);
+ GEM_BUG_ON(!atomic_read(px_used(pd))); /* Must be pinned! */
+ do {
+ struct i915_page_table *pt = pd->entry[idx];
+
+ if (!pt) {
+ spin_unlock(&pd->lock);
+
+ DBG("%s(%p):{ lvl:%d, idx:%d } allocating new tree\n",
+ __func__, vm, lvl + 1, idx);
+
+ pt = fetch_and_zero(&alloc);
+ if (lvl) {
+ if (!pt) {
+ pt = &alloc_pd(vm)->pt;
+ if (IS_ERR(pt)) {
+ ret = PTR_ERR(pt);
+ goto out;
+ }
+ }
+
+ fill_px(pt, vm->scratch[lvl].encode);
+ } else {
+ if (!pt) {
+ pt = alloc_pt(vm);
+ if (IS_ERR(pt)) {
+ ret = PTR_ERR(pt);
+ goto out;
+ }
+ }
+
+ if (intel_vgpu_active(vm->i915) ||
+ gen8_pt_count(*start, end) < I915_PDES)
+ fill_px(pt, vm->scratch[lvl].encode);
+ }
+
+ spin_lock(&pd->lock);
+ if (likely(!pd->entry[idx]))
+ set_pd_entry(pd, idx, pt);
+ else
+ alloc = pt, pt = pd->entry[idx];
+ }
+
+ if (lvl) {
+ atomic_inc(&pt->used);
+ spin_unlock(&pd->lock);
+
+ ret = __gen8_ppgtt_alloc(vm, as_pd(pt),
+ start, end, lvl);
+ if (unlikely(ret)) {
+ if (release_pd_entry(pd, idx, pt, scratch))
+ free_px(vm, pt);
+ goto out;
+ }
+
+ spin_lock(&pd->lock);
+ atomic_dec(&pt->used);
+ GEM_BUG_ON(!atomic_read(&pt->used));
+ } else {
+ unsigned int count = gen8_pt_count(*start, end);
+
+ DBG("%s(%p):{ lvl:%d, start:%llx, end:%llx, idx:%d, len:%d, used:%d } inserting pte\n",
+ __func__, vm, lvl, *start, end,
+ gen8_pd_index(*start, 0), count,
+ atomic_read(&pt->used));
+
+ atomic_add(count, &pt->used);
+ /* All other pdes may be simultaneously removed */
+ GEM_BUG_ON(atomic_read(&pt->used) > NALLOC * I915_PDES);
+ *start += count;
+ }
+ } while (idx++, --len);
+ spin_unlock(&pd->lock);
+out:
+ if (alloc)
+ free_px(vm, alloc);
+ return ret;
+}
+
+static int gen8_ppgtt_alloc(struct i915_address_space *vm,
+ u64 start, u64 length)
+{
+ u64 from;
+ int err;
+
+ GEM_BUG_ON(!IS_ALIGNED(start, BIT_ULL(GEN8_PTE_SHIFT)));
+ GEM_BUG_ON(!IS_ALIGNED(length, BIT_ULL(GEN8_PTE_SHIFT)));
+ GEM_BUG_ON(range_overflows(start, length, vm->total));
+
+ start >>= GEN8_PTE_SHIFT;
+ length >>= GEN8_PTE_SHIFT;
+ GEM_BUG_ON(length == 0);
+ from = start;
+
+ err = __gen8_ppgtt_alloc(vm, i915_vm_to_ppgtt(vm)->pd,
+ &start, start + length, vm->top);
+ if (unlikely(err && from != start))
+ __gen8_ppgtt_clear(vm, i915_vm_to_ppgtt(vm)->pd,
+ from, start, vm->top);
+
+ return err;
+}
+
+static __always_inline u64
+gen8_ppgtt_insert_pte(struct i915_ppgtt *ppgtt,
+ struct i915_page_directory *pdp,
+ struct sgt_dma *iter,
+ u64 idx,
+ enum i915_cache_level cache_level,
+ u32 flags)
+{
+ struct i915_page_directory *pd;
+ const gen8_pte_t pte_encode = gen8_pte_encode(0, cache_level, flags);
+ gen8_pte_t *vaddr;
+
+ pd = i915_pd_entry(pdp, gen8_pd_index(idx, 2));
+ vaddr = kmap_atomic_px(i915_pt_entry(pd, gen8_pd_index(idx, 1)));
+ do {
+ GEM_BUG_ON(iter->sg->length < I915_GTT_PAGE_SIZE);
+ vaddr[gen8_pd_index(idx, 0)] = pte_encode | iter->dma;
+
+ iter->dma += I915_GTT_PAGE_SIZE;
+ if (iter->dma >= iter->max) {
+ iter->sg = __sg_next(iter->sg);
+ if (!iter->sg) {
+ idx = 0;
+ break;
+ }
+
+ iter->dma = sg_dma_address(iter->sg);
+ iter->max = iter->dma + iter->sg->length;
+ }
+
+ if (gen8_pd_index(++idx, 0) == 0) {
+ if (gen8_pd_index(idx, 1) == 0) {
+ /* Limited by sg length for 3lvl */
+ if (gen8_pd_index(idx, 2) == 0)
+ break;
+
+ pd = pdp->entry[gen8_pd_index(idx, 2)];
+ }
+
+ kunmap_atomic(vaddr);
+ vaddr = kmap_atomic_px(i915_pt_entry(pd, gen8_pd_index(idx, 1)));
+ }
+ } while (1);
+ kunmap_atomic(vaddr);
+
+ return idx;
+}
+
+static void gen8_ppgtt_insert_huge(struct i915_vma *vma,
+ struct sgt_dma *iter,
+ enum i915_cache_level cache_level,
+ u32 flags)
+{
+ const gen8_pte_t pte_encode = gen8_pte_encode(0, cache_level, flags);
+ u64 start = vma->node.start;
+ dma_addr_t rem = iter->sg->length;
+
+ GEM_BUG_ON(!i915_vm_is_4lvl(vma->vm));
+
+ do {
+ struct i915_page_directory * const pdp =
+ gen8_pdp_for_page_address(vma->vm, start);
+ struct i915_page_directory * const pd =
+ i915_pd_entry(pdp, __gen8_pte_index(start, 2));
+ gen8_pte_t encode = pte_encode;
+ unsigned int maybe_64K = -1;
+ unsigned int page_size;
+ gen8_pte_t *vaddr;
+ u16 index;
+
+ if (vma->page_sizes.sg & I915_GTT_PAGE_SIZE_2M &&
+ IS_ALIGNED(iter->dma, I915_GTT_PAGE_SIZE_2M) &&
+ rem >= I915_GTT_PAGE_SIZE_2M &&
+ !__gen8_pte_index(start, 0)) {
+ index = __gen8_pte_index(start, 1);
+ encode |= GEN8_PDE_PS_2M;
+ page_size = I915_GTT_PAGE_SIZE_2M;
+
+ vaddr = kmap_atomic_px(pd);
+ } else {
+ struct i915_page_table *pt =
+ i915_pt_entry(pd, __gen8_pte_index(start, 1));
+
+ index = __gen8_pte_index(start, 0);
+ page_size = I915_GTT_PAGE_SIZE;
+
+ if (!index &&
+ vma->page_sizes.sg & I915_GTT_PAGE_SIZE_64K &&
+ IS_ALIGNED(iter->dma, I915_GTT_PAGE_SIZE_64K) &&
+ (IS_ALIGNED(rem, I915_GTT_PAGE_SIZE_64K) ||
+ rem >= (I915_PDES - index) * I915_GTT_PAGE_SIZE))
+ maybe_64K = __gen8_pte_index(start, 1);
+
+ vaddr = kmap_atomic_px(pt);
+ }
+
+ do {
+ GEM_BUG_ON(iter->sg->length < page_size);
+ vaddr[index++] = encode | iter->dma;
+
+ start += page_size;
+ iter->dma += page_size;
+ rem -= page_size;
+ if (iter->dma >= iter->max) {
+ iter->sg = __sg_next(iter->sg);
+ if (!iter->sg)
+ break;
+
+ rem = iter->sg->length;
+ iter->dma = sg_dma_address(iter->sg);
+ iter->max = iter->dma + rem;
+
+ if (maybe_64K != -1 && index < I915_PDES &&
+ !(IS_ALIGNED(iter->dma, I915_GTT_PAGE_SIZE_64K) &&
+ (IS_ALIGNED(rem, I915_GTT_PAGE_SIZE_64K) ||
+ rem >= (I915_PDES - index) * I915_GTT_PAGE_SIZE)))
+ maybe_64K = -1;
+
+ if (unlikely(!IS_ALIGNED(iter->dma, page_size)))
+ break;
+ }
+ } while (rem >= page_size && index < I915_PDES);
+
+ kunmap_atomic(vaddr);
+
+ /*
+ * Is it safe to mark the 2M block as 64K? -- Either we have
+ * filled the whole page-table with 64K entries, or we have
+ * filled part of it, reached the end of the sg table, and have
+ * enough padding.
+ */
+ if (maybe_64K != -1 &&
+ (index == I915_PDES ||
+ (i915_vm_has_scratch_64K(vma->vm) &&
+ !iter->sg && IS_ALIGNED(vma->node.start +
+ vma->node.size,
+ I915_GTT_PAGE_SIZE_2M)))) {
+ vaddr = kmap_atomic_px(pd);
+ vaddr[maybe_64K] |= GEN8_PDE_IPS_64K;
+ kunmap_atomic(vaddr);
+ page_size = I915_GTT_PAGE_SIZE_64K;
+
+ /*
+ * We write all 4K page entries, even when using 64K
+ * pages. In order to verify that the HW isn't cheating
+ * by using the 4K PTE instead of the 64K PTE, we want
+ * to remove all the surplus entries. If the HW skipped
+ * the 64K PTE, it will read/write into the scratch page
+ * instead - which we detect as missing results during
+ * selftests.
+ */
+ if (I915_SELFTEST_ONLY(vma->vm->scrub_64K)) {
+ u16 i;
+
+ encode = vma->vm->scratch[0].encode;
+ vaddr = kmap_atomic_px(i915_pt_entry(pd, maybe_64K));
+
+ for (i = 1; i < index; i += 16)
+ memset64(vaddr + i, encode, 15);
+
+ kunmap_atomic(vaddr);
+ }
+ }
+
+ vma->page_sizes.gtt |= page_size;
+ } while (iter->sg);
+}
+
+static void gen8_ppgtt_insert(struct i915_address_space *vm,
+ struct i915_vma *vma,
+ enum i915_cache_level cache_level,
+ u32 flags)
+{
+ struct i915_ppgtt * const ppgtt = i915_vm_to_ppgtt(vm);
+ struct sgt_dma iter = sgt_dma(vma);
+
+ if (vma->page_sizes.sg > I915_GTT_PAGE_SIZE) {
+ gen8_ppgtt_insert_huge(vma, &iter, cache_level, flags);
+ } else {
+ u64 idx = vma->node.start >> GEN8_PTE_SHIFT;
+
+ do {
+ struct i915_page_directory * const pdp =
+ gen8_pdp_for_page_index(vm, idx);
+
+ idx = gen8_ppgtt_insert_pte(ppgtt, pdp, &iter, idx,
+ cache_level, flags);
+ } while (idx);
+
+ vma->page_sizes.gtt = I915_GTT_PAGE_SIZE;
+ }
+}
+
+static int gen8_init_scratch(struct i915_address_space *vm)
+{
+ int ret;
+ int i;
+
+ /*
+ * If everybody agrees not to write into the scratch page,
+ * we can reuse it for every vm, keeping contexts and processes separate.
+ */
+ if (vm->has_read_only && vm->gt->vm && !i915_is_ggtt(vm->gt->vm)) {
+ struct i915_address_space *clone = vm->gt->vm;
+
+ GEM_BUG_ON(!clone->has_read_only);
+
+ vm->scratch_order = clone->scratch_order;
+ memcpy(vm->scratch, clone->scratch, sizeof(vm->scratch));
+ px_dma(&vm->scratch[0]) = 0; /* no xfer of ownership */
+ return 0;
+ }
+
+ ret = setup_scratch_page(vm, __GFP_HIGHMEM);
+ if (ret)
+ return ret;
+
+ vm->scratch[0].encode =
+ gen8_pte_encode(px_dma(&vm->scratch[0]),
+ I915_CACHE_LLC, vm->has_read_only);
+
+ for (i = 1; i <= vm->top; i++) {
+ if (unlikely(setup_page_dma(vm, px_base(&vm->scratch[i]))))
+ goto free_scratch;
+
+ fill_px(&vm->scratch[i], vm->scratch[i - 1].encode);
+ vm->scratch[i].encode =
+ gen8_pde_encode(px_dma(&vm->scratch[i]),
+ I915_CACHE_LLC);
+ }
+
+ return 0;
+
+free_scratch:
+ free_scratch(vm);
+ return -ENOMEM;
+}
+
+static int gen8_preallocate_top_level_pdp(struct i915_ppgtt *ppgtt)
+{
+ struct i915_address_space *vm = &ppgtt->vm;
+ struct i915_page_directory *pd = ppgtt->pd;
+ unsigned int idx;
+
+ GEM_BUG_ON(vm->top != 2);
+ GEM_BUG_ON(gen8_pd_top_count(vm) != GEN8_3LVL_PDPES);
+
+ for (idx = 0; idx < GEN8_3LVL_PDPES; idx++) {
+ struct i915_page_directory *pde;
+
+ pde = alloc_pd(vm);
+ if (IS_ERR(pde))
+ return PTR_ERR(pde);
+
+ fill_px(pde, vm->scratch[1].encode);
+ set_pd_entry(pd, idx, pde);
+ atomic_inc(px_used(pde)); /* keep pinned */
+ }
+ wmb();
+
+ return 0;
+}
+
+static struct i915_page_directory *
+gen8_alloc_top_pd(struct i915_address_space *vm)
+{
+ const unsigned int count = gen8_pd_top_count(vm);
+ struct i915_page_directory *pd;
+
+ GEM_BUG_ON(count > ARRAY_SIZE(pd->entry));
+
+ pd = __alloc_pd(offsetof(typeof(*pd), entry[count]));
+ if (unlikely(!pd))
+ return ERR_PTR(-ENOMEM);
+
+ if (unlikely(setup_page_dma(vm, px_base(pd)))) {
+ kfree(pd);
+ return ERR_PTR(-ENOMEM);
+ }
+
+ fill_page_dma(px_base(pd), vm->scratch[vm->top].encode, count);
+ atomic_inc(px_used(pd)); /* mark as pinned */
+ return pd;
+}
+
+/*
+ * GEN8 legacy ppgtt programming is accomplished through a maximum of 4 PDP
+ * registers, with a net effect resembling a 2-level page table in normal x86
+ * terms. Each PDP represents 1GB of memory: 4 * 512 * 512 * 4096 bytes = 4GB,
+ * the full legacy 32b address space.
+ */
+struct i915_ppgtt *gen8_ppgtt_create(struct intel_gt *gt)
+{
+ struct i915_ppgtt *ppgtt;
+ int err;
+
+ ppgtt = kzalloc(sizeof(*ppgtt), GFP_KERNEL);
+ if (!ppgtt)
+ return ERR_PTR(-ENOMEM);
+
+ ppgtt_init(ppgtt, gt);
+ ppgtt->vm.top = i915_vm_is_4lvl(&ppgtt->vm) ? 3 : 2;
+
+ /*
+ * From bdw, there is hw support for read-only pages in the PPGTT.
+ *
+ * Gen11 has HSDES#:1807136187 unresolved. Disable ro support
+ * for now.
+ *
+ * Gen12 has inherited the same read-only fault issue from gen11.
+ */
+ ppgtt->vm.has_read_only = !IS_GEN_RANGE(gt->i915, 11, 12);
+
+ /*
+ * There are only a few exceptions for gen >= 6: chv and bxt.
+ * And since we are not sure about the latter, play it safe for now.
+ */
+ if (IS_CHERRYVIEW(gt->i915) || IS_BROXTON(gt->i915))
+ ppgtt->vm.pt_kmap_wc = true;
+
+ err = gen8_init_scratch(&ppgtt->vm);
+ if (err)
+ goto err_free;
+
+ ppgtt->pd = gen8_alloc_top_pd(&ppgtt->vm);
+ if (IS_ERR(ppgtt->pd)) {
+ err = PTR_ERR(ppgtt->pd);
+ goto err_free_scratch;
+ }
+
+ if (!i915_vm_is_4lvl(&ppgtt->vm)) {
+ err = gen8_preallocate_top_level_pdp(ppgtt);
+ if (err)
+ goto err_free_pd;
+ }
+
+ ppgtt->vm.bind_async_flags = I915_VMA_LOCAL_BIND;
+ ppgtt->vm.insert_entries = gen8_ppgtt_insert;
+ ppgtt->vm.allocate_va_range = gen8_ppgtt_alloc;
+ ppgtt->vm.clear_range = gen8_ppgtt_clear;
+
+ if (intel_vgpu_active(gt->i915))
+ gen8_ppgtt_notify_vgt(ppgtt, true);
+
+ ppgtt->vm.cleanup = gen8_ppgtt_cleanup;
+
+ return ppgtt;
+
+err_free_pd:
+ __gen8_ppgtt_cleanup(&ppgtt->vm, ppgtt->pd,
+ gen8_pd_top_count(&ppgtt->vm), ppgtt->vm.top);
+err_free_scratch:
+ free_scratch(&ppgtt->vm);
+err_free:
+ kfree(ppgtt);
+ return ERR_PTR(err);
+}
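
A worked example of the index arithmetic near the top of this file: each
page-directory level covers 9 bits of the address above the 12-bit page
offset, so a 48-bit canonical address splits into four 9-bit indices. The
sketch below is a standalone userspace program, not part of the patch; it
mirrors __gen8_pte_index() above (i915_pde_index() masks with GEN8_PDES - 1):

	#include <stdint.h>
	#include <stdio.h>

	#define GEN8_PTE_SHIFT		12	/* 4KiB pages */
	#define GEN8_PDES		512	/* 4096 / sizeof(u64) */
	#define gen8_pd_shift(lvl)	((lvl) * 9)	/* ilog2(512) == 9 */
	#define __gen8_pte_shift(lvl)	(GEN8_PTE_SHIFT + gen8_pd_shift(lvl))
	#define __gen8_pte_index(a, lvl) \
		(((a) >> __gen8_pte_shift(lvl)) & (GEN8_PDES - 1))

	int main(void)
	{
		uint64_t addr = 0x0000abcdef123000ull;
		int lvl;

		/* vm->top == 3 for a 4-level ppgtt: pml4 -> pdp -> pd -> pt */
		for (lvl = 3; lvl >= 0; lvl--)
			printf("lvl %d index: %u\n", lvl,
			       (unsigned int)__gen8_pte_index(addr, lvl));
		return 0;
	}

Note also the error path in gen8_ppgtt_alloc() above: on failure it rewinds
with __gen8_ppgtt_clear() over [from, start), so a partial allocation never
leaks populated entries.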
diff --git a/drivers/gpu/drm/i915/gt/gen8_ppgtt.h b/drivers/gpu/drm/i915/gt/gen8_ppgtt.h
new file mode 100644
index 000000000000..76a08b9c1f5c
--- /dev/null
+++ b/drivers/gpu/drm/i915/gt/gen8_ppgtt.h
@@ -0,0 +1,13 @@
+/* SPDX-License-Identifier: MIT */
+/*
+ * Copyright © 2020 Intel Corporation
+ */
+
+#ifndef __GEN8_PPGTT_H__
+#define __GEN8_PPGTT_H__
+
+struct intel_gt;
+
+struct i915_ppgtt *gen8_ppgtt_create(struct intel_gt *gt);
+
+#endif
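
Usage sketch: per the diffstat, the generation dispatch lives in
intel_ppgtt.c in this same series; assuming it follows the existing
gen6/gen8 split, the caller looks roughly like:

	static struct i915_ppgtt *__ppgtt_create(struct intel_gt *gt)
	{
		if (INTEL_GEN(gt->i915) < 8)
			return gen6_ppgtt_create(gt);
		else
			return gen8_ppgtt_create(gt);
	}

with the returned pointer checked via IS_ERR()/PTR_ERR(), as
gen8_ppgtt_create() itself does for its sub-allocations.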
diff --git a/drivers/gpu/drm/i915/gt/gen8_renderstate.c b/drivers/gpu/drm/i915/gt/gen8_renderstate.c
new file mode 100644
index 000000000000..95288a34c15d
--- /dev/null
+++ b/drivers/gpu/drm/i915/gt/gen8_renderstate.c
@@ -0,0 +1,983 @@
+/*
+ * Copyright © 2014 Intel Corporation
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
+ * DEALINGS IN THE SOFTWARE.
+ *
+ * Generated by: intel-gpu-tools-1.8-220-g01153e7
+ */
+
+#include "intel_renderstate.h"
+
+static const u32 gen8_null_state_relocs[] = {
+ 0x00000798,
+ 0x000007a4,
+ 0x000007ac,
+ 0x000007bc,
+ -1,
+};
+
+static const u32 gen8_null_state_batch[] = {
+ 0x7a000004,
+ 0x01000000,
+ 0x00000000,
+ 0x00000000,
+ 0x00000000,
+ 0x00000000,
+ 0x69040000,
+ 0x78140000,
+ 0x04000000,
+ 0x7820000a,
+ 0x00000000,
+ 0x00000000,
+ 0x80000000,
+ 0x00000000,
+ 0x00000000,
+ 0x00000000,
+ 0x00000000,
+ 0x00000000,
+ 0x00000000,
+ 0x00000000,
+ 0x00000000,
+ 0x78130002,
+ 0x00000000,
+ 0x00000000,
+ 0x02001808,
+ 0x781f0002,
+ 0x00000000,
+ 0x00000000,
+ 0x00000000,
+ 0x78510009,
+ 0x00000000,
+ 0x00000000,
+ 0x00000000,
+ 0x00000000,
+ 0x00000000,
+ 0x00000000,
+ 0x00000000,
+ 0x00000000,
+ 0x00000000,
+ 0x00000000,
+ 0x78100007,
+ 0x00000000,
+ 0x00000000,
+ 0x00010000,
+ 0x00000000,
+ 0x00000000,
+ 0x00000000,
+ 0x00000000,
+ 0x00000000,
+ 0x781b0007,
+ 0x00000000,
+ 0x00000000,
+ 0x00000000,
+ 0x00000000,
+ 0x00000000,
+ 0x00000000,
+ 0x00000800,
+ 0x00000000,
+ 0x78110008,
+ 0x00000000,
+ 0x00000000,
+ 0x00000000,
+ 0x00000000,
+ 0x00000000,
+ 0x00000000,
+ 0x00000000,
+ 0x00000000,
+ 0x00000000,
+ 0x781e0003,
+ 0x00000000,
+ 0x00000000,
+ 0x00000000,
+ 0x00000000,
+ 0x781d0007,
+ 0x00000000,
+ 0x00000000,
+ 0x00000000,
+ 0x00000000,
+ 0x00000000,
+ 0x00000000,
+ 0x00000000,
+ 0x00000000,
+ 0x78120002,
+ 0x00000000,
+ 0x00000000,
+ 0x00000000,
+ 0x78500003,
+ 0x00000000,
+ 0x00000000,
+ 0x00000000,
+ 0x00000000,
+ 0x781c0002,
+ 0x00000000,
+ 0x00000000,
+ 0x00000000,
+ 0x780c0000,
+ 0x00000000,
+ 0x78520003,
+ 0x00000000,
+ 0x00000000,
+ 0x00000000,
+ 0x00000000,
+ 0x78300000,
+ 0x08010040,
+ 0x78310000,
+ 0x1e000000,
+ 0x78320000,
+ 0x1e000000,
+ 0x78330000,
+ 0x1e000000,
+ 0x79190002,
+ 0x00000000,
+ 0x00000000,
+ 0x00000000,
+ 0x791a0002,
+ 0x00000000,
+ 0x00000000,
+ 0x00000000,
+ 0x791b0002,
+ 0x00000000,
+ 0x00000000,
+ 0x00000000,
+ 0x79120000,
+ 0x00000000,
+ 0x79130000,
+ 0x00000000,
+ 0x79140000,
+ 0x00000000,
+ 0x79150000,
+ 0x00000000,
+ 0x79160000,
+ 0x00000000,
+ 0x78150009,
+ 0x00000000,
+ 0x00000000,
+ 0x00000000,
+ 0x00000000,
+ 0x00000000,
+ 0x00000000,
+ 0x00000000,
+ 0x00000000,
+ 0x00000000,
+ 0x00000000,
+ 0x78190009,
+ 0x00000000,
+ 0x00000000,
+ 0x00000000,
+ 0x00000000,
+ 0x00000000,
+ 0x00000000,
+ 0x00000000,
+ 0x00000000,
+ 0x00000000,
+ 0x00000000,
+ 0x781a0009,
+ 0x00000000,
+ 0x00000000,
+ 0x00000000,
+ 0x00000000,
+ 0x00000000,
+ 0x00000000,
+ 0x00000000,
+ 0x00000000,
+ 0x00000000,
+ 0x00000000,
+ 0x78160009,
+ 0x00000000,
+ 0x00000000,
+ 0x00000000,
+ 0x00000000,
+ 0x00000000,
+ 0x00000000,
+ 0x00000000,
+ 0x00000000,
+ 0x00000000,
+ 0x00000000,
+ 0x78170009,
+ 0x00000000,
+ 0x00000000,
+ 0x00000000,
+ 0x00000000,
+ 0x00000000,
+ 0x00000000,
+ 0x00000000,
+ 0x00000000,
+ 0x00000000,
+ 0x00000000,
+ 0x78490001,
+ 0x00000000,
+ 0x00000000,
+ 0x784a0000,
+ 0x00000000,
+ 0x784b0000,
+ 0x00000004,
+ 0x79170101,
+ 0x00000000,
+ 0x00000080,
+ 0x00000000,
+ 0x00000000,
+ 0x00000000,
+ 0x00000000,
+ 0x00000000,
+ 0x00000000,
+ 0x00000000,
+ 0x00000000,
+ 0x00000000,
+ 0x00000000,
+ 0x00000000,
+ 0x00000000,
+ 0x00000000,
+ 0x00000000,
+ 0x00000000,
+ 0x00000000,
+ 0x00000000,
+ 0x00000000,
+ 0x00000000,
+ 0x00000000,
+ 0x00000000,
+ 0x00000000,
+ 0x00000000,
+ 0x00000000,
+ 0x00000000,
+ 0x00000000,
+ 0x00000000,
+ 0x00000000,
+ 0x00000000,
+ 0x00000000,
+ 0x00000000,
+ 0x00000000,
+ 0x00000000,
+ 0x00000000,
+ 0x00000000,
+ 0x00000000,
+ 0x00000000,
+ 0x00000000,
+ 0x00000000,
+ 0x00000000,
+ 0x00000000,
+ 0x00000000,
+ 0x00000000,
+ 0x00000000,
+ 0x00000000,
+ 0x00000000,
+ 0x00000000,
+ 0x00000000,
+ 0x00000000,
+ 0x00000000,
+ 0x00000000,
+ 0x00000000,
+ 0x00000000,
+ 0x00000000,
+ 0x00000000,
+ 0x00000000,
+ 0x00000000,
+ 0x00000000,
+ 0x00000000,
+ 0x00000000,
+ 0x00000000,
+ 0x00000000,
+ 0x00000000,
+ 0x00000000,
+ 0x00000000,
+ 0x00000000,
+ 0x00000000,
+ 0x00000000,
+ 0x00000000,
+ 0x00000000,
+ 0x00000000,
+ 0x00000000,
+ 0x00000000,
+ 0x00000000,
+ 0x00000000,
+ 0x00000000,
+ 0x00000000,
+ 0x00000000,
+ 0x00000000,
+ 0x00000000,
+ 0x00000000,
+ 0x00000000,
+ 0x00000000,
+ 0x00000000,
+ 0x00000000,
+ 0x00000000,
+ 0x00000000,
+ 0x00000000,
+ 0x00000000,
+ 0x00000000,
+ 0x00000000,
+ 0x00000000,
+ 0x00000000,
+ 0x00000000,
+ 0x00000000,
+ 0x00000000,
+ 0x00000000,
+ 0x00000000,
+ 0x00000000,
+ 0x00000000,
+ 0x00000000,
+ 0x00000000,
+ 0x00000000,
+ 0x00000000,
+ 0x00000000,
+ 0x00000000,
+ 0x00000000,
+ 0x00000000,
+ 0x00000000,
+ 0x00000000,
+ 0x00000000,
+ 0x00000000,
+ 0x00000000,
+ 0x00000000,
+ 0x00000000,
+ 0x00000000,
+ 0x00000000,
+ 0x00000000,
+ 0x00000000,
+ 0x00000000,
+ 0x00000000,
+ 0x00000000,
+ 0x00000000,
+ 0x00000000,
+ 0x00000000,
+ 0x00000000,
+ 0x00000000,
+ 0x00000000,
+ 0x00000000,
+ 0x00000000,
+ 0x00000000,
+ 0x00000000,
+ 0x00000000,
+ 0x00000000,
+ 0x00000000,
+ 0x00000000,
+ 0x00000000,
+ 0x00000000,
+ 0x00000000,
+ 0x00000000,
+ 0x00000000,
+ 0x00000000,
+ 0x00000000,
+ 0x00000000,
+ 0x00000000,
+ 0x00000000,
+ 0x00000000,
+ 0x00000000,
+ 0x00000000,
+ 0x00000000,
+ 0x00000000,
+ 0x00000000,
+ 0x00000000,
+ 0x00000000,
+ 0x00000000,
+ 0x00000000,
+ 0x00000000,
+ 0x00000000,
+ 0x00000000,
+ 0x00000000,
+ 0x00000000,
+ 0x00000000,
+ 0x00000000,
+ 0x00000000,
+ 0x00000000,
+ 0x00000000,
+ 0x00000000,
+ 0x00000000,
+ 0x00000000,
+ 0x00000000,
+ 0x00000000,
+ 0x00000000,
+ 0x00000000,
+ 0x00000000,
+ 0x00000000,
+ 0x00000000,
+ 0x00000000,
+ 0x00000000,
+ 0x00000000,
+ 0x00000000,
+ 0x00000000,
+ 0x00000000,
+ 0x00000000,
+ 0x00000000,
+ 0x00000000,
+ 0x00000000,
+ 0x00000000,
+ 0x00000000,
+ 0x00000000,
+ 0x00000000,
+ 0x00000000,
+ 0x00000000,
+ 0x00000000,
+ 0x00000000,
+ 0x00000000,
+ 0x00000000,
+ 0x00000000,
+ 0x00000000,
+ 0x00000000,
+ 0x00000000,
+ 0x00000000,
+ 0x00000000,
+ 0x00000000,
+ 0x00000000,
+ 0x00000000,
+ 0x00000000,
+ 0x00000000,
+ 0x00000000,
+ 0x00000000,
+ 0x00000000,
+ 0x00000000,
+ 0x00000000,
+ 0x00000000,
+ 0x00000000,
+ 0x00000000,
+ 0x00000000,
+ 0x00000000,
+ 0x00000000,
+ 0x00000000,
+ 0x00000000,
+ 0x00000000,
+ 0x00000000,
+ 0x00000000,
+ 0x00000000,
+ 0x00000000,
+ 0x00000000,
+ 0x00000000,
+ 0x00000000,
+ 0x00000000,
+ 0x00000000,
+ 0x00000000,
+ 0x00000000,
+ 0x00000000,
+ 0x00000000,
+ 0x00000000,
+ 0x00000000,
+ 0x00000000,
+ 0x00000000,
+ 0x00000000,
+ 0x00000000,
+ 0x00000000,
+ 0x00000000,
+ 0x00000000,
+ 0x00000000,
+ 0x00000000,
+ 0x00000000,
+ 0x00000000,
+ 0x00000000,
+ 0x00000000,
+ 0x00000000,
+ 0x00000000,
+ 0x00000000,
+ 0x00000000,
+ 0x00000000,
+ 0x00000000,
+ 0x00000000,
+ 0x79180006,
+ 0x00000000,
+ 0x00000000,
+ 0x00000000,
+ 0x00000000,
+ 0x00000000,
+ 0x00000000,
+ 0x00000000,
+ 0x79180006,
+ 0x20000000,
+ 0x00000000,
+ 0x00000000,
+ 0x00000000,
+ 0x00000000,
+ 0x00000000,
+ 0x00000000,
+ 0x79180006,
+ 0x40000000,
+ 0x00000000,
+ 0x00000000,
+ 0x00000000,
+ 0x00000000,
+ 0x00000000,
+ 0x00000000,
+ 0x79180006,
+ 0x60000000,
+ 0x00000000,
+ 0x00000000,
+ 0x00000000,
+ 0x00000000,
+ 0x00000000,
+ 0x00000000,
+ 0x6101000e,
+ 0x00000001, /* reloc */
+ 0x00000000,
+ 0x00000000,
+ 0x00000001, /* reloc */
+ 0x00000000,
+ 0x00000001, /* reloc */
+ 0x00000000,
+ 0x00000001,
+ 0x00000000,
+ 0x00000001, /* reloc */
+ 0x00000000,
+ 0x00001001,
+ 0x00001001,
+ 0x00000001,
+ 0x00001001,
+ 0x61020001,
+ 0x00000000,
+ 0x00000000,
+ 0x79000002,
+ 0x00000000,
+ 0x00000000,
+ 0x00000000,
+ 0x78050006,
+ 0x00000000,
+ 0x00000000,
+ 0x00000000,
+ 0x00000000,
+ 0x00000000,
+ 0x00000000,
+ 0x00000000,
+ 0x79040002,
+ 0x00000000,
+ 0x00000000,
+ 0x00000000,
+ 0x79040002,
+ 0x40000000,
+ 0x00000000,
+ 0x00000000,
+ 0x79040002,
+ 0x80000000,
+ 0x00000000,
+ 0x00000000,
+ 0x79040002,
+ 0xc0000000,
+ 0x00000000,
+ 0x00000000,
+ 0x79080001,
+ 0x00000000,
+ 0x00000000,
+ 0x790a0001,
+ 0x00000000,
+ 0x00000000,
+ 0x78060003,
+ 0x00000000,
+ 0x00000000,
+ 0x00000000,
+ 0x00000000,
+ 0x78070003,
+ 0x00000000,
+ 0x00000000,
+ 0x00000000,
+ 0x00000000,
+ 0x78040001,
+ 0x00000000,
+ 0x00000000,
+ 0x79110000,
+ 0x00000000,
+ 0x780d0000,
+ 0x00000000,
+ 0x79060000,
+ 0x00000000,
+ 0x7907001f,
+ 0x00000000,
+ 0x00000000,
+ 0x00000000,
+ 0x00000000,
+ 0x00000000,
+ 0x00000000,
+ 0x00000000,
+ 0x00000000,
+ 0x00000000,
+ 0x00000000,
+ 0x00000000,
+ 0x00000000,
+ 0x00000000,
+ 0x00000000,
+ 0x00000000,
+ 0x00000000,
+ 0x00000000,
+ 0x00000000,
+ 0x00000000,
+ 0x00000000,
+ 0x00000000,
+ 0x00000000,
+ 0x00000000,
+ 0x00000000,
+ 0x00000000,
+ 0x00000000,
+ 0x00000000,
+ 0x00000000,
+ 0x00000000,
+ 0x00000000,
+ 0x00000000,
+ 0x00000000,
+ 0x7902000f,
+ 0x00000000,
+ 0x00000000,
+ 0x00000000,
+ 0x00000000,
+ 0x00000000,
+ 0x00000000,
+ 0x00000000,
+ 0x00000000,
+ 0x00000000,
+ 0x00000000,
+ 0x00000000,
+ 0x00000000,
+ 0x00000000,
+ 0x00000000,
+ 0x00000000,
+ 0x00000000,
+ 0x790c000f,
+ 0x00000000,
+ 0x00000000,
+ 0x00000000,
+ 0x00000000,
+ 0x00000000,
+ 0x00000000,
+ 0x00000000,
+ 0x00000000,
+ 0x00000000,
+ 0x00000000,
+ 0x00000000,
+ 0x00000000,
+ 0x00000000,
+ 0x00000000,
+ 0x00000000,
+ 0x00000000,
+ 0x780a0003,
+ 0x00000000,
+ 0x00000000,
+ 0x00000000,
+ 0x00000000,
+ 0x78080083,
+ 0x00004000,
+ 0x00000000,
+ 0x00000000,
+ 0x00000000,
+ 0x04004000,
+ 0x00000000,
+ 0x00000000,
+ 0x00000000,
+ 0x08004000,
+ 0x00000000,
+ 0x00000000,
+ 0x00000000,
+ 0x0c004000,
+ 0x00000000,
+ 0x00000000,
+ 0x00000000,
+ 0x10004000,
+ 0x00000000,
+ 0x00000000,
+ 0x00000000,
+ 0x14004000,
+ 0x00000000,
+ 0x00000000,
+ 0x00000000,
+ 0x18004000,
+ 0x00000000,
+ 0x00000000,
+ 0x00000000,
+ 0x1c004000,
+ 0x00000000,
+ 0x00000000,
+ 0x00000000,
+ 0x20004000,
+ 0x00000000,
+ 0x00000000,
+ 0x00000000,
+ 0x24004000,
+ 0x00000000,
+ 0x00000000,
+ 0x00000000,
+ 0x28004000,
+ 0x00000000,
+ 0x00000000,
+ 0x00000000,
+ 0x2c004000,
+ 0x00000000,
+ 0x00000000,
+ 0x00000000,
+ 0x30004000,
+ 0x00000000,
+ 0x00000000,
+ 0x00000000,
+ 0x34004000,
+ 0x00000000,
+ 0x00000000,
+ 0x00000000,
+ 0x38004000,
+ 0x00000000,
+ 0x00000000,
+ 0x00000000,
+ 0x3c004000,
+ 0x00000000,
+ 0x00000000,
+ 0x00000000,
+ 0x40004000,
+ 0x00000000,
+ 0x00000000,
+ 0x00000000,
+ 0x44004000,
+ 0x00000000,
+ 0x00000000,
+ 0x00000000,
+ 0x48004000,
+ 0x00000000,
+ 0x00000000,
+ 0x00000000,
+ 0x4c004000,
+ 0x00000000,
+ 0x00000000,
+ 0x00000000,
+ 0x50004000,
+ 0x00000000,
+ 0x00000000,
+ 0x00000000,
+ 0x54004000,
+ 0x00000000,
+ 0x00000000,
+ 0x00000000,
+ 0x58004000,
+ 0x00000000,
+ 0x00000000,
+ 0x00000000,
+ 0x5c004000,
+ 0x00000000,
+ 0x00000000,
+ 0x00000000,
+ 0x60004000,
+ 0x00000000,
+ 0x00000000,
+ 0x00000000,
+ 0x64004000,
+ 0x00000000,
+ 0x00000000,
+ 0x00000000,
+ 0x68004000,
+ 0x00000000,
+ 0x00000000,
+ 0x00000000,
+ 0x6c004000,
+ 0x00000000,
+ 0x00000000,
+ 0x00000000,
+ 0x70004000,
+ 0x00000000,
+ 0x00000000,
+ 0x00000000,
+ 0x74004000,
+ 0x00000000,
+ 0x00000000,
+ 0x00000000,
+ 0x78004000,
+ 0x00000000,
+ 0x00000000,
+ 0x00000000,
+ 0x7c004000,
+ 0x00000000,
+ 0x00000000,
+ 0x00000000,
+ 0x80004000,
+ 0x00000000,
+ 0x00000000,
+ 0x00000000,
+ 0x78090043,
+ 0x02000000,
+ 0x22220000,
+ 0x00000000,
+ 0x00000000,
+ 0x00000000,
+ 0x00000000,
+ 0x00000000,
+ 0x00000000,
+ 0x00000000,
+ 0x00000000,
+ 0x00000000,
+ 0x00000000,
+ 0x00000000,
+ 0x00000000,
+ 0x00000000,
+ 0x00000000,
+ 0x00000000,
+ 0x00000000,
+ 0x00000000,
+ 0x00000000,
+ 0x00000000,
+ 0x00000000,
+ 0x00000000,
+ 0x00000000,
+ 0x00000000,
+ 0x00000000,
+ 0x00000000,
+ 0x00000000,
+ 0x00000000,
+ 0x00000000,
+ 0x00000000,
+ 0x00000000,
+ 0x00000000,
+ 0x00000000,
+ 0x00000000,
+ 0x00000000,
+ 0x00000000,
+ 0x00000000,
+ 0x00000000,
+ 0x00000000,
+ 0x00000000,
+ 0x00000000,
+ 0x00000000,
+ 0x00000000,
+ 0x00000000,
+ 0x00000000,
+ 0x00000000,
+ 0x00000000,
+ 0x00000000,
+ 0x00000000,
+ 0x00000000,
+ 0x00000000,
+ 0x00000000,
+ 0x00000000,
+ 0x00000000,
+ 0x00000000,
+ 0x00000000,
+ 0x00000000,
+ 0x00000000,
+ 0x00000000,
+ 0x00000000,
+ 0x00000000,
+ 0x00000000,
+ 0x00000000,
+ 0x00000000,
+ 0x00000000,
+ 0x00000000,
+ 0x00000000,
+ 0x680b0001,
+ 0x78260000,
+ 0x00000000,
+ 0x78270000,
+ 0x00000000,
+ 0x78280000,
+ 0x00000000,
+ 0x78290000,
+ 0x00000000,
+ 0x782a0000,
+ 0x00000000,
+ 0x780e0000,
+ 0x00000dc1,
+ 0x78240000,
+ 0x00000e01,
+ 0x784f0000,
+ 0x80000100,
+ 0x784d0000,
+ 0x40000000,
+ 0x782b0000,
+ 0x00000000,
+ 0x782c0000,
+ 0x00000000,
+ 0x782d0000,
+ 0x00000000,
+ 0x782e0000,
+ 0x00000000,
+ 0x782f0000,
+ 0x00000000,
+ 0x780f0000,
+ 0x00000000,
+ 0x78230000,
+ 0x00000e60,
+ 0x78210000,
+ 0x00000e80,
+ 0x7b000005,
+ 0x00000004,
+ 0x00000001,
+ 0x00000000,
+ 0x00000001,
+ 0x00000000,
+ 0x00000000,
+ 0x05000000, /* cmds end */
+ 0x00000000,
+ 0x00000000,
+ 0x00000000,
+ 0x00000000,
+ 0x00000000,
+ 0x00000000,
+ 0x00000000, /* state start */
+ 0x00000000,
+ 0x3f800000,
+ 0x3f800000,
+ 0x3f800000,
+ 0x3f800000,
+ 0x00000000,
+ 0x00000000,
+ 0x00000000,
+ 0x00000000,
+ 0x00000000,
+ 0x00000000,
+ 0x00000000,
+ 0x00000000,
+ 0x00000000,
+ 0x00000000,
+ 0x00000000,
+ 0x00000000,
+ 0x00000000,
+ 0x00000000,
+ 0x00000000,
+ 0x00000000,
+ 0x00000000,
+ 0x00000000,
+ 0x00000000,
+ 0x00000000,
+ 0x00000000,
+ 0x00000000,
+ 0x00000000,
+ 0x00000000,
+ 0x00000000,
+ 0x00000000,
+ 0x00000000,
+ 0x00000000,
+ 0x00000000,
+ 0x00000000,
+ 0x00000000,
+ 0x00000000,
+ 0x00000000,
+ 0x00000000,
+ 0x00000000,
+ 0x00000000,
+ 0x00000000,
+ 0x00000000,
+ 0x00000000,
+ 0x00000000,
+ 0x00000000,
+ 0x00000000,
+ 0x00000000,
+ 0x00000000,
+ 0x00000000,
+ 0x00000000,
+ 0x00000000,
+ 0x00000000,
+ 0x00000000,
+ 0x00000000,
+ 0x00000000,
+ 0x00000000,
+ 0x00000000,
+ 0x00000000,
+ 0x00000000,
+ 0x00000000,
+ 0x00000000,
+ 0x00000000, /* state end */
+};
+
+RO_RENDERSTATE(8);
diff --git a/drivers/gpu/drm/i915/gt/gen9_renderstate.c b/drivers/gpu/drm/i915/gt/gen9_renderstate.c
new file mode 100644
index 000000000000..7d3ac02f0177
--- /dev/null
+++ b/drivers/gpu/drm/i915/gt/gen9_renderstate.c
@@ -0,0 +1,999 @@
+/*
+ * Copyright © 2014 Intel Corporation
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
+ * DEALINGS IN THE SOFTWARE.
+ *
+ * Generated by: intel-gpu-tools-1.19-177-g68e2eab2
+ */
+
+#include "intel_renderstate.h"
+
+static const u32 gen9_null_state_relocs[] = {
+ 0x000007a8,
+ 0x000007b4,
+ 0x000007bc,
+ 0x000007cc,
+ -1,
+};
+
+static const u32 gen9_null_state_batch[] = {
+ 0x7a000004,
+ 0x01000000,
+ 0x00000000,
+ 0x00000000,
+ 0x00000000,
+ 0x00000000,
+ 0x69040300,
+ 0x78140000,
+ 0x04000000,
+ 0x7820000a,
+ 0x00000000,
+ 0x00000000,
+ 0x80000000,
+ 0x00000000,
+ 0x00000000,
+ 0x00000000,
+ 0x00000000,
+ 0x00000000,
+ 0x00000000,
+ 0x00000000,
+ 0x00000000,
+ 0x78130002,
+ 0x00000000,
+ 0x00000000,
+ 0x02001808,
+ 0x781f0004,
+ 0x00000000,
+ 0x00000000,
+ 0x00000000,
+ 0x00000000,
+ 0x00000000,
+ 0x78510009,
+ 0x00000000,
+ 0x00000000,
+ 0x00000000,
+ 0x00000000,
+ 0x00000000,
+ 0x00000000,
+ 0x00000000,
+ 0x00000000,
+ 0x00000000,
+ 0x00000000,
+ 0x78100007,
+ 0x00000000,
+ 0x00000000,
+ 0x00010000,
+ 0x00000000,
+ 0x00000000,
+ 0x00000000,
+ 0x00000000,
+ 0x00000000,
+ 0x781b0007,
+ 0x00000000,
+ 0x00000000,
+ 0x00000000,
+ 0x00000000,
+ 0x00000000,
+ 0x00000000,
+ 0x00000800,
+ 0x00000000,
+ 0x78110008,
+ 0x00000000,
+ 0x00000000,
+ 0x00000000,
+ 0x00000000,
+ 0x00000000,
+ 0x00000000,
+ 0x00000000,
+ 0x00000000,
+ 0x00000000,
+ 0x781e0003,
+ 0x00000000,
+ 0x00000000,
+ 0x00000000,
+ 0x00000000,
+ 0x781d0009,
+ 0x00000000,
+ 0x00000000,
+ 0x00000000,
+ 0x00000000,
+ 0x00000000,
+ 0x00000000,
+ 0x00000000,
+ 0x00000000,
+ 0x00000000,
+ 0x00000000,
+ 0x78120002,
+ 0x00000000,
+ 0x00000000,
+ 0x00000000,
+ 0x78500003,
+ 0x00000000,
+ 0x00000000,
+ 0x00000000,
+ 0x00000000,
+ 0x781c0002,
+ 0x00000000,
+ 0x00000000,
+ 0x00000000,
+ 0x780c0000,
+ 0x00000000,
+ 0x78520003,
+ 0x00000000,
+ 0x00000000,
+ 0x00000000,
+ 0x00000000,
+ 0x78300000,
+ 0x08010040,
+ 0x78310000,
+ 0x1e000000,
+ 0x78320000,
+ 0x1e000000,
+ 0x78330000,
+ 0x1e000000,
+ 0x79190002,
+ 0x00000000,
+ 0x00000000,
+ 0x00000000,
+ 0x791a0002,
+ 0x00000000,
+ 0x00000000,
+ 0x00000000,
+ 0x791b0002,
+ 0x00000000,
+ 0x00000000,
+ 0x00000000,
+ 0x79120000,
+ 0x00000000,
+ 0x79130000,
+ 0x00000000,
+ 0x79140000,
+ 0x00000000,
+ 0x79150000,
+ 0x00000000,
+ 0x79160000,
+ 0x00000000,
+ 0x78150009,
+ 0x00000000,
+ 0x00000000,
+ 0x00000000,
+ 0x00000000,
+ 0x00000000,
+ 0x00000000,
+ 0x00000000,
+ 0x00000000,
+ 0x00000000,
+ 0x00000000,
+ 0x78190009,
+ 0x00000000,
+ 0x00000000,
+ 0x00000000,
+ 0x00000000,
+ 0x00000000,
+ 0x00000000,
+ 0x00000000,
+ 0x00000000,
+ 0x00000000,
+ 0x00000000,
+ 0x781a0009,
+ 0x00000000,
+ 0x00000000,
+ 0x00000000,
+ 0x00000000,
+ 0x00000000,
+ 0x00000000,
+ 0x00000000,
+ 0x00000000,
+ 0x00000000,
+ 0x00000000,
+ 0x78160009,
+ 0x00000000,
+ 0x00000000,
+ 0x00000000,
+ 0x00000000,
+ 0x00000000,
+ 0x00000000,
+ 0x00000000,
+ 0x00000000,
+ 0x00000000,
+ 0x00000000,
+ 0x78170009,
+ 0x00000000,
+ 0x00000000,
+ 0x00000000,
+ 0x00000000,
+ 0x00000000,
+ 0x00000000,
+ 0x00000000,
+ 0x00000000,
+ 0x00000000,
+ 0x00000000,
+ 0x78490001,
+ 0x00000000,
+ 0x00000000,
+ 0x784a0000,
+ 0x00000000,
+ 0x784b0000,
+ 0x00000004,
+ 0x79170101,
+ 0x00000000,
+ 0x00000080,
+ 0x00000000,
+ 0x00000000,
+ 0x00000000,
+ 0x00000000,
+ 0x00000000,
+ 0x00000000,
+ 0x00000000,
+ 0x00000000,
+ 0x00000000,
+ 0x00000000,
+ 0x00000000,
+ 0x00000000,
+ 0x00000000,
+ 0x00000000,
+ 0x00000000,
+ 0x00000000,
+ 0x00000000,
+ 0x00000000,
+ 0x00000000,
+ 0x00000000,
+ 0x00000000,
+ 0x00000000,
+ 0x00000000,
+ 0x00000000,
+ 0x00000000,
+ 0x00000000,
+ 0x00000000,
+ 0x00000000,
+ 0x00000000,
+ 0x00000000,
+ 0x00000000,
+ 0x00000000,
+ 0x00000000,
+ 0x00000000,
+ 0x00000000,
+ 0x00000000,
+ 0x00000000,
+ 0x00000000,
+ 0x00000000,
+ 0x00000000,
+ 0x00000000,
+ 0x00000000,
+ 0x00000000,
+ 0x00000000,
+ 0x00000000,
+ 0x00000000,
+ 0x00000000,
+ 0x00000000,
+ 0x00000000,
+ 0x00000000,
+ 0x00000000,
+ 0x00000000,
+ 0x00000000,
+ 0x00000000,
+ 0x00000000,
+ 0x00000000,
+ 0x00000000,
+ 0x00000000,
+ 0x00000000,
+ 0x00000000,
+ 0x00000000,
+ 0x00000000,
+ 0x00000000,
+ 0x00000000,
+ 0x00000000,
+ 0x00000000,
+ 0x00000000,
+ 0x00000000,
+ 0x00000000,
+ 0x00000000,
+ 0x00000000,
+ 0x00000000,
+ 0x00000000,
+ 0x00000000,
+ 0x00000000,
+ 0x00000000,
+ 0x00000000,
+ 0x00000000,
+ 0x00000000,
+ 0x00000000,
+ 0x00000000,
+ 0x00000000,
+ 0x00000000,
+ 0x00000000,
+ 0x00000000,
+ 0x00000000,
+ 0x00000000,
+ 0x00000000,
+ 0x00000000,
+ 0x00000000,
+ 0x00000000,
+ 0x00000000,
+ 0x00000000,
+ 0x00000000,
+ 0x00000000,
+ 0x00000000,
+ 0x00000000,
+ 0x00000000,
+ 0x00000000,
+ 0x00000000,
+ 0x00000000,
+ 0x00000000,
+ 0x00000000,
+ 0x00000000,
+ 0x00000000,
+ 0x00000000,
+ 0x00000000,
+ 0x00000000,
+ 0x00000000,
+ 0x00000000,
+ 0x00000000,
+ 0x00000000,
+ 0x00000000,
+ 0x00000000,
+ 0x00000000,
+ 0x00000000,
+ 0x00000000,
+ 0x00000000,
+ 0x00000000,
+ 0x00000000,
+ 0x00000000,
+ 0x00000000,
+ 0x00000000,
+ 0x00000000,
+ 0x00000000,
+ 0x00000000,
+ 0x00000000,
+ 0x00000000,
+ 0x00000000,
+ 0x00000000,
+ 0x00000000,
+ 0x00000000,
+ 0x00000000,
+ 0x00000000,
+ 0x00000000,
+ 0x00000000,
+ 0x00000000,
+ 0x00000000,
+ 0x00000000,
+ 0x00000000,
+ 0x00000000,
+ 0x00000000,
+ 0x00000000,
+ 0x00000000,
+ 0x00000000,
+ 0x00000000,
+ 0x00000000,
+ 0x00000000,
+ 0x00000000,
+ 0x00000000,
+ 0x00000000,
+ 0x00000000,
+ 0x00000000,
+ 0x00000000,
+ 0x00000000,
+ 0x00000000,
+ 0x00000000,
+ 0x00000000,
+ 0x00000000,
+ 0x00000000,
+ 0x00000000,
+ 0x00000000,
+ 0x00000000,
+ 0x00000000,
+ 0x00000000,
+ 0x00000000,
+ 0x00000000,
+ 0x00000000,
+ 0x00000000,
+ 0x00000000,
+ 0x00000000,
+ 0x00000000,
+ 0x00000000,
+ 0x00000000,
+ 0x00000000,
+ 0x00000000,
+ 0x00000000,
+ 0x00000000,
+ 0x00000000,
+ 0x00000000,
+ 0x00000000,
+ 0x00000000,
+ 0x00000000,
+ 0x00000000,
+ 0x00000000,
+ 0x00000000,
+ 0x00000000,
+ 0x00000000,
+ 0x00000000,
+ 0x00000000,
+ 0x00000000,
+ 0x00000000,
+ 0x00000000,
+ 0x00000000,
+ 0x00000000,
+ 0x00000000,
+ 0x00000000,
+ 0x00000000,
+ 0x00000000,
+ 0x00000000,
+ 0x00000000,
+ 0x00000000,
+ 0x00000000,
+ 0x00000000,
+ 0x00000000,
+ 0x00000000,
+ 0x00000000,
+ 0x00000000,
+ 0x00000000,
+ 0x00000000,
+ 0x00000000,
+ 0x00000000,
+ 0x00000000,
+ 0x00000000,
+ 0x00000000,
+ 0x00000000,
+ 0x00000000,
+ 0x00000000,
+ 0x00000000,
+ 0x00000000,
+ 0x00000000,
+ 0x00000000,
+ 0x00000000,
+ 0x00000000,
+ 0x00000000,
+ 0x00000000,
+ 0x00000000,
+ 0x00000000,
+ 0x00000000,
+ 0x00000000,
+ 0x00000000,
+ 0x00000000,
+ 0x00000000,
+ 0x00000000,
+ 0x00000000,
+ 0x00000000,
+ 0x00000000,
+ 0x00000000,
+ 0x00000000,
+ 0x00000000,
+ 0x00000000,
+ 0x00000000,
+ 0x00000000,
+ 0x00000000,
+ 0x00000000,
+ 0x00000000,
+ 0x00000000,
+ 0x00000000,
+ 0x00000000,
+ 0x00000000,
+ 0x00000000,
+ 0x00000000,
+ 0x00000000,
+ 0x00000000,
+ 0x00000000,
+ 0x00000000,
+ 0x79180006,
+ 0x00000000,
+ 0x00000000,
+ 0x00000000,
+ 0x00000000,
+ 0x00000000,
+ 0x00000000,
+ 0x00000000,
+ 0x79180006,
+ 0x20000000,
+ 0x00000000,
+ 0x00000000,
+ 0x00000000,
+ 0x00000000,
+ 0x00000000,
+ 0x00000000,
+ 0x79180006,
+ 0x40000000,
+ 0x00000000,
+ 0x00000000,
+ 0x00000000,
+ 0x00000000,
+ 0x00000000,
+ 0x00000000,
+ 0x79180006,
+ 0x60000000,
+ 0x00000000,
+ 0x00000000,
+ 0x00000000,
+ 0x00000000,
+ 0x00000000,
+ 0x00000000,
+ 0x61010011,
+ 0x00000001, /* reloc */
+ 0x00000000,
+ 0x00000000,
+ 0x00000001, /* reloc */
+ 0x00000000,
+ 0x00000001, /* reloc */
+ 0x00000000,
+ 0x00000001,
+ 0x00000000,
+ 0x00000001, /* reloc */
+ 0x00000000,
+ 0x00001001,
+ 0x00001001,
+ 0x00000001,
+ 0x00001001,
+ 0x00000000,
+ 0x00000000,
+ 0x00000000,
+ 0x61020001,
+ 0x00000000,
+ 0x00000000,
+ 0x79000002,
+ 0x00000000,
+ 0x00000000,
+ 0x00000000,
+ 0x78050006,
+ 0x00000000,
+ 0x00000000,
+ 0x00000000,
+ 0x00000000,
+ 0x00000000,
+ 0x00000000,
+ 0x00000000,
+ 0x79040002,
+ 0x00000000,
+ 0x00000000,
+ 0x00000000,
+ 0x79040002,
+ 0x40000000,
+ 0x00000000,
+ 0x00000000,
+ 0x79040002,
+ 0x80000000,
+ 0x00000000,
+ 0x00000000,
+ 0x79040002,
+ 0xc0000000,
+ 0x00000000,
+ 0x00000000,
+ 0x79080001,
+ 0x00000000,
+ 0x00000000,
+ 0x790a0001,
+ 0x00000000,
+ 0x00000000,
+ 0x78060003,
+ 0x00000000,
+ 0x00000000,
+ 0x00000000,
+ 0x00000000,
+ 0x78070003,
+ 0x00000000,
+ 0x00000000,
+ 0x00000000,
+ 0x00000000,
+ 0x78040001,
+ 0x00000000,
+ 0x00000000,
+ 0x79110000,
+ 0x00000000,
+ 0x780d0000,
+ 0x00000000,
+ 0x79060000,
+ 0x00000000,
+ 0x7907001f,
+ 0x00000000,
+ 0x00000000,
+ 0x00000000,
+ 0x00000000,
+ 0x00000000,
+ 0x00000000,
+ 0x00000000,
+ 0x00000000,
+ 0x00000000,
+ 0x00000000,
+ 0x00000000,
+ 0x00000000,
+ 0x00000000,
+ 0x00000000,
+ 0x00000000,
+ 0x00000000,
+ 0x00000000,
+ 0x00000000,
+ 0x00000000,
+ 0x00000000,
+ 0x00000000,
+ 0x00000000,
+ 0x00000000,
+ 0x00000000,
+ 0x00000000,
+ 0x00000000,
+ 0x00000000,
+ 0x00000000,
+ 0x00000000,
+ 0x00000000,
+ 0x00000000,
+ 0x00000000,
+ 0x7902000f,
+ 0x00000000,
+ 0x00000000,
+ 0x00000000,
+ 0x00000000,
+ 0x00000000,
+ 0x00000000,
+ 0x00000000,
+ 0x00000000,
+ 0x00000000,
+ 0x00000000,
+ 0x00000000,
+ 0x00000000,
+ 0x00000000,
+ 0x00000000,
+ 0x00000000,
+ 0x00000000,
+ 0x790c000f,
+ 0x00000000,
+ 0x00000000,
+ 0x00000000,
+ 0x00000000,
+ 0x00000000,
+ 0x00000000,
+ 0x00000000,
+ 0x00000000,
+ 0x00000000,
+ 0x00000000,
+ 0x00000000,
+ 0x00000000,
+ 0x00000000,
+ 0x00000000,
+ 0x00000000,
+ 0x00000000,
+ 0x780a0003,
+ 0x00000000,
+ 0x00000000,
+ 0x00000000,
+ 0x00000000,
+ 0x78080083,
+ 0x00004000,
+ 0x00000000,
+ 0x00000000,
+ 0x00000000,
+ 0x04004000,
+ 0x00000000,
+ 0x00000000,
+ 0x00000000,
+ 0x08004000,
+ 0x00000000,
+ 0x00000000,
+ 0x00000000,
+ 0x0c004000,
+ 0x00000000,
+ 0x00000000,
+ 0x00000000,
+ 0x10004000,
+ 0x00000000,
+ 0x00000000,
+ 0x00000000,
+ 0x14004000,
+ 0x00000000,
+ 0x00000000,
+ 0x00000000,
+ 0x18004000,
+ 0x00000000,
+ 0x00000000,
+ 0x00000000,
+ 0x1c004000,
+ 0x00000000,
+ 0x00000000,
+ 0x00000000,
+ 0x20004000,
+ 0x00000000,
+ 0x00000000,
+ 0x00000000,
+ 0x24004000,
+ 0x00000000,
+ 0x00000000,
+ 0x00000000,
+ 0x28004000,
+ 0x00000000,
+ 0x00000000,
+ 0x00000000,
+ 0x2c004000,
+ 0x00000000,
+ 0x00000000,
+ 0x00000000,
+ 0x30004000,
+ 0x00000000,
+ 0x00000000,
+ 0x00000000,
+ 0x34004000,
+ 0x00000000,
+ 0x00000000,
+ 0x00000000,
+ 0x38004000,
+ 0x00000000,
+ 0x00000000,
+ 0x00000000,
+ 0x3c004000,
+ 0x00000000,
+ 0x00000000,
+ 0x00000000,
+ 0x40004000,
+ 0x00000000,
+ 0x00000000,
+ 0x00000000,
+ 0x44004000,
+ 0x00000000,
+ 0x00000000,
+ 0x00000000,
+ 0x48004000,
+ 0x00000000,
+ 0x00000000,
+ 0x00000000,
+ 0x4c004000,
+ 0x00000000,
+ 0x00000000,
+ 0x00000000,
+ 0x50004000,
+ 0x00000000,
+ 0x00000000,
+ 0x00000000,
+ 0x54004000,
+ 0x00000000,
+ 0x00000000,
+ 0x00000000,
+ 0x58004000,
+ 0x00000000,
+ 0x00000000,
+ 0x00000000,
+ 0x5c004000,
+ 0x00000000,
+ 0x00000000,
+ 0x00000000,
+ 0x60004000,
+ 0x00000000,
+ 0x00000000,
+ 0x00000000,
+ 0x64004000,
+ 0x00000000,
+ 0x00000000,
+ 0x00000000,
+ 0x68004000,
+ 0x00000000,
+ 0x00000000,
+ 0x00000000,
+ 0x6c004000,
+ 0x00000000,
+ 0x00000000,
+ 0x00000000,
+ 0x70004000,
+ 0x00000000,
+ 0x00000000,
+ 0x00000000,
+ 0x74004000,
+ 0x00000000,
+ 0x00000000,
+ 0x00000000,
+ 0x78004000,
+ 0x00000000,
+ 0x00000000,
+ 0x00000000,
+ 0x7c004000,
+ 0x00000000,
+ 0x00000000,
+ 0x00000000,
+ 0x80004000,
+ 0x00000000,
+ 0x00000000,
+ 0x00000000,
+ 0x78090043,
+ 0x02000000,
+ 0x22220000,
+ 0x00000000,
+ 0x00000000,
+ 0x00000000,
+ 0x00000000,
+ 0x00000000,
+ 0x00000000,
+ 0x00000000,
+ 0x00000000,
+ 0x00000000,
+ 0x00000000,
+ 0x00000000,
+ 0x00000000,
+ 0x00000000,
+ 0x00000000,
+ 0x00000000,
+ 0x00000000,
+ 0x00000000,
+ 0x00000000,
+ 0x00000000,
+ 0x00000000,
+ 0x00000000,
+ 0x00000000,
+ 0x00000000,
+ 0x00000000,
+ 0x00000000,
+ 0x00000000,
+ 0x00000000,
+ 0x00000000,
+ 0x00000000,
+ 0x00000000,
+ 0x00000000,
+ 0x00000000,
+ 0x00000000,
+ 0x00000000,
+ 0x00000000,
+ 0x00000000,
+ 0x00000000,
+ 0x00000000,
+ 0x00000000,
+ 0x00000000,
+ 0x00000000,
+ 0x00000000,
+ 0x00000000,
+ 0x00000000,
+ 0x00000000,
+ 0x00000000,
+ 0x00000000,
+ 0x00000000,
+ 0x00000000,
+ 0x00000000,
+ 0x00000000,
+ 0x00000000,
+ 0x00000000,
+ 0x00000000,
+ 0x00000000,
+ 0x00000000,
+ 0x00000000,
+ 0x00000000,
+ 0x00000000,
+ 0x00000000,
+ 0x00000000,
+ 0x00000000,
+ 0x00000000,
+ 0x00000000,
+ 0x00000000,
+ 0x00000000,
+ 0x78550003,
+ 0x0000000f,
+ 0x00000000,
+ 0x00000000,
+ 0x00000000,
+ 0x680b0001,
+ 0x780e0000,
+ 0x00000e01,
+ 0x78240000,
+ 0x00000e41,
+ 0x784f0000,
+ 0x80000100,
+ 0x784d0000,
+ 0x40000000,
+ 0x782b0000,
+ 0x00000000,
+ 0x782c0000,
+ 0x00000000,
+ 0x782d0000,
+ 0x00000000,
+ 0x782e0000,
+ 0x00000000,
+ 0x782f0000,
+ 0x00000000,
+ 0x780f0000,
+ 0x00000000,
+ 0x78230000,
+ 0x00000ea0,
+ 0x78210000,
+ 0x00000ec0,
+ 0x78260000,
+ 0x00000000,
+ 0x78270000,
+ 0x00000000,
+ 0x78280000,
+ 0x00000000,
+ 0x78290000,
+ 0x00000000,
+ 0x782a0000,
+ 0x00000000,
+ 0x7b000005,
+ 0x00000004,
+ 0x00000001,
+ 0x00000000,
+ 0x00000001,
+ 0x00000000,
+ 0x00000000,
+ 0x05000000, /* cmds end */
+ 0x00000000,
+ 0x00000000,
+ 0x00000000,
+ 0x00000000,
+ 0x00000000,
+ 0x00000000,
+ 0x00000000,
+ 0x00000000,
+ 0x00000000,
+ 0x00000000,
+ 0x00000000, /* state start */
+ 0x00000000,
+ 0x3f800000,
+ 0x3f800000,
+ 0x3f800000,
+ 0x3f800000,
+ 0x00000000,
+ 0x00000000,
+ 0x00000000,
+ 0x00000000,
+ 0x00000000,
+ 0x00000000,
+ 0x00000000,
+ 0x00000000,
+ 0x00000000,
+ 0x00000000,
+ 0x00000000,
+ 0x00000000,
+ 0x00000000,
+ 0x00000000,
+ 0x00000000,
+ 0x00000000,
+ 0x00000000,
+ 0x00000000,
+ 0x00000000,
+ 0x00000000,
+ 0x00000000,
+ 0x00000000,
+ 0x00000000,
+ 0x00000000,
+ 0x00000000,
+ 0x00000000,
+ 0x00000000,
+ 0x00000000,
+ 0x00000000,
+ 0x00000000,
+ 0x00000000,
+ 0x00000000,
+ 0x00000000,
+ 0x00000000,
+ 0x00000000,
+ 0x00000000,
+ 0x00000000,
+ 0x00000000,
+ 0x00000000,
+ 0x00000000,
+ 0x00000000,
+ 0x00000000,
+ 0x00000000,
+ 0x00000000,
+ 0x00000000,
+ 0x00000000,
+ 0x00000000,
+ 0x00000000,
+ 0x00000000,
+ 0x00000000,
+ 0x00000000,
+ 0x00000000,
+ 0x00000000,
+ 0x00000000,
+ 0x00000000,
+ 0x00000000,
+ 0x00000000,
+ 0x00000000, /* state end */
+};
+
+RO_RENDERSTATE(9);
diff --git a/drivers/gpu/drm/i915/gt/intel_breadcrumbs.c b/drivers/gpu/drm/i915/gt/intel_breadcrumbs.c
index c092bdf5f0bf..0ba524a414c6 100644
--- a/drivers/gpu/drm/i915/gt/intel_breadcrumbs.c
+++ b/drivers/gpu/drm/i915/gt/intel_breadcrumbs.c
@@ -27,6 +27,9 @@
#include <uapi/linux/sched/types.h>
#include "i915_drv.h"
+#include "i915_trace.h"
+#include "intel_gt_pm.h"
+#include "intel_gt_requests.h"
static void irq_enable(struct intel_engine_cs *engine)
{
@@ -34,9 +37,9 @@ static void irq_enable(struct intel_engine_cs *engine)
return;
/* Caller disables interrupts */
- spin_lock(&engine->i915->irq_lock);
+ spin_lock(&engine->gt->irq_lock);
engine->irq_enable(engine);
- spin_unlock(&engine->i915->irq_lock);
+ spin_unlock(&engine->gt->irq_lock);
}
static void irq_disable(struct intel_engine_cs *engine)
@@ -45,35 +48,38 @@ static void irq_disable(struct intel_engine_cs *engine)
return;
/* Caller disables interrupts */
- spin_lock(&engine->i915->irq_lock);
+ spin_lock(&engine->gt->irq_lock);
engine->irq_disable(engine);
- spin_unlock(&engine->i915->irq_lock);
+ spin_unlock(&engine->gt->irq_lock);
}
static void __intel_breadcrumbs_disarm_irq(struct intel_breadcrumbs *b)
{
+ struct intel_engine_cs *engine =
+ container_of(b, struct intel_engine_cs, breadcrumbs);
+
lockdep_assert_held(&b->irq_lock);
GEM_BUG_ON(!b->irq_enabled);
if (!--b->irq_enabled)
- irq_disable(container_of(b,
- struct intel_engine_cs,
- breadcrumbs));
+ irq_disable(engine);
b->irq_armed = false;
+ intel_gt_pm_put_async(engine->gt);
}
void intel_engine_disarm_breadcrumbs(struct intel_engine_cs *engine)
{
struct intel_breadcrumbs *b = &engine->breadcrumbs;
+ unsigned long flags;
if (!b->irq_armed)
return;
- spin_lock_irq(&b->irq_lock);
+ spin_lock_irqsave(&b->irq_lock, flags);
if (b->irq_armed)
__intel_breadcrumbs_disarm_irq(b);
- spin_unlock_irq(&b->irq_lock);
+ spin_unlock_irqrestore(&b->irq_lock, flags);
}
static inline bool __request_completed(const struct i915_request *rq)
@@ -112,23 +118,30 @@ __dma_fence_signal__timestamp(struct dma_fence *fence, ktime_t timestamp)
}
static void
-__dma_fence_signal__notify(struct dma_fence *fence)
+__dma_fence_signal__notify(struct dma_fence *fence,
+ const struct list_head *list)
{
struct dma_fence_cb *cur, *tmp;
lockdep_assert_held(fence->lock);
- lockdep_assert_irqs_disabled();
- list_for_each_entry_safe(cur, tmp, &fence->cb_list, node) {
+ list_for_each_entry_safe(cur, tmp, list, node) {
INIT_LIST_HEAD(&cur->node);
cur->func(fence, cur);
}
- INIT_LIST_HEAD(&fence->cb_list);
}
-void intel_engine_breadcrumbs_irq(struct intel_engine_cs *engine)
+static void add_retire(struct intel_breadcrumbs *b, struct intel_timeline *tl)
{
- struct intel_breadcrumbs *b = &engine->breadcrumbs;
+ struct intel_engine_cs *engine =
+ container_of(b, struct intel_engine_cs, breadcrumbs);
+
+ intel_engine_add_retire(engine, tl);
+}
+
+static void signal_irq_work(struct irq_work *work)
+{
+ struct intel_breadcrumbs *b = container_of(work, typeof(*b), irq_work);
const ktime_t timestamp = ktime_get();
struct intel_context *ce, *cn;
struct list_head *pos, *next;
@@ -175,8 +188,10 @@ void intel_engine_breadcrumbs_irq(struct intel_engine_cs *engine)
if (!list_is_first(pos, &ce->signals)) {
/* Advance the list to the first incomplete request */
__list_del_many(&ce->signals, pos);
- if (&ce->signals == pos) /* now empty */
+ if (&ce->signals == pos) { /* now empty */
list_del_init(&ce->signal_link);
+ add_retire(b, ce->timeline);
+ }
}
}
@@ -185,62 +200,29 @@ void intel_engine_breadcrumbs_irq(struct intel_engine_cs *engine)
list_for_each_safe(pos, next, &signal) {
struct i915_request *rq =
list_entry(pos, typeof(*rq), signal_link);
-
- __dma_fence_signal__timestamp(&rq->fence, timestamp);
+ struct list_head cb_list;
spin_lock(&rq->lock);
- __dma_fence_signal__notify(&rq->fence);
+ list_replace(&rq->fence.cb_list, &cb_list);
+ __dma_fence_signal__timestamp(&rq->fence, timestamp);
+ __dma_fence_signal__notify(&rq->fence, &cb_list);
spin_unlock(&rq->lock);
i915_request_put(rq);
}
}
-void intel_engine_signal_breadcrumbs(struct intel_engine_cs *engine)
-{
- local_irq_disable();
- intel_engine_breadcrumbs_irq(engine);
- local_irq_enable();
-}
-
-static void signal_irq_work(struct irq_work *work)
-{
- struct intel_engine_cs *engine =
- container_of(work, typeof(*engine), breadcrumbs.irq_work);
-
- intel_engine_breadcrumbs_irq(engine);
-}
-
-void intel_engine_pin_breadcrumbs_irq(struct intel_engine_cs *engine)
-{
- struct intel_breadcrumbs *b = &engine->breadcrumbs;
-
- spin_lock_irq(&b->irq_lock);
- if (!b->irq_enabled++)
- irq_enable(engine);
- GEM_BUG_ON(!b->irq_enabled); /* no overflow! */
- spin_unlock_irq(&b->irq_lock);
-}
-
-void intel_engine_unpin_breadcrumbs_irq(struct intel_engine_cs *engine)
-{
- struct intel_breadcrumbs *b = &engine->breadcrumbs;
-
- spin_lock_irq(&b->irq_lock);
- GEM_BUG_ON(!b->irq_enabled); /* no underflow! */
- if (!--b->irq_enabled)
- irq_disable(engine);
- spin_unlock_irq(&b->irq_lock);
-}
-
-static void __intel_breadcrumbs_arm_irq(struct intel_breadcrumbs *b)
+static bool __intel_breadcrumbs_arm_irq(struct intel_breadcrumbs *b)
{
struct intel_engine_cs *engine =
container_of(b, struct intel_engine_cs, breadcrumbs);
lockdep_assert_held(&b->irq_lock);
if (b->irq_armed)
- return;
+ return true;
+
+ if (!intel_gt_pm_get_if_awake(engine->gt))
+ return false;
/*
* The breadcrumb irq will be disarmed on the interrupt after the
@@ -260,6 +242,8 @@ static void __intel_breadcrumbs_arm_irq(struct intel_breadcrumbs *b)
if (!b->irq_enabled++)
irq_enable(engine);
+
+ return true;
}
void intel_engine_init_breadcrumbs(struct intel_engine_cs *engine)
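
The net effect of the hunks above: arming the breadcrumb interrupt now takes
a conditional GT wakeref, and disarming releases it asynchronously, since
disarm may now be called from hard-irq context (hence the switch to
spin_lock_irqsave() as well). A condensed sketch of the pairing, with the
locking elided:

	static bool arm_irq(struct intel_breadcrumbs *b,
			    struct intel_engine_cs *engine)
	{
		if (b->irq_armed)
			return true;

		/* No wakeref, no interrupt: a sleeping GT emits no seqno */
		if (!intel_gt_pm_get_if_awake(engine->gt))
			return false;

		b->irq_armed = true;
		if (!b->irq_enabled++)
			irq_enable(engine);
		return true;
	}

	static void disarm_irq(struct intel_breadcrumbs *b,
			       struct intel_engine_cs *engine)
	{
		if (!--b->irq_enabled)
			irq_disable(engine);
		b->irq_armed = false;
		intel_gt_pm_put_async(engine->gt); /* safe in irq context */
	}

If intel_gt_pm_get_if_awake() fails, i915_request_enable_breadcrumb() below
simply declines to arm: a GT that is asleep cannot have a breadcrumb pending.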
@@ -294,23 +278,23 @@ void intel_engine_fini_breadcrumbs(struct intel_engine_cs *engine)
bool i915_request_enable_breadcrumb(struct i915_request *rq)
{
lockdep_assert_held(&rq->lock);
- lockdep_assert_irqs_disabled();
if (test_bit(I915_FENCE_FLAG_ACTIVE, &rq->fence.flags)) {
struct intel_breadcrumbs *b = &rq->engine->breadcrumbs;
- struct intel_context *ce = rq->hw_context;
+ struct intel_context *ce = rq->context;
struct list_head *pos;
spin_lock(&b->irq_lock);
GEM_BUG_ON(test_bit(I915_FENCE_FLAG_SIGNAL, &rq->fence.flags));
- __intel_breadcrumbs_arm_irq(b);
+ if (!__intel_breadcrumbs_arm_irq(b))
+ goto unlock;
/*
* We keep the seqno in retirement order, so we can break
- * inside intel_engine_breadcrumbs_irq as soon as we've passed
- * the last completed request (or seen a request that hasn't
- * event started). We could iterate the timeline->requests list,
+ * inside intel_engine_signal_breadcrumbs as soon as we've
+ * passed the last completed request (or seen a request that
+ * hasn't even started). We could walk the timeline->requests,
* but keeping a separate signalers_list has the advantage of
* hopefully being much smaller than the full list and so
* provides faster iteration and detection when there are no
@@ -333,6 +317,7 @@ bool i915_request_enable_breadcrumb(struct i915_request *rq)
GEM_BUG_ON(!check_signal_order(ce, rq));
set_bit(I915_FENCE_FLAG_SIGNAL, &rq->fence.flags);
+unlock:
spin_unlock(&b->irq_lock);
}
@@ -344,7 +329,6 @@ void i915_request_cancel_breadcrumb(struct i915_request *rq)
struct intel_breadcrumbs *b = &rq->engine->breadcrumbs;
lockdep_assert_held(&rq->lock);
- lockdep_assert_irqs_disabled();
/*
* We must wait for b->irq_lock so that we know the interrupt handler
@@ -354,7 +338,7 @@ void i915_request_cancel_breadcrumb(struct i915_request *rq)
*/
spin_lock(&b->irq_lock);
if (test_bit(I915_FENCE_FLAG_SIGNAL, &rq->fence.flags)) {
- struct intel_context *ce = rq->hw_context;
+ struct intel_context *ce = rq->context;
list_del(&rq->signal_link);
if (list_empty(&ce->signals))
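A minimal sketch of the signalling change above, with dma_fence internals
simplified and the signal_request() helper purely illustrative: the callback
list is detached from the fence under the request lock before the timestamp
is written, so the notify step walks a private snapshot rather than the
live cb_list.

	static void signal_request(struct i915_request *rq, ktime_t timestamp)
	{
		struct dma_fence_cb *cur, *tmp;
		struct list_head cb_list;

		spin_lock(&rq->lock);
		/* Detach the waiters; the fence's own list head is now free */
		list_replace(&rq->fence.cb_list, &cb_list);
		__dma_fence_signal__timestamp(&rq->fence, timestamp);
		list_for_each_entry_safe(cur, tmp, &cb_list, node)
			cur->func(&rq->fence, cur);
		spin_unlock(&rq->lock);
	}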
diff --git a/drivers/gpu/drm/i915/gt/intel_context.c b/drivers/gpu/drm/i915/gt/intel_context.c
index 23120901c55f..57e8a051ddc2 100644
--- a/drivers/gpu/drm/i915/gt/intel_context.c
+++ b/drivers/gpu/drm/i915/gt/intel_context.c
@@ -13,6 +13,7 @@
#include "intel_context.h"
#include "intel_engine.h"
#include "intel_engine_pm.h"
+#include "intel_ring.h"
static struct i915_global_context {
struct i915_global base;
@@ -30,8 +31,7 @@ void intel_context_free(struct intel_context *ce)
}
struct intel_context *
-intel_context_create(struct i915_gem_context *ctx,
- struct intel_engine_cs *engine)
+intel_context_create(struct intel_engine_cs *engine)
{
struct intel_context *ce;
@@ -39,164 +39,266 @@ intel_context_create(struct i915_gem_context *ctx,
if (!ce)
return ERR_PTR(-ENOMEM);
- intel_context_init(ce, ctx, engine);
+ intel_context_init(ce, engine);
return ce;
}
-int __intel_context_do_pin(struct intel_context *ce)
+int intel_context_alloc_state(struct intel_context *ce)
{
- int err;
+ int err = 0;
if (mutex_lock_interruptible(&ce->pin_mutex))
return -EINTR;
- if (likely(!atomic_read(&ce->pin_count))) {
- intel_wakeref_t wakeref;
+ if (!test_bit(CONTEXT_ALLOC_BIT, &ce->flags)) {
+ err = ce->ops->alloc(ce);
+ if (unlikely(err))
+ goto unlock;
+
+ set_bit(CONTEXT_ALLOC_BIT, &ce->flags);
+ }
+
+unlock:
+ mutex_unlock(&ce->pin_mutex);
+ return err;
+}
+
+static int intel_context_active_acquire(struct intel_context *ce)
+{
+ int err;
+
+ __i915_active_acquire(&ce->active);
+
+ if (intel_context_is_barrier(ce))
+ return 0;
+
+ /* Preallocate tracking nodes */
+ err = i915_active_acquire_preallocate_barrier(&ce->active,
+ ce->engine);
+ if (err)
+ i915_active_release(&ce->active);
- err = 0;
- with_intel_runtime_pm(&ce->engine->i915->runtime_pm, wakeref)
- err = ce->ops->pin(ce);
+ return err;
+}
+
+static void intel_context_active_release(struct intel_context *ce)
+{
+ /* Nodes preallocated in intel_context_active() */
+ i915_active_acquire_barrier(&ce->active);
+ i915_active_release(&ce->active);
+}
+
+int __intel_context_do_pin(struct intel_context *ce)
+{
+ int err;
+
+ if (unlikely(!test_bit(CONTEXT_ALLOC_BIT, &ce->flags))) {
+ err = intel_context_alloc_state(ce);
if (err)
- goto err;
+ return err;
+ }
- i915_gem_context_get(ce->gem_context); /* for ctx->ppgtt */
+ err = i915_active_acquire(&ce->active);
+ if (err)
+ return err;
+
+ if (mutex_lock_interruptible(&ce->pin_mutex)) {
+ err = -EINTR;
+ goto out_release;
+ }
+
+ if (likely(!atomic_add_unless(&ce->pin_count, 1, 0))) {
+ err = intel_context_active_acquire(ce);
+ if (unlikely(err))
+ goto out_unlock;
+
+ err = ce->ops->pin(ce);
+ if (unlikely(err))
+ goto err_active;
+
+ CE_TRACE(ce, "pin ring:{head:%04x, tail:%04x}\n",
+ ce->ring->head, ce->ring->tail);
smp_mb__before_atomic(); /* flush pin before it is visible */
+ atomic_inc(&ce->pin_count);
}
- atomic_inc(&ce->pin_count);
GEM_BUG_ON(!intel_context_is_pinned(ce)); /* no overflow! */
+ GEM_BUG_ON(i915_active_is_idle(&ce->active));
+ goto out_unlock;
+err_active:
+ intel_context_active_release(ce);
+out_unlock:
mutex_unlock(&ce->pin_mutex);
- return 0;
-
-err:
- mutex_unlock(&ce->pin_mutex);
+out_release:
+ i915_active_release(&ce->active);
return err;
}
void intel_context_unpin(struct intel_context *ce)
{
- if (likely(atomic_add_unless(&ce->pin_count, -1, 1)))
+ if (!atomic_dec_and_test(&ce->pin_count))
return;
- /* We may be called from inside intel_context_pin() to evict another */
- intel_context_get(ce);
- mutex_lock_nested(&ce->pin_mutex, SINGLE_DEPTH_NESTING);
-
- if (likely(atomic_dec_and_test(&ce->pin_count))) {
- ce->ops->unpin(ce);
-
- i915_gem_context_put(ce->gem_context);
- intel_context_active_release(ce);
- }
+ CE_TRACE(ce, "unpin\n");
+ ce->ops->unpin(ce);
- mutex_unlock(&ce->pin_mutex);
+ /*
+ * Once released, we may asynchronously drop the active reference.
+ * As that may be the only reference keeping the context alive,
+ * take an extra reference now so that it is not freed before we finish
+ * dereferencing it.
+ */
+ intel_context_get(ce);
+ intel_context_active_release(ce);
intel_context_put(ce);
}
-static int __context_pin_state(struct i915_vma *vma, unsigned long flags)
+static int __context_pin_state(struct i915_vma *vma)
{
+ unsigned int bias = i915_ggtt_pin_bias(vma) | PIN_OFFSET_BIAS;
int err;
- err = i915_vma_pin(vma, 0, 0, flags | PIN_GLOBAL);
+ err = i915_ggtt_pin(vma, 0, bias | PIN_HIGH);
if (err)
return err;
+ err = i915_active_acquire(&vma->active);
+ if (err)
+ goto err_unpin;
+
/*
* And mark it as a globally pinned object to let the shrinker know
* it cannot reclaim the object until we release it.
*/
- vma->obj->pin_global++;
+ i915_vma_make_unshrinkable(vma);
vma->obj->mm.dirty = true;
return 0;
+
+err_unpin:
+ i915_vma_unpin(vma);
+ return err;
}
static void __context_unpin_state(struct i915_vma *vma)
{
- vma->obj->pin_global--;
+ i915_vma_make_shrinkable(vma);
+ i915_active_release(&vma->active);
__i915_vma_unpin(vma);
}
-static void intel_context_retire(struct i915_active *active)
+static int __ring_active(struct intel_ring *ring)
{
- struct intel_context *ce = container_of(active, typeof(*ce), active);
+ int err;
- if (ce->state)
- __context_unpin_state(ce->state);
+ err = i915_active_acquire(&ring->vma->active);
+ if (err)
+ return err;
- intel_ring_unpin(ce->ring);
- intel_context_put(ce);
+ err = intel_ring_pin(ring);
+ if (err)
+ goto err_active;
+
+ return 0;
+
+err_active:
+ i915_active_release(&ring->vma->active);
+ return err;
}
-void
-intel_context_init(struct intel_context *ce,
- struct i915_gem_context *ctx,
- struct intel_engine_cs *engine)
+static void __ring_retire(struct intel_ring *ring)
{
- GEM_BUG_ON(!engine->cops);
+ intel_ring_unpin(ring);
+ i915_active_release(&ring->vma->active);
+}
- kref_init(&ce->ref);
+__i915_active_call
+static void __intel_context_retire(struct i915_active *active)
+{
+ struct intel_context *ce = container_of(active, typeof(*ce), active);
- ce->gem_context = ctx;
- ce->engine = engine;
- ce->ops = engine->cops;
- ce->sseu = engine->sseu;
+ CE_TRACE(ce, "retire\n");
- INIT_LIST_HEAD(&ce->signal_link);
- INIT_LIST_HEAD(&ce->signals);
+ set_bit(CONTEXT_VALID_BIT, &ce->flags);
+ if (ce->state)
+ __context_unpin_state(ce->state);
- mutex_init(&ce->pin_mutex);
+ intel_timeline_unpin(ce->timeline);
+ __ring_retire(ce->ring);
- i915_active_init(ctx->i915, &ce->active, intel_context_retire);
+ intel_context_put(ce);
}
-int intel_context_active_acquire(struct intel_context *ce, unsigned long flags)
+static int __intel_context_active(struct i915_active *active)
{
+ struct intel_context *ce = container_of(active, typeof(*ce), active);
int err;
- if (!i915_active_acquire(&ce->active))
- return 0;
+ CE_TRACE(ce, "active\n");
intel_context_get(ce);
- err = intel_ring_pin(ce->ring);
+ err = __ring_active(ce->ring);
if (err)
goto err_put;
+ err = intel_timeline_pin(ce->timeline);
+ if (err)
+ goto err_ring;
+
if (!ce->state)
return 0;
- err = __context_pin_state(ce->state, flags);
+ err = __context_pin_state(ce->state);
if (err)
- goto err_ring;
-
- /* Preallocate tracking nodes */
- if (!i915_gem_context_is_kernel(ce->gem_context)) {
- err = i915_active_acquire_preallocate_barrier(&ce->active,
- ce->engine);
- if (err)
- goto err_state;
- }
+ goto err_timeline;
return 0;
-err_state:
- __context_unpin_state(ce->state);
+err_timeline:
+ intel_timeline_unpin(ce->timeline);
err_ring:
- intel_ring_unpin(ce->ring);
+ __ring_retire(ce->ring);
err_put:
intel_context_put(ce);
- i915_active_cancel(&ce->active);
return err;
}
-void intel_context_active_release(struct intel_context *ce)
+void
+intel_context_init(struct intel_context *ce,
+ struct intel_engine_cs *engine)
{
- /* Nodes preallocated in intel_context_active() */
- i915_active_acquire_barrier(&ce->active);
- i915_active_release(&ce->active);
+ GEM_BUG_ON(!engine->cops);
+ GEM_BUG_ON(!engine->gt->vm);
+
+ kref_init(&ce->ref);
+
+ ce->engine = engine;
+ ce->ops = engine->cops;
+ ce->sseu = engine->sseu;
+ ce->ring = __intel_context_ring_size(SZ_4K);
+
+ ce->vm = i915_vm_get(engine->gt->vm);
+
+ INIT_LIST_HEAD(&ce->signal_link);
+ INIT_LIST_HEAD(&ce->signals);
+
+ mutex_init(&ce->pin_mutex);
+
+ i915_active_init(&ce->active,
+ __intel_context_active, __intel_context_retire);
+}
+
+void intel_context_fini(struct intel_context *ce)
+{
+ if (ce->timeline)
+ intel_timeline_put(ce->timeline);
+ i915_vm_put(ce->vm);
+
+ mutex_destroy(&ce->pin_mutex);
+ i915_active_fini(&ce->active);
}
static void i915_global_context_shrink(void)
@@ -227,13 +329,42 @@ int __init i915_global_context_init(void)
void intel_context_enter_engine(struct intel_context *ce)
{
intel_engine_pm_get(ce->engine);
+ intel_timeline_enter(ce->timeline);
}
void intel_context_exit_engine(struct intel_context *ce)
{
+ intel_timeline_exit(ce->timeline);
intel_engine_pm_put(ce->engine);
}
+int intel_context_prepare_remote_request(struct intel_context *ce,
+ struct i915_request *rq)
+{
+ struct intel_timeline *tl = ce->timeline;
+ int err;
+
+ /* Only suitable for use in remotely modifying this context */
+ GEM_BUG_ON(rq->context == ce);
+
+ if (rcu_access_pointer(rq->timeline) != tl) { /* timeline sharing! */
+ /* Queue this switch after current activity by this context. */
+ err = i915_active_fence_set(&tl->last_request, rq);
+ if (err)
+ return err;
+ }
+
+ /*
+ * Guarantee context image and the timeline remains pinned until the
+ * modifying request is retired by setting the ce activity tracker.
+ *
+ * But we only need to take one pin on its account; in other words,
+ * we transfer the pinned ce object to the tracked active request.
+ */
+ GEM_BUG_ON(i915_active_is_idle(&ce->active));
+ return i915_active_add_request(&ce->active, rq);
+}
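+/*
+ * Hedged usage sketch (emit_ppgtt_update is illustrative, not a helper
+ * defined here): a request built on another timeline, e.g. the engine's
+ * kernel context, is first ordered after ce's last activity and then
+ * keeps ce's image and timeline pinned until it retires:
+ *
+ *	rq = i915_request_create(ce->engine->kernel_context);
+ *	if (!IS_ERR(rq)) {
+ *		err = intel_context_prepare_remote_request(ce, rq);
+ *		if (err == 0)
+ *			err = emit_ppgtt_update(rq, ce);
+ *		i915_request_add(rq);
+ *	}
+ */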
+
struct i915_request *intel_context_create_request(struct intel_context *ce)
{
struct i915_request *rq;
@@ -248,3 +379,7 @@ struct i915_request *intel_context_create_request(struct intel_context *ce)
return rq;
}
+
+#if IS_ENABLED(CONFIG_DRM_I915_SELFTEST)
+#include "selftest_context.c"
+#endif
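A hedged caller-side sketch of the reworked pin flow (error handling
abbreviated): the first pin allocates the context state via
CONTEXT_ALLOC_BIT and activates the ce->active tracker, and the matching
unpin can be issued as soon as a request is queued, since the in-flight
request keeps the context alive until it retires.

	struct i915_request *rq;
	int err;

	err = intel_context_pin(ce);	/* may alloc state on first pin */
	if (err)
		return ERR_PTR(err);

	rq = i915_request_create(ce);	/* rq holds ce while in flight */

	intel_context_unpin(ce);	/* drop our temporary pin */
	return rq;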
diff --git a/drivers/gpu/drm/i915/gt/intel_context.h b/drivers/gpu/drm/i915/gt/intel_context.h
index a47275bc4f01..30bd248827d8 100644
--- a/drivers/gpu/drm/i915/gt/intel_context.h
+++ b/drivers/gpu/drm/i915/gt/intel_context.h
@@ -7,18 +7,31 @@
#ifndef __INTEL_CONTEXT_H__
#define __INTEL_CONTEXT_H__
+#include <linux/bitops.h>
#include <linux/lockdep.h>
+#include <linux/types.h>
+#include "i915_active.h"
#include "intel_context_types.h"
#include "intel_engine_types.h"
+#include "intel_ring_types.h"
+#include "intel_timeline_types.h"
+
+#define CE_TRACE(ce, fmt, ...) do { \
+ const struct intel_context *ce__ = (ce); \
+ ENGINE_TRACE(ce__->engine, "context:%llx " fmt, \
+ ce__->timeline->fence_context, \
+ ##__VA_ARGS__); \
+} while (0)
void intel_context_init(struct intel_context *ce,
- struct i915_gem_context *ctx,
struct intel_engine_cs *engine);
+void intel_context_fini(struct intel_context *ce);
struct intel_context *
-intel_context_create(struct i915_gem_context *ctx,
- struct intel_engine_cs *engine);
+intel_context_create(struct intel_engine_cs *engine);
+
+int intel_context_alloc_state(struct intel_context *ce);
void intel_context_free(struct intel_context *ce);
@@ -65,9 +78,14 @@ static inline void intel_context_unlock_pinned(struct intel_context *ce)
int __intel_context_do_pin(struct intel_context *ce);
+static inline bool intel_context_pin_if_active(struct intel_context *ce)
+{
+ return atomic_inc_not_zero(&ce->pin_count);
+}
+
static inline int intel_context_pin(struct intel_context *ce)
{
- if (likely(atomic_inc_not_zero(&ce->pin_count)))
+ if (likely(intel_context_pin_if_active(ce)))
return 0;
return __intel_context_do_pin(ce);
@@ -86,25 +104,25 @@ void intel_context_exit_engine(struct intel_context *ce);
static inline void intel_context_enter(struct intel_context *ce)
{
+ lockdep_assert_held(&ce->timeline->mutex);
if (!ce->active_count++)
ce->ops->enter(ce);
}
static inline void intel_context_mark_active(struct intel_context *ce)
{
+ lockdep_assert_held(&ce->timeline->mutex);
++ce->active_count;
}
static inline void intel_context_exit(struct intel_context *ce)
{
+ lockdep_assert_held(&ce->timeline->mutex);
GEM_BUG_ON(!ce->active_count);
if (!--ce->active_count)
ce->ops->exit(ce);
}
-int intel_context_active_acquire(struct intel_context *ce, unsigned long flags);
-void intel_context_active_release(struct intel_context *ce);
-
static inline struct intel_context *intel_context_get(struct intel_context *ce)
{
kref_get(&ce->ref);
@@ -116,19 +134,94 @@ static inline void intel_context_put(struct intel_context *ce)
kref_put(&ce->ref, ce->ops->destroy);
}
-static inline int __must_check
+static inline struct intel_timeline *__must_check
intel_context_timeline_lock(struct intel_context *ce)
- __acquires(&ce->ring->timeline->mutex)
+ __acquires(&ce->timeline->mutex)
{
- return mutex_lock_interruptible(&ce->ring->timeline->mutex);
+ struct intel_timeline *tl = ce->timeline;
+ int err;
+
+ err = mutex_lock_interruptible(&tl->mutex);
+ if (err)
+ return ERR_PTR(err);
+
+ return tl;
}
-static inline void intel_context_timeline_unlock(struct intel_context *ce)
- __releases(&ce->ring->timeline->mutex)
+static inline void intel_context_timeline_unlock(struct intel_timeline *tl)
+ __releases(&tl->mutex)
{
- mutex_unlock(&ce->ring->timeline->mutex);
+ mutex_unlock(&tl->mutex);
}
+int intel_context_prepare_remote_request(struct intel_context *ce,
+ struct i915_request *rq);
+
struct i915_request *intel_context_create_request(struct intel_context *ce);
+static inline struct intel_ring *__intel_context_ring_size(u64 sz)
+{
+ return u64_to_ptr(struct intel_ring, sz);
+}
+
+static inline bool intel_context_is_barrier(const struct intel_context *ce)
+{
+ return test_bit(CONTEXT_BARRIER_BIT, &ce->flags);
+}
+
+static inline bool intel_context_use_semaphores(const struct intel_context *ce)
+{
+ return test_bit(CONTEXT_USE_SEMAPHORES, &ce->flags);
+}
+
+static inline void intel_context_set_use_semaphores(struct intel_context *ce)
+{
+ set_bit(CONTEXT_USE_SEMAPHORES, &ce->flags);
+}
+
+static inline void intel_context_clear_use_semaphores(struct intel_context *ce)
+{
+ clear_bit(CONTEXT_USE_SEMAPHORES, &ce->flags);
+}
+
+static inline bool intel_context_is_banned(const struct intel_context *ce)
+{
+ return test_bit(CONTEXT_BANNED, &ce->flags);
+}
+
+static inline bool intel_context_set_banned(struct intel_context *ce)
+{
+ return test_and_set_bit(CONTEXT_BANNED, &ce->flags);
+}
+
+static inline bool
+intel_context_force_single_submission(const struct intel_context *ce)
+{
+ return test_bit(CONTEXT_FORCE_SINGLE_SUBMISSION, &ce->flags);
+}
+
+static inline void
+intel_context_set_single_submission(struct intel_context *ce)
+{
+ __set_bit(CONTEXT_FORCE_SINGLE_SUBMISSION, &ce->flags);
+}
+
+static inline bool
+intel_context_nopreempt(const struct intel_context *ce)
+{
+ return test_bit(CONTEXT_NOPREEMPT, &ce->flags);
+}
+
+static inline void
+intel_context_set_nopreempt(struct intel_context *ce)
+{
+ set_bit(CONTEXT_NOPREEMPT, &ce->flags);
+}
+
+static inline void
+intel_context_clear_nopreempt(struct intel_context *ce)
+{
+ clear_bit(CONTEXT_NOPREEMPT, &ce->flags);
+}
+
#endif /* __INTEL_CONTEXT_H__ */
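A short example of the changed timeline-lock helpers above: the lock now
hands back the timeline (or an ERR_PTR), and the unlock takes that
timeline rather than re-deriving it from the context.

	struct intel_timeline *tl;

	tl = intel_context_timeline_lock(ce);
	if (IS_ERR(tl))
		return PTR_ERR(tl);

	/* ... build requests against tl ... */

	intel_context_timeline_unlock(tl);
	return 0;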
diff --git a/drivers/gpu/drm/i915/gt/intel_context_types.h b/drivers/gpu/drm/i915/gt/intel_context_types.h
index 08049ee91cee..ca1420fb8b53 100644
--- a/drivers/gpu/drm/i915/gt/intel_context_types.h
+++ b/drivers/gpu/drm/i915/gt/intel_context_types.h
@@ -13,15 +13,20 @@
#include <linux/types.h>
#include "i915_active_types.h"
+#include "i915_utils.h"
#include "intel_engine_types.h"
#include "intel_sseu.h"
+#define CONTEXT_REDZONE POISON_INUSE
+
struct i915_gem_context;
struct i915_vma;
struct intel_context;
struct intel_ring;
struct intel_context_ops {
+ int (*alloc)(struct intel_context *ce);
+
int (*pin)(struct intel_context *ce);
void (*unpin)(struct intel_context *ce);
@@ -35,20 +40,35 @@ struct intel_context_ops {
struct intel_context {
struct kref ref;
- struct i915_gem_context *gem_context;
struct intel_engine_cs *engine;
struct intel_engine_cs *inflight;
+#define intel_context_inflight(ce) ptr_mask_bits((ce)->inflight, 2)
+#define intel_context_inflight_count(ce) ptr_unmask_bits((ce)->inflight, 2)
+
+ struct i915_address_space *vm;
+ struct i915_gem_context __rcu *gem_context;
struct list_head signal_link;
struct list_head signals;
struct i915_vma *state;
struct intel_ring *ring;
+ struct intel_timeline *timeline;
+
+ unsigned long flags;
+#define CONTEXT_BARRIER_BIT 0
+#define CONTEXT_ALLOC_BIT 1
+#define CONTEXT_VALID_BIT 2
+#define CONTEXT_USE_SEMAPHORES 3
+#define CONTEXT_BANNED 4
+#define CONTEXT_FORCE_SINGLE_SUBMISSION 5
+#define CONTEXT_NOPREEMPT 6
u32 *lrc_reg_state;
u64 lrc_desc;
+ u32 tag; /* cookie passed to HW to track this context on submission */
- unsigned int active_count; /* notionally protected by timeline->mutex */
+ unsigned int active_count; /* protected by timeline->mutex */
atomic_t pin_count;
struct mutex pin_mutex; /* guards pinning and associated on-gpuing */
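The CONTEXT_* defines above are plain bit indices into the shared
ce->flags word, consumed through the inline helpers added in
intel_context.h; a hedged illustration of the atomic ban path:

	if (intel_context_set_banned(ce))	/* test_and_set_bit */
		return;				/* already banned */

	CE_TRACE(ce, "banned\n");		/* first to ban it */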
diff --git a/drivers/gpu/drm/i915/gt/intel_engine.h b/drivers/gpu/drm/i915/gt/intel_engine.h
index 2f1c6871ee95..5df003061e44 100644
--- a/drivers/gpu/drm/i915/gt/intel_engine.h
+++ b/drivers/gpu/drm/i915/gt/intel_engine.h
@@ -9,17 +9,17 @@
#include <linux/random.h>
#include <linux/seqlock.h>
-#include "i915_gem_batch_pool.h"
#include "i915_pmu.h"
#include "i915_reg.h"
#include "i915_request.h"
#include "i915_selftest.h"
-#include "i915_timeline.h"
+#include "gt/intel_timeline.h"
#include "intel_engine_types.h"
#include "intel_gpu_commands.h"
#include "intel_workarounds.h"
struct drm_printer;
+struct intel_gt;
/* Early gen2 devices have a cacheline of just 32 bytes, using 64 is overkill,
* but keeps the logic simple. Indeed, the whole purpose of this macro is just
@@ -29,6 +29,13 @@ struct drm_printer;
#define CACHELINE_BYTES 64
#define CACHELINE_DWORDS (CACHELINE_BYTES / sizeof(u32))
+#define ENGINE_TRACE(e, fmt, ...) do { \
+ const struct intel_engine_cs *e__ __maybe_unused = (e); \
+ GEM_TRACE("%s %s: " fmt, \
+ dev_name(e__->i915->drm.dev), e__->name, \
+ ##__VA_ARGS__); \
+} while (0)
+
/*
* The register defines to be used with the following macros need to accept a
* base param, e.g:
@@ -51,7 +58,7 @@ struct drm_printer;
#define ENGINE_READ16(...) __ENGINE_READ_OP(read16, __VA_ARGS__)
#define ENGINE_READ(...) __ENGINE_READ_OP(read, __VA_ARGS__)
#define ENGINE_READ_FW(...) __ENGINE_READ_OP(read_fw, __VA_ARGS__)
-#define ENGINE_POSTING_READ(...) __ENGINE_READ_OP(posting_read, __VA_ARGS__)
+#define ENGINE_POSTING_READ(...) __ENGINE_READ_OP(posting_read_fw, __VA_ARGS__)
#define ENGINE_POSTING_READ16(...) __ENGINE_READ_OP(posting_read16, __VA_ARGS__)
#define ENGINE_READ64(engine__, lower_reg__, upper_reg__) \
@@ -90,106 +97,36 @@ struct drm_printer;
/* seqno size is actually only a uint32, but since we plan to use MI_FLUSH_DW to
* do the writes, and that must have qw aligned offsets, simply pretend it's 8b.
*/
-enum intel_engine_hangcheck_action {
- ENGINE_IDLE = 0,
- ENGINE_WAIT,
- ENGINE_ACTIVE_SEQNO,
- ENGINE_ACTIVE_HEAD,
- ENGINE_ACTIVE_SUBUNITS,
- ENGINE_WAIT_KICK,
- ENGINE_DEAD,
-};
-
-static inline const char *
-hangcheck_action_to_str(const enum intel_engine_hangcheck_action a)
-{
- switch (a) {
- case ENGINE_IDLE:
- return "idle";
- case ENGINE_WAIT:
- return "wait";
- case ENGINE_ACTIVE_SEQNO:
- return "active seqno";
- case ENGINE_ACTIVE_HEAD:
- return "active head";
- case ENGINE_ACTIVE_SUBUNITS:
- return "active subunits";
- case ENGINE_WAIT_KICK:
- return "wait kick";
- case ENGINE_DEAD:
- return "dead";
- }
- return "unknown";
-}
-
-void intel_engines_set_scheduler_caps(struct drm_i915_private *i915);
-
-static inline void
-execlists_set_active(struct intel_engine_execlists *execlists,
- unsigned int bit)
+static inline unsigned int
+execlists_num_ports(const struct intel_engine_execlists * const execlists)
{
- __set_bit(bit, (unsigned long *)&execlists->active);
+ return execlists->port_mask + 1;
}
-static inline bool
-execlists_set_active_once(struct intel_engine_execlists *execlists,
- unsigned int bit)
+static inline struct i915_request *
+execlists_active(const struct intel_engine_execlists *execlists)
{
- return !__test_and_set_bit(bit, (unsigned long *)&execlists->active);
+ return *READ_ONCE(execlists->active);
}
static inline void
-execlists_clear_active(struct intel_engine_execlists *execlists,
- unsigned int bit)
+execlists_active_lock_bh(struct intel_engine_execlists *execlists)
{
- __clear_bit(bit, (unsigned long *)&execlists->active);
+ local_bh_disable(); /* prevent local softirq and lock recursion */
+ tasklet_lock(&execlists->tasklet);
}
static inline void
-execlists_clear_all_active(struct intel_engine_execlists *execlists)
+execlists_active_unlock_bh(struct intel_engine_execlists *execlists)
{
- execlists->active = 0;
+ tasklet_unlock(&execlists->tasklet);
+ local_bh_enable(); /* restore softirq, and kick ksoftirqd! */
}
-static inline bool
-execlists_is_active(const struct intel_engine_execlists *execlists,
- unsigned int bit)
-{
- return test_bit(bit, (unsigned long *)&execlists->active);
-}
-
-void execlists_user_begin(struct intel_engine_execlists *execlists,
- const struct execlist_port *port);
-void execlists_user_end(struct intel_engine_execlists *execlists);
-
-void
-execlists_cancel_port_requests(struct intel_engine_execlists * const execlists);
-
struct i915_request *
execlists_unwind_incomplete_requests(struct intel_engine_execlists *execlists);
-static inline unsigned int
-execlists_num_ports(const struct intel_engine_execlists * const execlists)
-{
- return execlists->port_mask + 1;
-}
-
-static inline struct execlist_port *
-execlists_port_complete(struct intel_engine_execlists * const execlists,
- struct execlist_port * const port)
-{
- const unsigned int m = execlists->port_mask;
-
- GEM_BUG_ON(port_index(port, execlists) != 0);
- GEM_BUG_ON(!execlists_is_active(execlists, EXECLISTS_ACTIVE_USER));
-
- memmove(port, port + 1, m * sizeof(struct execlist_port));
- memset(port + m, 0, sizeof(struct execlist_port));
-
- return port;
-}
-
static inline u32
intel_read_status_page(const struct intel_engine_cs *engine, int reg)
{
@@ -243,134 +180,19 @@ intel_write_status_page(struct intel_engine_cs *engine, int reg, u32 value)
#define I915_HWS_CSB_WRITE_INDEX 0x1f
#define CNL_HWS_CSB_WRITE_INDEX 0x2f
-struct intel_ring *
-intel_engine_create_ring(struct intel_engine_cs *engine,
- struct i915_timeline *timeline,
- int size);
-int intel_ring_pin(struct intel_ring *ring);
-void intel_ring_reset(struct intel_ring *ring, u32 tail);
-unsigned int intel_ring_update_space(struct intel_ring *ring);
-void intel_ring_unpin(struct intel_ring *ring);
-void intel_ring_free(struct kref *ref);
-
-static inline struct intel_ring *intel_ring_get(struct intel_ring *ring)
-{
- kref_get(&ring->ref);
- return ring;
-}
-
-static inline void intel_ring_put(struct intel_ring *ring)
-{
- kref_put(&ring->ref, intel_ring_free);
-}
-
void intel_engine_stop(struct intel_engine_cs *engine);
void intel_engine_cleanup(struct intel_engine_cs *engine);
-int __must_check intel_ring_cacheline_align(struct i915_request *rq);
-
-u32 __must_check *intel_ring_begin(struct i915_request *rq, unsigned int n);
-
-static inline void intel_ring_advance(struct i915_request *rq, u32 *cs)
-{
- /* Dummy function.
- *
- * This serves as a placeholder in the code so that the reader
- * can compare against the preceding intel_ring_begin() and
- * check that the number of dwords emitted matches the space
- * reserved for the command packet (i.e. the value passed to
- * intel_ring_begin()).
- */
- GEM_BUG_ON((rq->ring->vaddr + rq->ring->emit) != cs);
-}
-
-static inline u32 intel_ring_wrap(const struct intel_ring *ring, u32 pos)
-{
- return pos & (ring->size - 1);
-}
-
-static inline bool
-intel_ring_offset_valid(const struct intel_ring *ring,
- unsigned int pos)
-{
- if (pos & -ring->size) /* must be strictly within the ring */
- return false;
-
- if (!IS_ALIGNED(pos, 8)) /* must be qword aligned */
- return false;
-
- return true;
-}
+int intel_engines_init_mmio(struct intel_gt *gt);
+int intel_engines_init(struct intel_gt *gt);
-static inline u32 intel_ring_offset(const struct i915_request *rq, void *addr)
-{
- /* Don't write ring->size (equivalent to 0) as that hangs some GPUs. */
- u32 offset = addr - rq->ring->vaddr;
- GEM_BUG_ON(offset > rq->ring->size);
- return intel_ring_wrap(rq->ring, offset);
-}
-
-static inline void
-assert_ring_tail_valid(const struct intel_ring *ring, unsigned int tail)
-{
- GEM_BUG_ON(!intel_ring_offset_valid(ring, tail));
-
- /*
- * "Ring Buffer Use"
- * Gen2 BSpec "1. Programming Environment" / 1.4.4.6
- * Gen3 BSpec "1c Memory Interface Functions" / 2.3.4.5
- * Gen4+ BSpec "1c Memory Interface and Command Stream" / 5.3.4.5
- * "If the Ring Buffer Head Pointer and the Tail Pointer are on the
- * same cacheline, the Head Pointer must not be greater than the Tail
- * Pointer."
- *
- * We use ring->head as the last known location of the actual RING_HEAD,
- * it may have advanced but in the worst case it is equally the same
- * as ring->head and so we should never program RING_TAIL to advance
- * into the same cacheline as ring->head.
- */
-#define cacheline(a) round_down(a, CACHELINE_BYTES)
- GEM_BUG_ON(cacheline(tail) == cacheline(ring->head) &&
- tail < ring->head);
-#undef cacheline
-}
-
-static inline unsigned int
-intel_ring_set_tail(struct intel_ring *ring, unsigned int tail)
-{
- /* Whilst writes to the tail are strictly order, there is no
- * serialisation between readers and the writers. The tail may be
- * read by i915_request_retire() just as it is being updated
- * by execlists, as although the breadcrumb is complete, the context
- * switch hasn't been seen.
- */
- assert_ring_tail_valid(ring, tail);
- ring->tail = tail;
- return tail;
-}
-
-static inline unsigned int
-__intel_ring_space(unsigned int head, unsigned int tail, unsigned int size)
-{
- /*
- * "If the Ring Buffer Head Pointer and the Tail Pointer are on the
- * same cacheline, the Head Pointer must not be greater than the Tail
- * Pointer."
- */
- GEM_BUG_ON(!is_power_of_2(size));
- return (head - tail - CACHELINE_BYTES) & (size - 1);
-}
-
-int intel_engines_init_mmio(struct drm_i915_private *i915);
-int intel_engines_setup(struct drm_i915_private *i915);
-int intel_engines_init(struct drm_i915_private *i915);
-void intel_engines_cleanup(struct drm_i915_private *i915);
+void intel_engines_release(struct intel_gt *gt);
+void intel_engines_free(struct intel_gt *gt);
int intel_engine_init_common(struct intel_engine_cs *engine);
void intel_engine_cleanup_common(struct intel_engine_cs *engine);
int intel_ring_submission_setup(struct intel_engine_cs *engine);
-int intel_ring_submission_init(struct intel_engine_cs *engine);
int intel_engine_stop_cs(struct intel_engine_cs *engine);
void intel_engine_cancel_stop_cs(struct intel_engine_cs *engine);
@@ -380,7 +202,7 @@ void intel_engine_set_hwsp_writemask(struct intel_engine_cs *engine, u32 mask);
u64 intel_engine_get_active_head(const struct intel_engine_cs *engine);
u64 intel_engine_get_last_batch_head(const struct intel_engine_cs *engine);
-void intel_engine_get_instdone(struct intel_engine_cs *engine,
+void intel_engine_get_instdone(const struct intel_engine_cs *engine,
struct intel_instdone *instdone);
void intel_engine_init_execlists(struct intel_engine_cs *engine);
@@ -388,20 +210,14 @@ void intel_engine_init_execlists(struct intel_engine_cs *engine);
void intel_engine_init_breadcrumbs(struct intel_engine_cs *engine);
void intel_engine_fini_breadcrumbs(struct intel_engine_cs *engine);
-void intel_engine_pin_breadcrumbs_irq(struct intel_engine_cs *engine);
-void intel_engine_unpin_breadcrumbs_irq(struct intel_engine_cs *engine);
-
-void intel_engine_signal_breadcrumbs(struct intel_engine_cs *engine);
void intel_engine_disarm_breadcrumbs(struct intel_engine_cs *engine);
static inline void
-intel_engine_queue_breadcrumbs(struct intel_engine_cs *engine)
+intel_engine_signal_breadcrumbs(struct intel_engine_cs *engine)
{
irq_work_queue(&engine->breadcrumbs.irq_work);
}
-void intel_engine_breadcrumbs_irq(struct intel_engine_cs *engine);
-
void intel_engine_reset_breadcrumbs(struct intel_engine_cs *engine);
void intel_engine_fini_breadcrumbs(struct intel_engine_cs *engine);
@@ -456,19 +272,19 @@ gen8_emit_ggtt_write(u32 *cs, u32 value, u32 gtt_offset, u32 flags)
return cs;
}
-static inline void intel_engine_reset(struct intel_engine_cs *engine,
- bool stalled)
+static inline void __intel_engine_reset(struct intel_engine_cs *engine,
+ bool stalled)
{
- if (engine->reset.reset)
- engine->reset.reset(engine, stalled);
+ if (engine->reset.rewind)
+ engine->reset.rewind(engine, stalled);
engine->serial++; /* contexts lost */
}
+bool intel_engines_are_idle(struct intel_gt *gt);
bool intel_engine_is_idle(struct intel_engine_cs *engine);
-bool intel_engines_are_idle(struct drm_i915_private *dev_priv);
+void intel_engine_flush_submission(struct intel_engine_cs *engine);
-void intel_engines_reset_default_submission(struct drm_i915_private *i915);
-unsigned int intel_engines_has_context_isolation(struct drm_i915_private *i915);
+void intel_engines_reset_default_submission(struct intel_gt *gt);
bool intel_engine_can_store_dword(struct intel_engine_cs *engine);
@@ -477,64 +293,6 @@ void intel_engine_dump(struct intel_engine_cs *engine,
struct drm_printer *m,
const char *header, ...);
-struct intel_engine_cs *
-intel_engine_lookup_user(struct drm_i915_private *i915, u8 class, u8 instance);
-
-static inline void intel_engine_context_in(struct intel_engine_cs *engine)
-{
- unsigned long flags;
-
- if (READ_ONCE(engine->stats.enabled) == 0)
- return;
-
- write_seqlock_irqsave(&engine->stats.lock, flags);
-
- if (engine->stats.enabled > 0) {
- if (engine->stats.active++ == 0)
- engine->stats.start = ktime_get();
- GEM_BUG_ON(engine->stats.active == 0);
- }
-
- write_sequnlock_irqrestore(&engine->stats.lock, flags);
-}
-
-static inline void intel_engine_context_out(struct intel_engine_cs *engine)
-{
- unsigned long flags;
-
- if (READ_ONCE(engine->stats.enabled) == 0)
- return;
-
- write_seqlock_irqsave(&engine->stats.lock, flags);
-
- if (engine->stats.enabled > 0) {
- ktime_t last;
-
- if (engine->stats.active && --engine->stats.active == 0) {
- /*
- * Decrement the active context count and in case GPU
- * is now idle add up to the running total.
- */
- last = ktime_sub(ktime_get(), engine->stats.start);
-
- engine->stats.total = ktime_add(engine->stats.total,
- last);
- } else if (engine->stats.active == 0) {
- /*
- * After turning on engine stats, context out might be
- * the first event in which case we account from the
- * time stats gathering was turned on.
- */
- last = ktime_sub(ktime_get(), engine->stats.enabled_at);
-
- engine->stats.total = ktime_add(engine->stats.total,
- last);
- }
- }
-
- write_sequnlock_irqrestore(&engine->stats.lock, flags);
-}
-
int intel_enable_engine_stats(struct intel_engine_cs *engine);
void intel_disable_engine_stats(struct intel_engine_cs *engine);
@@ -543,7 +301,7 @@ ktime_t intel_engine_get_busy_time(struct intel_engine_cs *engine);
struct i915_request *
intel_engine_find_active_request(struct intel_engine_cs *engine);
-u32 intel_engine_context_size(struct drm_i915_private *i915, u8 class);
+u32 intel_engine_context_size(struct intel_gt *gt, u8 class);
#if IS_ENABLED(CONFIG_DRM_I915_SELFTEST)
@@ -571,4 +329,22 @@ void intel_engine_init_active(struct intel_engine_cs *engine,
#define ENGINE_MOCK 1
#define ENGINE_VIRTUAL 2
+static inline bool
+intel_engine_has_preempt_reset(const struct intel_engine_cs *engine)
+{
+ if (!IS_ACTIVE(CONFIG_DRM_I915_PREEMPT_TIMEOUT))
+ return false;
+
+ return intel_engine_has_preemption(engine);
+}
+
+static inline bool
+intel_engine_has_timeslices(const struct intel_engine_cs *engine)
+{
+ if (!IS_ACTIVE(CONFIG_DRM_I915_TIMESLICE_DURATION))
+ return false;
+
+ return intel_engine_has_semaphores(engine);
+}
+
#endif /* _INTEL_RINGBUFFER_H_ */
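A hedged sketch of sampling the execlists state with the helpers above:
execlists->active is an iterator into the inflight array, so it is
dereferenced under the bh-safe tasklet lock to keep the submission
tasklet from advancing it concurrently.

	struct i915_request *rq;

	execlists_active_lock_bh(&engine->execlists);
	rq = execlists_active(&engine->execlists);
	if (rq)
		pr_info("%s: request %llx:%lld inflight\n",
			engine->name, rq->fence.context, rq->fence.seqno);
	execlists_active_unlock_bh(&engine->execlists);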
diff --git a/drivers/gpu/drm/i915/gt/intel_engine_cs.c b/drivers/gpu/drm/i915/gt/intel_engine_cs.c
index f25632c9b292..06ff7695fa29 100644
--- a/drivers/gpu/drm/i915/gt/intel_engine_cs.c
+++ b/drivers/gpu/drm/i915/gt/intel_engine_cs.c
@@ -28,11 +28,16 @@
#include "i915_drv.h"
+#include "intel_context.h"
#include "intel_engine.h"
#include "intel_engine_pm.h"
-#include "intel_context.h"
+#include "intel_engine_pool.h"
+#include "intel_engine_user.h"
+#include "intel_gt.h"
+#include "intel_gt_requests.h"
#include "intel_lrc.h"
#include "intel_reset.h"
+#include "intel_ring.h"
/* Haswell does have the CXT_SIZE register however it does not appear to be
* valid. Now, docs explain in dwords what is in the context object. The full
@@ -51,30 +56,6 @@
#define GEN8_LR_CONTEXT_OTHER_SIZE ( 2 * PAGE_SIZE)
-struct engine_class_info {
- const char *name;
- u8 uabi_class;
-};
-
-static const struct engine_class_info intel_engine_classes[] = {
- [RENDER_CLASS] = {
- .name = "rcs",
- .uabi_class = I915_ENGINE_CLASS_RENDER,
- },
- [COPY_ENGINE_CLASS] = {
- .name = "bcs",
- .uabi_class = I915_ENGINE_CLASS_COPY,
- },
- [VIDEO_DECODE_CLASS] = {
- .name = "vcs",
- .uabi_class = I915_ENGINE_CLASS_VIDEO,
- },
- [VIDEO_ENHANCEMENT_CLASS] = {
- .name = "vecs",
- .uabi_class = I915_ENGINE_CLASS_VIDEO_ENHANCE,
- },
-};
-
#define MAX_MMIO_BASES 3
struct engine_info {
unsigned int hw_id;
@@ -160,7 +141,7 @@ static const struct engine_info intel_engines[] = {
/**
* intel_engine_context_size() - return the size of the context for an engine
- * @dev_priv: i915 device private
+ * @gt: the gt the engine belongs to
* @class: engine class
*
* Each engine class may require a different amount of space for a context
@@ -172,18 +153,20 @@ static const struct engine_info intel_engines[] = {
* in LRC mode, but does not include the "shared data page" used with
* GuC submission. The caller should account for this if using the GuC.
*/
-u32 intel_engine_context_size(struct drm_i915_private *dev_priv, u8 class)
+u32 intel_engine_context_size(struct intel_gt *gt, u8 class)
{
+ struct intel_uncore *uncore = gt->uncore;
u32 cxt_size;
BUILD_BUG_ON(I915_GTT_PAGE_SIZE != PAGE_SIZE);
switch (class) {
case RENDER_CLASS:
- switch (INTEL_GEN(dev_priv)) {
+ switch (INTEL_GEN(gt->i915)) {
default:
- MISSING_CASE(INTEL_GEN(dev_priv));
+ MISSING_CASE(INTEL_GEN(gt->i915));
return DEFAULT_LR_CONTEXT_RENDER_SIZE;
+ case 12:
case 11:
return GEN11_LR_CONTEXT_RENDER_SIZE;
case 10:
@@ -193,14 +176,14 @@ u32 intel_engine_context_size(struct drm_i915_private *dev_priv, u8 class)
case 8:
return GEN8_LR_CONTEXT_RENDER_SIZE;
case 7:
- if (IS_HASWELL(dev_priv))
+ if (IS_HASWELL(gt->i915))
return HSW_CXT_TOTAL_SIZE;
- cxt_size = I915_READ(GEN7_CXT_SIZE);
+ cxt_size = intel_uncore_read(uncore, GEN7_CXT_SIZE);
return round_up(GEN7_CXT_TOTAL_SIZE(cxt_size) * 64,
PAGE_SIZE);
case 6:
- cxt_size = I915_READ(CXT_SIZE);
+ cxt_size = intel_uncore_read(uncore, CXT_SIZE);
return round_up(GEN6_CXT_TOTAL_SIZE(cxt_size) * 64,
PAGE_SIZE);
case 5:
@@ -215,9 +198,9 @@ u32 intel_engine_context_size(struct drm_i915_private *dev_priv, u8 class)
* minimum allocation anyway so it should all come
* out in the wash.
*/
- cxt_size = I915_READ(CXT_SIZE) + 1;
+ cxt_size = intel_uncore_read(uncore, CXT_SIZE) + 1;
DRM_DEBUG_DRIVER("gen%d CXT_SIZE = %d bytes [0x%08x]\n",
- INTEL_GEN(dev_priv),
+ INTEL_GEN(gt->i915),
cxt_size * 64,
cxt_size - 1);
return round_up(cxt_size * 64, PAGE_SIZE);
@@ -234,7 +217,7 @@ u32 intel_engine_context_size(struct drm_i915_private *dev_priv, u8 class)
case VIDEO_DECODE_CLASS:
case VIDEO_ENHANCEMENT_CLASS:
case COPY_ENGINE_CLASS:
- if (INTEL_GEN(dev_priv) < 8)
+ if (INTEL_GEN(gt->i915) < 8)
return 0;
return GEN8_LR_CONTEXT_OTHER_SIZE;
}
@@ -255,11 +238,16 @@ static u32 __engine_mmio_base(struct drm_i915_private *i915,
return bases[i].base;
}
-static void __sprint_engine_name(char *name, const struct engine_info *info)
+static void __sprint_engine_name(struct intel_engine_cs *engine)
{
- WARN_ON(snprintf(name, INTEL_ENGINE_CS_MAX_NAME, "%s%u",
- intel_engine_classes[info->class].name,
- info->instance) >= INTEL_ENGINE_CS_MAX_NAME);
+ /*
+ * Before we know what the uABI name for this engine will be,
+ * we still would like to keep track of this engine in the debug logs.
+ * We throw in a ' here as a reminder that this isn't its final name.
+ */
+ GEM_WARN_ON(snprintf(engine->name, sizeof(engine->name), "%s'%u",
+ intel_engine_class_repr(engine->class),
+ engine->instance) >= sizeof(engine->name));
}
void intel_engine_set_hwsp_writemask(struct intel_engine_cs *engine, u32 mask)
@@ -283,28 +271,26 @@ static void intel_engine_sanitize_mmio(struct intel_engine_cs *engine)
intel_engine_set_hwsp_writemask(engine, ~0u);
}
-static int
-intel_engine_setup(struct drm_i915_private *dev_priv,
- enum intel_engine_id id)
+static int intel_engine_setup(struct intel_gt *gt, enum intel_engine_id id)
{
const struct engine_info *info = &intel_engines[id];
struct intel_engine_cs *engine;
- GEM_BUG_ON(info->class >= ARRAY_SIZE(intel_engine_classes));
-
BUILD_BUG_ON(MAX_ENGINE_CLASS >= BIT(GEN11_ENGINE_CLASS_WIDTH));
BUILD_BUG_ON(MAX_ENGINE_INSTANCE >= BIT(GEN11_ENGINE_INSTANCE_WIDTH));
+ if (GEM_DEBUG_WARN_ON(id >= ARRAY_SIZE(gt->engine)))
+ return -EINVAL;
+
if (GEM_DEBUG_WARN_ON(info->class > MAX_ENGINE_CLASS))
return -EINVAL;
if (GEM_DEBUG_WARN_ON(info->instance > MAX_ENGINE_INSTANCE))
return -EINVAL;
- if (GEM_DEBUG_WARN_ON(dev_priv->engine_class[info->class][info->instance]))
+ if (GEM_DEBUG_WARN_ON(gt->engine_class[info->class][info->instance]))
return -EINVAL;
- GEM_BUG_ON(dev_priv->engine[id]);
engine = kzalloc(sizeof(*engine), GFP_KERNEL);
if (!engine)
return -ENOMEM;
@@ -312,33 +298,37 @@ intel_engine_setup(struct drm_i915_private *dev_priv,
BUILD_BUG_ON(BITS_PER_TYPE(engine->mask) < I915_NUM_ENGINES);
engine->id = id;
+ engine->legacy_idx = INVALID_ENGINE;
engine->mask = BIT(id);
- engine->i915 = dev_priv;
- engine->uncore = &dev_priv->uncore;
- __sprint_engine_name(engine->name, info);
+ engine->i915 = gt->i915;
+ engine->gt = gt;
+ engine->uncore = gt->uncore;
engine->hw_id = engine->guc_id = info->hw_id;
- engine->mmio_base = __engine_mmio_base(dev_priv, info->mmio_bases);
+ engine->mmio_base = __engine_mmio_base(gt->i915, info->mmio_bases);
+
engine->class = info->class;
engine->instance = info->instance;
-
- /*
- * To be overridden by the backend on setup. However to facilitate
- * cleanup on error during setup, we always provide the destroy vfunc.
- */
- engine->destroy = (typeof(engine->destroy))kfree;
-
- engine->uabi_class = intel_engine_classes[info->class].uabi_class;
-
- engine->context_size = intel_engine_context_size(dev_priv,
- engine->class);
+ __sprint_engine_name(engine);
+
+ engine->props.heartbeat_interval_ms =
+ CONFIG_DRM_I915_HEARTBEAT_INTERVAL;
+ engine->props.preempt_timeout_ms =
+ CONFIG_DRM_I915_PREEMPT_TIMEOUT;
+ engine->props.stop_timeout_ms =
+ CONFIG_DRM_I915_STOP_TIMEOUT;
+ engine->props.timeslice_duration_ms =
+ CONFIG_DRM_I915_TIMESLICE_DURATION;
+
+ engine->context_size = intel_engine_context_size(gt, engine->class);
if (WARN_ON(engine->context_size > BIT(20)))
engine->context_size = 0;
if (engine->context_size)
- DRIVER_CAPS(dev_priv)->has_logical_contexts = true;
+ DRIVER_CAPS(gt->i915)->has_logical_contexts = true;
/* Nothing to do here, execute in order of dependencies */
engine->schedule = NULL;
+ ewma__engine_latency_init(&engine->latency);
seqlock_init(&engine->stats.lock);
ATOMIC_INIT_NOTIFIER_HEAD(&engine->context_status_notifier);
@@ -346,8 +336,11 @@ intel_engine_setup(struct drm_i915_private *dev_priv,
/* Scrub mmio state on takeover */
intel_engine_sanitize_mmio(engine);
- dev_priv->engine_class[info->class][info->instance] = engine;
- dev_priv->engine[id] = engine;
+ gt->engine_class[info->class][info->instance] = engine;
+ gt->engine[id] = engine;
+
+ gt->i915->engine[id] = engine;
+
return 0;
}
@@ -381,38 +374,58 @@ static void __setup_engine_capabilities(struct intel_engine_cs *engine)
}
}
-static void intel_setup_engine_capabilities(struct drm_i915_private *i915)
+static void intel_setup_engine_capabilities(struct intel_gt *gt)
{
struct intel_engine_cs *engine;
enum intel_engine_id id;
- for_each_engine(engine, i915, id)
+ for_each_engine(engine, gt, id)
__setup_engine_capabilities(engine);
}
/**
- * intel_engines_cleanup() - free the resources allocated for Command Streamers
- * @i915: the i915 devic
+ * intel_engines_release() - free the resources allocated for Command Streamers
+ * @gt: pointer to struct intel_gt
*/
-void intel_engines_cleanup(struct drm_i915_private *i915)
+void intel_engines_release(struct intel_gt *gt)
+{
+ struct intel_engine_cs *engine;
+ enum intel_engine_id id;
+
+ /* Decouple the backend; but keep the layout for late GPU resets */
+ for_each_engine(engine, gt, id) {
+ if (!engine->release)
+ continue;
+
+ engine->release(engine);
+ engine->release = NULL;
+
+ memset(&engine->reset, 0, sizeof(engine->reset));
+
+ gt->i915->engine[id] = NULL;
+ }
+}
+
+void intel_engines_free(struct intel_gt *gt)
{
struct intel_engine_cs *engine;
enum intel_engine_id id;
- for_each_engine(engine, i915, id) {
- engine->destroy(engine);
- i915->engine[id] = NULL;
+ for_each_engine(engine, gt, id) {
+ kfree(engine);
+ gt->engine[id] = NULL;
}
}
/**
* intel_engines_init_mmio() - allocate and prepare the Engine Command Streamers
- * @i915: the i915 device
+ * @gt: pointer to struct intel_gt
*
* Return: non-zero if the initialization failed.
*/
-int intel_engines_init_mmio(struct drm_i915_private *i915)
+int intel_engines_init_mmio(struct intel_gt *gt)
{
+ struct drm_i915_private *i915 = gt->i915;
struct intel_device_info *device_info = mkwrite_device_info(i915);
const unsigned int engine_mask = INTEL_INFO(i915)->engine_mask;
unsigned int mask = 0;
@@ -423,14 +436,14 @@ int intel_engines_init_mmio(struct drm_i915_private *i915)
WARN_ON(engine_mask &
GENMASK(BITS_PER_TYPE(mask) - 1, I915_NUM_ENGINES));
- if (i915_inject_load_failure())
+ if (i915_inject_probe_failure(i915))
return -ENODEV;
for (i = 0; i < ARRAY_SIZE(intel_engines); i++) {
if (!HAS_ENGINE(i915, i))
continue;
- err = intel_engine_setup(i915, i);
+ err = intel_engine_setup(gt, i);
if (err)
goto cleanup;
@@ -445,61 +458,19 @@ int intel_engines_init_mmio(struct drm_i915_private *i915)
if (WARN_ON(mask != engine_mask))
device_info->engine_mask = mask;
- /* We always presume we have at least RCS available for later probing */
- if (WARN_ON(!HAS_ENGINE(i915, RCS0))) {
- err = -ENODEV;
- goto cleanup;
- }
-
RUNTIME_INFO(i915)->num_engines = hweight32(mask);
- i915_check_and_clear_faults(i915);
-
- intel_setup_engine_capabilities(i915);
-
- return 0;
-
-cleanup:
- intel_engines_cleanup(i915);
- return err;
-}
-
-/**
- * intel_engines_init() - init the Engine Command Streamers
- * @i915: i915 device private
- *
- * Return: non-zero if the initialization failed.
- */
-int intel_engines_init(struct drm_i915_private *i915)
-{
- int (*init)(struct intel_engine_cs *engine);
- struct intel_engine_cs *engine;
- enum intel_engine_id id;
- int err;
-
- if (HAS_EXECLISTS(i915))
- init = intel_execlists_submission_init;
- else
- init = intel_ring_submission_init;
+ intel_gt_check_and_clear_faults(gt);
- for_each_engine(engine, i915, id) {
- err = init(engine);
- if (err)
- goto cleanup;
- }
+ intel_setup_engine_capabilities(gt);
return 0;
cleanup:
- intel_engines_cleanup(i915);
+ intel_engines_free(gt);
return err;
}
-static void intel_engine_init_batch_pool(struct intel_engine_cs *engine)
-{
- i915_gem_batch_pool_init(&engine->batch_pool, engine);
-}
-
void intel_engine_init_execlists(struct intel_engine_cs *engine)
{
struct intel_engine_execlists * const execlists = &engine->execlists;
@@ -508,6 +479,10 @@ void intel_engine_init_execlists(struct intel_engine_cs *engine)
GEM_BUG_ON(!is_power_of_2(execlists_num_ports(execlists)));
GEM_BUG_ON(execlists_num_ports(execlists) > EXECLIST_MAX_PORTS);
+ memset(execlists->pending, 0, sizeof(execlists->pending));
+ execlists->active =
+ memset(execlists->inflight, 0, sizeof(execlists->inflight));
+
execlists->queue_priority_hint = INT_MIN;
execlists->queue = RB_ROOT_CACHED;
}
@@ -536,7 +511,7 @@ static int pin_ggtt_status_page(struct intel_engine_cs *engine,
unsigned int flags;
flags = PIN_GLOBAL;
- if (!HAS_LLC(engine->i915))
+ if (!HAS_LLC(engine->i915) && i915_ggtt_has_aperture(engine->gt->ggtt))
/*
* On g33, we cannot place HWS above 256MiB, so
* restrict its pinning to the low mappable arena.
@@ -577,7 +552,7 @@ static int init_status_page(struct intel_engine_cs *engine)
i915_gem_object_set_cache_coherency(obj, I915_CACHE_LLC);
- vma = i915_vma_instance(obj, &engine->i915->ggtt.vm, NULL);
+ vma = i915_vma_instance(obj, &engine->gt->ggtt->vm, NULL);
if (IS_ERR(vma)) {
ret = PTR_ERR(vma);
goto err;
@@ -607,7 +582,7 @@ err:
return ret;
}
-static int intel_engine_setup_common(struct intel_engine_cs *engine)
+static int engine_setup_common(struct intel_engine_cs *engine)
{
int err;
@@ -620,104 +595,26 @@ static int intel_engine_setup_common(struct intel_engine_cs *engine)
intel_engine_init_active(engine, ENGINE_PHYSICAL);
intel_engine_init_breadcrumbs(engine);
intel_engine_init_execlists(engine);
- intel_engine_init_hangcheck(engine);
- intel_engine_init_batch_pool(engine);
intel_engine_init_cmd_parser(engine);
intel_engine_init__pm(engine);
+ intel_engine_init_retire(engine);
+
+ intel_engine_pool_init(&engine->pool);
/* Use the whole device by default */
engine->sseu =
intel_sseu_from_device_info(&RUNTIME_INFO(engine->i915)->sseu);
- return 0;
-}
-
-/**
- * intel_engines_setup- setup engine state not requiring hw access
- * @i915: Device to setup.
- *
- * Initializes engine structure members shared between legacy and execlists
- * submission modes which do not require hardware access.
- *
- * Typically done early in the submission mode specific engine setup stage.
- */
-int intel_engines_setup(struct drm_i915_private *i915)
-{
- int (*setup)(struct intel_engine_cs *engine);
- struct intel_engine_cs *engine;
- enum intel_engine_id id;
- int err;
-
- if (HAS_EXECLISTS(i915))
- setup = intel_execlists_submission_setup;
- else
- setup = intel_ring_submission_setup;
-
- for_each_engine(engine, i915, id) {
- err = intel_engine_setup_common(engine);
- if (err)
- goto cleanup;
-
- err = setup(engine);
- if (err)
- goto cleanup;
-
- /* We expect the backend to take control over its state */
- GEM_BUG_ON(engine->destroy == (typeof(engine->destroy))kfree);
-
- GEM_BUG_ON(!engine->cops);
- }
+ intel_engine_init_workarounds(engine);
+ intel_engine_init_whitelist(engine);
+ intel_engine_init_ctx_wa(engine);
return 0;
-
-cleanup:
- intel_engines_cleanup(i915);
- return err;
-}
-
-void intel_engines_set_scheduler_caps(struct drm_i915_private *i915)
-{
- static const struct {
- u8 engine;
- u8 sched;
- } map[] = {
-#define MAP(x, y) { ilog2(I915_ENGINE_HAS_##x), ilog2(I915_SCHEDULER_CAP_##y) }
- MAP(PREEMPTION, PREEMPTION),
- MAP(SEMAPHORES, SEMAPHORES),
-#undef MAP
- };
- struct intel_engine_cs *engine;
- enum intel_engine_id id;
- u32 enabled, disabled;
-
- enabled = 0;
- disabled = 0;
- for_each_engine(engine, i915, id) { /* all engines must agree! */
- int i;
-
- if (engine->schedule)
- enabled |= (I915_SCHEDULER_CAP_ENABLED |
- I915_SCHEDULER_CAP_PRIORITY);
- else
- disabled |= (I915_SCHEDULER_CAP_ENABLED |
- I915_SCHEDULER_CAP_PRIORITY);
-
- for (i = 0; i < ARRAY_SIZE(map); i++) {
- if (engine->flags & BIT(map[i].engine))
- enabled |= BIT(map[i].sched);
- else
- disabled |= BIT(map[i].sched);
- }
- }
-
- i915->caps.scheduler = enabled & ~disabled;
- if (!(i915->caps.scheduler & I915_SCHEDULER_CAP_ENABLED))
- i915->caps.scheduler = 0;
}
struct measure_breadcrumb {
struct i915_request rq;
- struct i915_timeline timeline;
+ struct intel_timeline timeline;
struct intel_ring ring;
u32 cs[1024];
};
@@ -727,19 +624,19 @@ static int measure_breadcrumb_dw(struct intel_engine_cs *engine)
struct measure_breadcrumb *frame;
int dw = -ENOMEM;
- GEM_BUG_ON(!engine->i915->gt.scratch);
+ GEM_BUG_ON(!engine->gt->scratch);
frame = kzalloc(sizeof(*frame), GFP_KERNEL);
if (!frame)
return -ENOMEM;
- if (i915_timeline_init(engine->i915,
- &frame->timeline,
- engine->status_page.vma))
+ if (intel_timeline_init(&frame->timeline,
+ engine->gt,
+ engine->status_page.vma))
goto out_frame;
- INIT_LIST_HEAD(&frame->ring.request_list);
- frame->ring.timeline = &frame->timeline;
+ mutex_lock(&frame->timeline.mutex);
+
frame->ring.vaddr = frame->cs;
frame->ring.size = sizeof(frame->cs);
frame->ring.effective_size = frame->ring.size;
@@ -748,48 +645,33 @@ static int measure_breadcrumb_dw(struct intel_engine_cs *engine)
frame->rq.i915 = engine->i915;
frame->rq.engine = engine;
frame->rq.ring = &frame->ring;
- frame->rq.timeline = &frame->timeline;
+ rcu_assign_pointer(frame->rq.timeline, &frame->timeline);
- dw = i915_timeline_pin(&frame->timeline);
+ dw = intel_timeline_pin(&frame->timeline);
if (dw < 0)
goto out_timeline;
+ spin_lock_irq(&engine->active.lock);
dw = engine->emit_fini_breadcrumb(&frame->rq, frame->cs) - frame->cs;
+ spin_unlock_irq(&engine->active.lock);
+
GEM_BUG_ON(dw & 1); /* RING_TAIL must be qword aligned */
- i915_timeline_unpin(&frame->timeline);
+ intel_timeline_unpin(&frame->timeline);
out_timeline:
- i915_timeline_fini(&frame->timeline);
+ mutex_unlock(&frame->timeline.mutex);
+ intel_timeline_fini(&frame->timeline);
out_frame:
kfree(frame);
return dw;
}
-static int pin_context(struct i915_gem_context *ctx,
- struct intel_engine_cs *engine,
- struct intel_context **out)
-{
- struct intel_context *ce;
- int err;
-
- ce = i915_gem_context_get_engine(ctx, engine->id);
- if (IS_ERR(ce))
- return PTR_ERR(ce);
-
- err = intel_context_pin(ce);
- intel_context_put(ce);
- if (err)
- return err;
-
- *out = ce;
- return 0;
-}
-
void
intel_engine_init_active(struct intel_engine_cs *engine, unsigned int subclass)
{
INIT_LIST_HEAD(&engine->active.requests);
+ INIT_LIST_HEAD(&engine->active.hold);
spin_lock_init(&engine->active.lock);
lockdep_set_subclass(&engine->active.lock, subclass);
@@ -807,6 +689,36 @@ intel_engine_init_active(struct intel_engine_cs *engine, unsigned int subclass)
#endif
}
+static struct intel_context *
+create_kernel_context(struct intel_engine_cs *engine)
+{
+ static struct lock_class_key kernel;
+ struct intel_context *ce;
+ int err;
+
+ ce = intel_context_create(engine);
+ if (IS_ERR(ce))
+ return ce;
+
+ __set_bit(CONTEXT_BARRIER_BIT, &ce->flags);
+
+ err = intel_context_pin(ce); /* perma-pin so it is always available */
+ if (err) {
+ intel_context_put(ce);
+ return ERR_PTR(err);
+ }
+
+ /*
+ * Give our perma-pinned kernel timelines a separate lockdep class,
+ * so that we can use them from within the normal user timelines
+ * should we need to inject GPU operations during their request
+ * construction.
+ */
+ lockdep_set_class(&ce->timeline->mutex, &kernel);
+
+ return ce;
+}
+
/**
* intel_engines_init_common - initialize engine state which might require hw access
* @engine: Engine to initialize.
@@ -818,47 +730,65 @@ intel_engine_init_active(struct intel_engine_cs *engine, unsigned int subclass)
*
* Returns zero on success or an error code on failure.
*/
-int intel_engine_init_common(struct intel_engine_cs *engine)
+static int engine_init_common(struct intel_engine_cs *engine)
{
- struct drm_i915_private *i915 = engine->i915;
+ struct intel_context *ce;
int ret;
- /* We may need to do things with the shrinker which
+ engine->set_default_submission(engine);
+
+ ret = measure_breadcrumb_dw(engine);
+ if (ret < 0)
+ return ret;
+
+ engine->emit_fini_breadcrumb_dw = ret;
+
+ /*
+ * We may need to do things with the shrinker which
* require us to immediately switch back to the default
* context. This can cause a problem as pinning the
* default context also requires GTT space which may not
* be available. To avoid this we always pin the default
* context.
*/
- ret = pin_context(i915->kernel_context, engine,
- &engine->kernel_context);
- if (ret)
- return ret;
+ ce = create_kernel_context(engine);
+ if (IS_ERR(ce))
+ return PTR_ERR(ce);
- /*
- * Similarly the preempt context must always be available so that
- * we can interrupt the engine at any time. However, as preemption
- * is optional, we allow it to fail.
- */
- if (i915->preempt_context)
- pin_context(i915->preempt_context, engine,
- &engine->preempt_context);
+ engine->kernel_context = ce;
- ret = measure_breadcrumb_dw(engine);
- if (ret < 0)
- goto err_unpin;
+ return 0;
+}
- engine->emit_fini_breadcrumb_dw = ret;
+int intel_engines_init(struct intel_gt *gt)
+{
+ int (*setup)(struct intel_engine_cs *engine);
+ struct intel_engine_cs *engine;
+ enum intel_engine_id id;
+ int err;
- engine->set_default_submission(engine);
+ if (HAS_EXECLISTS(gt->i915))
+ setup = intel_execlists_submission_setup;
+ else
+ setup = intel_ring_submission_setup;
- return 0;
+ for_each_engine(engine, gt, id) {
+ err = engine_setup_common(engine);
+ if (err)
+ return err;
-err_unpin:
- if (engine->preempt_context)
- intel_context_unpin(engine->preempt_context);
- intel_context_unpin(engine->kernel_context);
- return ret;
+ err = setup(engine);
+ if (err)
+ return err;
+
+ err = engine_init_common(engine);
+ if (err)
+ return err;
+
+ intel_engine_add_user(engine);
+ }
+
+ return 0;
}
/**
@@ -871,19 +801,22 @@ err_unpin:
void intel_engine_cleanup_common(struct intel_engine_cs *engine)
{
GEM_BUG_ON(!list_empty(&engine->active.requests));
+ tasklet_kill(&engine->execlists.tasklet); /* flush the callback */
cleanup_status_page(engine);
+ intel_engine_fini_retire(engine);
+ intel_engine_pool_fini(&engine->pool);
intel_engine_fini_breadcrumbs(engine);
intel_engine_cleanup_cmd_parser(engine);
- i915_gem_batch_pool_fini(&engine->batch_pool);
if (engine->default_state)
i915_gem_object_put(engine->default_state);
- if (engine->preempt_context)
- intel_context_unpin(engine->preempt_context);
- intel_context_unpin(engine->kernel_context);
+ if (engine->kernel_context) {
+ intel_context_unpin(engine->kernel_context);
+ intel_context_put(engine->kernel_context);
+ }
GEM_BUG_ON(!llist_empty(&engine->barrier_tasks));
intel_wa_list_free(&engine->ctx_wa_list);
@@ -919,6 +852,21 @@ u64 intel_engine_get_last_batch_head(const struct intel_engine_cs *engine)
return bbaddr;
}
+static unsigned long stop_timeout(const struct intel_engine_cs *engine)
+{
+ if (in_atomic() || irqs_disabled()) /* inside atomic preempt-reset? */
+ return 0;
+
+ /*
+ * If we are doing a normal GPU reset, we can take our time and allow
+ * the engine to quiesce. We've stopped submission to the engine, and
+ * if we wait long enough an innocent context should complete and
+ * leave the engine idle, so it is not caught unaware by
+ * the forthcoming GPU reset (which usually follows the stop_cs)!
+ */
+ return READ_ONCE(engine->props.stop_timeout_ms);
+}
+
int intel_engine_stop_cs(struct intel_engine_cs *engine)
{
struct intel_uncore *uncore = engine->uncore;
@@ -929,16 +877,16 @@ int intel_engine_stop_cs(struct intel_engine_cs *engine)
if (INTEL_GEN(engine->i915) < 3)
return -ENODEV;
- GEM_TRACE("%s\n", engine->name);
+ ENGINE_TRACE(engine, "\n");
intel_uncore_write_fw(uncore, mode, _MASKED_BIT_ENABLE(STOP_RING));
err = 0;
if (__intel_wait_for_register_fw(uncore,
mode, MODE_IDLE, MODE_IDLE,
- 1000, 0,
+ 1000, stop_timeout(engine),
NULL)) {
- GEM_TRACE("%s: timed out on STOP_RING -> IDLE\n", engine->name);
+ ENGINE_TRACE(engine, "timed out on STOP_RING -> IDLE\n");
err = -ETIMEDOUT;
}
@@ -950,7 +898,7 @@ int intel_engine_stop_cs(struct intel_engine_cs *engine)
void intel_engine_cancel_stop_cs(struct intel_engine_cs *engine)
{
- GEM_TRACE("%s\n", engine->name);
+ ENGINE_TRACE(engine, "\n");
ENGINE_WRITE_FW(engine, RING_MI_MODE, _MASKED_BIT_DISABLE(STOP_RING));
}
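
A minimal sketch of how a caller might pair these two helpers around a reset, assuming an engine pointer in hand; do_engine_reset() stands in for the actual hardware reset path (which lives in intel_reset.c):

static int sketch_reset_engine(struct intel_engine_cs *engine)
{
	int err;

	/* Park the command streamer; proceed even on timeout */
	err = intel_engine_stop_cs(engine);
	if (err && err != -ETIMEDOUT)
		return err;

	do_engine_reset(engine); /* hypothetical HW reset helper */

	/* Re-enable submission once the engine state is sane again */
	intel_engine_cancel_stop_cs(engine);
	return 0;
}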
@@ -966,57 +914,23 @@ const char *i915_cache_level_str(struct drm_i915_private *i915, int type)
}
}
-u32 intel_calculate_mcr_s_ss_select(struct drm_i915_private *dev_priv)
-{
- const struct sseu_dev_info *sseu = &RUNTIME_INFO(dev_priv)->sseu;
- unsigned int slice = fls(sseu->slice_mask) - 1;
- unsigned int subslice;
- u32 mcr_s_ss_select;
-
- GEM_BUG_ON(slice >= ARRAY_SIZE(sseu->subslice_mask));
- subslice = fls(sseu->subslice_mask[slice]);
- GEM_BUG_ON(!subslice);
- subslice--;
-
- if (IS_GEN(dev_priv, 10))
- mcr_s_ss_select = GEN8_MCR_SLICE(slice) |
- GEN8_MCR_SUBSLICE(subslice);
- else if (INTEL_GEN(dev_priv) >= 11)
- mcr_s_ss_select = GEN11_MCR_SLICE(slice) |
- GEN11_MCR_SUBSLICE(subslice);
- else
- mcr_s_ss_select = 0;
-
- return mcr_s_ss_select;
-}
-
static u32
-read_subslice_reg(struct intel_engine_cs *engine, int slice, int subslice,
- i915_reg_t reg)
+read_subslice_reg(const struct intel_engine_cs *engine,
+ int slice, int subslice, i915_reg_t reg)
{
struct drm_i915_private *i915 = engine->i915;
struct intel_uncore *uncore = engine->uncore;
- u32 mcr_slice_subslice_mask;
- u32 mcr_slice_subslice_select;
- u32 default_mcr_s_ss_select;
- u32 mcr;
- u32 ret;
+ u32 mcr_mask, mcr_ss, mcr, old_mcr, val;
enum forcewake_domains fw_domains;
if (INTEL_GEN(i915) >= 11) {
- mcr_slice_subslice_mask = GEN11_MCR_SLICE_MASK |
- GEN11_MCR_SUBSLICE_MASK;
- mcr_slice_subslice_select = GEN11_MCR_SLICE(slice) |
- GEN11_MCR_SUBSLICE(subslice);
+ mcr_mask = GEN11_MCR_SLICE_MASK | GEN11_MCR_SUBSLICE_MASK;
+ mcr_ss = GEN11_MCR_SLICE(slice) | GEN11_MCR_SUBSLICE(subslice);
} else {
- mcr_slice_subslice_mask = GEN8_MCR_SLICE_MASK |
- GEN8_MCR_SUBSLICE_MASK;
- mcr_slice_subslice_select = GEN8_MCR_SLICE(slice) |
- GEN8_MCR_SUBSLICE(subslice);
+ mcr_mask = GEN8_MCR_SLICE_MASK | GEN8_MCR_SUBSLICE_MASK;
+ mcr_ss = GEN8_MCR_SLICE(slice) | GEN8_MCR_SUBSLICE(subslice);
}
- default_mcr_s_ss_select = intel_calculate_mcr_s_ss_select(i915);
-
fw_domains = intel_uncore_forcewake_for_reg(uncore, reg,
FW_REG_READ);
fw_domains |= intel_uncore_forcewake_for_reg(uncore,
@@ -1026,33 +940,31 @@ read_subslice_reg(struct intel_engine_cs *engine, int slice, int subslice,
spin_lock_irq(&uncore->lock);
intel_uncore_forcewake_get__locked(uncore, fw_domains);
- mcr = intel_uncore_read_fw(uncore, GEN8_MCR_SELECTOR);
+ old_mcr = mcr = intel_uncore_read_fw(uncore, GEN8_MCR_SELECTOR);
- WARN_ON_ONCE((mcr & mcr_slice_subslice_mask) !=
- default_mcr_s_ss_select);
-
- mcr &= ~mcr_slice_subslice_mask;
- mcr |= mcr_slice_subslice_select;
+ mcr &= ~mcr_mask;
+ mcr |= mcr_ss;
intel_uncore_write_fw(uncore, GEN8_MCR_SELECTOR, mcr);
- ret = intel_uncore_read_fw(uncore, reg);
+ val = intel_uncore_read_fw(uncore, reg);
- mcr &= ~mcr_slice_subslice_mask;
- mcr |= default_mcr_s_ss_select;
+ mcr &= ~mcr_mask;
+ mcr |= old_mcr & mcr_mask;
intel_uncore_write_fw(uncore, GEN8_MCR_SELECTOR, mcr);
intel_uncore_forcewake_put__locked(uncore, fw_domains);
spin_unlock_irq(&uncore->lock);
- return ret;
+ return val;
}
/* NB: please notice the memset */
-void intel_engine_get_instdone(struct intel_engine_cs *engine,
+void intel_engine_get_instdone(const struct intel_engine_cs *engine,
struct intel_instdone *instdone)
{
struct drm_i915_private *i915 = engine->i915;
+ const struct sseu_dev_info *sseu = &RUNTIME_INFO(i915)->sseu;
struct intel_uncore *uncore = engine->uncore;
u32 mmio_base = engine->mmio_base;
int slice;
@@ -1070,7 +982,7 @@ void intel_engine_get_instdone(struct intel_engine_cs *engine,
instdone->slice_common =
intel_uncore_read(uncore, GEN7_SC_INSTDONE);
- for_each_instdone_slice_subslice(i915, slice, subslice) {
+ for_each_instdone_slice_subslice(i915, sseu, slice, subslice) {
instdone->sampler[slice][subslice] =
read_subslice_reg(engine, slice, subslice,
GEN7_SAMPLER_INSTDONE);
@@ -1113,16 +1025,12 @@ void intel_engine_get_instdone(struct intel_engine_cs *engine,
static bool ring_is_idle(struct intel_engine_cs *engine)
{
- struct drm_i915_private *dev_priv = engine->i915;
- intel_wakeref_t wakeref;
bool idle = true;
if (I915_SELFTEST_ONLY(!engine->mmio_base))
return true;
- /* If the whole device is asleep, the engine must be idle */
- wakeref = intel_runtime_pm_get_if_in_use(&dev_priv->runtime_pm);
- if (!wakeref)
+ if (!intel_engine_pm_get_if_awake(engine))
return true;
/* First check that no commands are left in the ring */
@@ -1131,15 +1039,34 @@ static bool ring_is_idle(struct intel_engine_cs *engine)
idle = false;
/* No bit for gen2, so assume the CS parser is idle */
- if (INTEL_GEN(dev_priv) > 2 &&
+ if (INTEL_GEN(engine->i915) > 2 &&
!(ENGINE_READ(engine, RING_MI_MODE) & MODE_IDLE))
idle = false;
- intel_runtime_pm_put(&dev_priv->runtime_pm, wakeref);
+ intel_engine_pm_put(engine);
return idle;
}
+void intel_engine_flush_submission(struct intel_engine_cs *engine)
+{
+ struct tasklet_struct *t = &engine->execlists.tasklet;
+
+ if (__tasklet_is_scheduled(t)) {
+ local_bh_disable();
+ if (tasklet_trylock(t)) {
+ /* Must wait for any GPU reset in progress. */
+ if (__tasklet_is_enabled(t))
+ t->func(t->data);
+ tasklet_unlock(t);
+ }
+ local_bh_enable();
+ }
+
+ /* Otherwise flush the tasklet if it was running on another cpu */
+ tasklet_unlock_wait(t);
+}
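
A hedged sketch of the intended usage: drive any pending submission tasklet to completion before sampling execlists state, so the snapshot is not stale; execlists_active() is assumed from intel_engine.h:

static bool sketch_engine_quiesced(struct intel_engine_cs *engine)
{
	/* Settle the submission tasklet before inspecting the ports */
	intel_engine_flush_submission(engine);

	return !execlists_active(&engine->execlists);
}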
+
/**
 * intel_engine_is_idle() - Report if the engine has finished processing all work
* @engine: the intel_engine_cs
@@ -1150,31 +1077,19 @@ static bool ring_is_idle(struct intel_engine_cs *engine)
bool intel_engine_is_idle(struct intel_engine_cs *engine)
{
/* More white lies, if wedged, hw state is inconsistent */
- if (i915_reset_failed(engine->i915))
+ if (intel_gt_is_wedged(engine->gt))
return true;
- if (!intel_wakeref_active(&engine->wakeref))
+ if (!intel_engine_pm_is_awake(engine))
return true;
/* Waiting to drain ELSP? */
- if (READ_ONCE(engine->execlists.active)) {
- struct tasklet_struct *t = &engine->execlists.tasklet;
+ if (execlists_active(&engine->execlists)) {
+ synchronize_hardirq(engine->i915->drm.pdev->irq);
- synchronize_hardirq(engine->i915->drm.irq);
+ intel_engine_flush_submission(engine);
- local_bh_disable();
- if (tasklet_trylock(t)) {
- /* Must wait for any GPU reset in progress. */
- if (__tasklet_is_enabled(t))
- t->func(t->data);
- tasklet_unlock(t);
- }
- local_bh_enable();
-
- /* Otherwise flush the tasklet if it was on another cpu */
- tasklet_unlock_wait(t);
-
- if (READ_ONCE(engine->execlists.active))
+ if (execlists_active(&engine->execlists))
return false;
}
@@ -1186,7 +1101,7 @@ bool intel_engine_is_idle(struct intel_engine_cs *engine)
return ring_is_idle(engine);
}
-bool intel_engines_are_idle(struct drm_i915_private *i915)
+bool intel_engines_are_idle(struct intel_gt *gt)
{
struct intel_engine_cs *engine;
enum intel_engine_id id;
@@ -1195,14 +1110,14 @@ bool intel_engines_are_idle(struct drm_i915_private *i915)
* If the driver is wedged, HW state may be very inconsistent and
* report that it is still busy, even though we have stopped using it.
*/
- if (i915_reset_failed(i915))
+ if (intel_gt_is_wedged(gt))
return true;
/* Already parked (and passed an idleness test); must still be idle */
- if (!READ_ONCE(i915->gt.awake))
+ if (!READ_ONCE(gt->awake))
return true;
- for_each_engine(engine, i915, id) {
+ for_each_engine(engine, gt, id) {
if (!intel_engine_is_idle(engine))
return false;
}
@@ -1210,12 +1125,12 @@ bool intel_engines_are_idle(struct drm_i915_private *i915)
return true;
}
-void intel_engines_reset_default_submission(struct drm_i915_private *i915)
+void intel_engines_reset_default_submission(struct intel_gt *gt)
{
struct intel_engine_cs *engine;
enum intel_engine_id id;
- for_each_engine(engine, i915, id)
+ for_each_engine(engine, gt, id)
engine->set_default_submission(engine);
}
@@ -1227,6 +1142,8 @@ bool intel_engine_can_store_dword(struct intel_engine_cs *engine)
case 3:
/* maybe only uses physical not virtual addresses */
return !(IS_I915G(engine->i915) || IS_I915GM(engine->i915));
+ case 4:
+ return !IS_I965G(engine->i915); /* who knows! */
case 6:
return engine->class != VIDEO_DECODE_CLASS; /* b0rked */
default:
@@ -1234,20 +1151,6 @@ bool intel_engine_can_store_dword(struct intel_engine_cs *engine)
}
}
-unsigned int intel_engines_has_context_isolation(struct drm_i915_private *i915)
-{
- struct intel_engine_cs *engine;
- enum intel_engine_id id;
- unsigned int which;
-
- which = 0;
- for_each_engine(engine, i915, id)
- if (engine->default_state)
- which |= BIT(engine->uabi_class);
-
- return which;
-}
-
static int print_sched_attr(struct drm_i915_private *i915,
const struct i915_sched_attr *attr,
char *buf, int x, int len)
@@ -1316,16 +1219,46 @@ static void hexdump(struct drm_printer *m, const void *buf, size_t len)
}
}
+static struct intel_timeline *get_timeline(struct i915_request *rq)
+{
+ struct intel_timeline *tl;
+
+ /*
+ * Even though we are holding the engine->active.lock here, there
+	 * is no control over the submission queue per se, and we are
+ * inspecting the active state at a random point in time, with an
+ * unknown queue. Play safe and make sure the timeline remains valid.
+ * (Only being used for pretty printing, one extra kref shouldn't
+ * cause a camel stampede!)
+ */
+ rcu_read_lock();
+ tl = rcu_dereference(rq->timeline);
+ if (!kref_get_unless_zero(&tl->kref))
+ tl = NULL;
+ rcu_read_unlock();
+
+ return tl;
+}
+
+static const char *repr_timer(const struct timer_list *t)
+{
+ if (!READ_ONCE(t->expires))
+ return "inactive";
+
+ if (timer_pending(t))
+ return "active";
+
+ return "expired";
+}
+
static void intel_engine_print_registers(struct intel_engine_cs *engine,
struct drm_printer *m)
{
struct drm_i915_private *dev_priv = engine->i915;
- const struct intel_engine_execlists * const execlists =
- &engine->execlists;
- unsigned long flags;
+ struct intel_engine_execlists * const execlists = &engine->execlists;
u64 addr;
- if (engine->id == RCS0 && IS_GEN_RANGE(dev_priv, 4, 7))
+ if (engine->id == RENDER_CLASS && IS_GEN_RANGE(dev_priv, 4, 7))
drm_printf(m, "\tCCID: 0x%08x\n", ENGINE_READ(engine, CCID));
drm_printf(m, "\tRING_START: 0x%08x\n",
ENGINE_READ(engine, RING_START));
@@ -1372,25 +1305,28 @@ static void intel_engine_print_registers(struct intel_engine_cs *engine,
}
if (HAS_EXECLISTS(dev_priv)) {
+ struct i915_request * const *port, *rq;
const u32 *hws =
&engine->status_page.addr[I915_HWS_CSB_BUF0_INDEX];
const u8 num_entries = execlists->csb_size;
unsigned int idx;
u8 read, write;
- drm_printf(m, "\tExeclist status: 0x%08x %08x, entries %u\n",
- ENGINE_READ(engine, RING_EXECLIST_STATUS_LO),
- ENGINE_READ(engine, RING_EXECLIST_STATUS_HI),
- num_entries);
+ drm_printf(m, "\tExeclist tasklet queued? %s (%s), preempt? %s, timeslice? %s\n",
+ yesno(test_bit(TASKLET_STATE_SCHED,
+ &engine->execlists.tasklet.state)),
+ enableddisabled(!atomic_read(&engine->execlists.tasklet.count)),
+ repr_timer(&engine->execlists.preempt),
+ repr_timer(&engine->execlists.timer));
read = execlists->csb_head;
write = READ_ONCE(*execlists->csb_write);
- drm_printf(m, "\tExeclist CSB read %d, write %d, tasklet queued? %s (%s)\n",
- read, write,
- yesno(test_bit(TASKLET_STATE_SCHED,
- &engine->execlists.tasklet.state)),
- enableddisabled(!atomic_read(&engine->execlists.tasklet.count)));
+ drm_printf(m, "\tExeclist status: 0x%08x %08x; CSB read:%d, write:%d, entries:%d\n",
+ ENGINE_READ(engine, RING_EXECLIST_STATUS_LO),
+ ENGINE_READ(engine, RING_EXECLIST_STATUS_HI),
+ read, write, num_entries);
+
if (read >= num_entries)
read = 0;
if (write >= num_entries)
@@ -1403,29 +1339,47 @@ static void intel_engine_print_registers(struct intel_engine_cs *engine,
idx, hws[idx * 2], hws[idx * 2 + 1]);
}
- spin_lock_irqsave(&engine->active.lock, flags);
- for (idx = 0; idx < execlists_num_ports(execlists); idx++) {
- struct i915_request *rq;
- unsigned int count;
+ execlists_active_lock_bh(execlists);
+ rcu_read_lock();
+ for (port = execlists->active; (rq = *port); port++) {
char hdr[80];
-
- rq = port_unpack(&execlists->port[idx], &count);
- if (!rq) {
- drm_printf(m, "\t\tELSP[%d] idle\n", idx);
- } else if (!i915_request_signaled(rq)) {
- snprintf(hdr, sizeof(hdr),
- "\t\tELSP[%d] count=%d, ring:{start:%08x, hwsp:%08x, seqno:%08x}, rq: ",
- idx, count,
- i915_ggtt_offset(rq->ring->vma),
- rq->timeline->hwsp_offset,
- hwsp_seqno(rq));
- print_request(m, rq, hdr);
- } else {
- print_request(m, rq, "\t\tELSP[%d] rq: ");
+ int len;
+
+ len = snprintf(hdr, sizeof(hdr),
+ "\t\tActive[%d]: ",
+ (int)(port - execlists->active));
+ if (!i915_request_signaled(rq)) {
+ struct intel_timeline *tl = get_timeline(rq);
+
+ len += snprintf(hdr + len, sizeof(hdr) - len,
+ "ring:{start:%08x, hwsp:%08x, seqno:%08x}, ",
+ i915_ggtt_offset(rq->ring->vma),
+ tl ? tl->hwsp_offset : 0,
+ hwsp_seqno(rq));
+
+ if (tl)
+ intel_timeline_put(tl);
}
+ snprintf(hdr + len, sizeof(hdr) - len, "rq: ");
+ print_request(m, rq, hdr);
+ }
+ for (port = execlists->pending; (rq = *port); port++) {
+ struct intel_timeline *tl = get_timeline(rq);
+ char hdr[80];
+
+ snprintf(hdr, sizeof(hdr),
+ "\t\tPending[%d] ring:{start:%08x, hwsp:%08x, seqno:%08x}, rq: ",
+ (int)(port - execlists->pending),
+ i915_ggtt_offset(rq->ring->vma),
+ tl ? tl->hwsp_offset : 0,
+ hwsp_seqno(rq));
+ print_request(m, rq, hdr);
+
+ if (tl)
+ intel_timeline_put(tl);
}
- drm_printf(m, "\t\tHW active? 0x%x\n", execlists->active);
- spin_unlock_irqrestore(&engine->active.lock, flags);
+ rcu_read_unlock();
+ execlists_active_unlock_bh(execlists);
} else if (INTEL_GEN(dev_priv) > 6) {
drm_printf(m, "\tPP_DIR_BASE: 0x%08x\n",
ENGINE_READ(engine, RING_PP_DIR_BASE));
@@ -1469,6 +1423,17 @@ static void print_request_ring(struct drm_printer *m, struct i915_request *rq)
}
}
+static unsigned long list_count(struct list_head *list)
+{
+ struct list_head *pos;
+ unsigned long count = 0;
+
+ list_for_each(pos, list)
+ count++;
+
+ return count;
+}
+
void intel_engine_dump(struct intel_engine_cs *engine,
struct drm_printer *m,
const char *header, ...)
@@ -1486,12 +1451,21 @@ void intel_engine_dump(struct intel_engine_cs *engine,
va_end(ap);
}
- if (i915_reset_failed(engine->i915))
+ if (intel_gt_is_wedged(engine->gt))
drm_printf(m, "*** WEDGED ***\n");
drm_printf(m, "\tAwake? %d\n", atomic_read(&engine->wakeref.count));
- drm_printf(m, "\tHangcheck: %d ms ago\n",
- jiffies_to_msecs(jiffies - engine->hangcheck.action_timestamp));
+ drm_printf(m, "\tBarriers?: %s\n",
+ yesno(!llist_empty(&engine->barrier_tasks)));
+ drm_printf(m, "\tLatency: %luus\n",
+ ewma__engine_latency_read(&engine->latency));
+
+ rcu_read_lock();
+ rq = READ_ONCE(engine->heartbeat.systole);
+ if (rq)
+ drm_printf(m, "\tHeartbeat: %d ms ago\n",
+ jiffies_to_msecs(jiffies - rq->emitted_jiffies));
+ rcu_read_unlock();
drm_printf(m, "\tReset count: %d (global %d)\n",
i915_reset_engine_count(error, engine),
i915_reset_count(error));
@@ -1501,6 +1475,8 @@ void intel_engine_dump(struct intel_engine_cs *engine,
spin_lock_irqsave(&engine->active.lock, flags);
rq = intel_engine_find_active_request(engine);
if (rq) {
+ struct intel_timeline *tl = get_timeline(rq);
+
print_request(m, rq, "\t\tactive ");
drm_printf(m, "\t\tring->start: 0x%08x\n",
@@ -1513,17 +1489,28 @@ void intel_engine_dump(struct intel_engine_cs *engine,
rq->ring->emit);
drm_printf(m, "\t\tring->space: 0x%08x\n",
rq->ring->space);
- drm_printf(m, "\t\tring->hwsp: 0x%08x\n",
- rq->timeline->hwsp_offset);
+
+ if (tl) {
+ drm_printf(m, "\t\tring->hwsp: 0x%08x\n",
+ tl->hwsp_offset);
+ intel_timeline_put(tl);
+ }
print_request_ring(m, rq);
+
+ if (rq->context->lrc_reg_state) {
+ drm_printf(m, "Logical Ring Context:\n");
+ hexdump(m, rq->context->lrc_reg_state, PAGE_SIZE);
+ }
}
+ drm_printf(m, "\tOn hold?: %lu\n", list_count(&engine->active.hold));
spin_unlock_irqrestore(&engine->active.lock, flags);
- wakeref = intel_runtime_pm_get_if_in_use(&engine->i915->runtime_pm);
+ drm_printf(m, "\tMMIO base: 0x%08x\n", engine->mmio_base);
+ wakeref = intel_runtime_pm_get_if_in_use(engine->uncore->rpm);
if (wakeref) {
intel_engine_print_registers(engine, m);
- intel_runtime_pm_put(&engine->i915->runtime_pm, wakeref);
+ intel_runtime_pm_put(engine->uncore->rpm, wakeref);
} else {
drm_printf(m, "\tDevice is asleep; skipping register dump\n");
}
@@ -1538,29 +1525,6 @@ void intel_engine_dump(struct intel_engine_cs *engine,
intel_engine_print_breadcrumbs(engine, m);
}
-static u8 user_class_map[] = {
- [I915_ENGINE_CLASS_RENDER] = RENDER_CLASS,
- [I915_ENGINE_CLASS_COPY] = COPY_ENGINE_CLASS,
- [I915_ENGINE_CLASS_VIDEO] = VIDEO_DECODE_CLASS,
- [I915_ENGINE_CLASS_VIDEO_ENHANCE] = VIDEO_ENHANCEMENT_CLASS,
-};
-
-struct intel_engine_cs *
-intel_engine_lookup_user(struct drm_i915_private *i915, u8 class, u8 instance)
-{
- if (class >= ARRAY_SIZE(user_class_map))
- return NULL;
-
- class = user_class_map[class];
-
- GEM_BUG_ON(class > MAX_ENGINE_CLASS);
-
- if (instance > MAX_ENGINE_INSTANCE)
- return NULL;
-
- return i915->engine_class[class][instance];
-}
-
/**
* intel_enable_engine_stats() - Enable engine busy tracking on engine
* @engine: engine to enable stats collection
@@ -1578,8 +1542,8 @@ int intel_enable_engine_stats(struct intel_engine_cs *engine)
if (!intel_engine_supports_stats(engine))
return -ENODEV;
- spin_lock_irqsave(&engine->active.lock, flags);
- write_seqlock(&engine->stats.lock);
+ execlists_active_lock_bh(execlists);
+ write_seqlock_irqsave(&engine->stats.lock, flags);
if (unlikely(engine->stats.enabled == ~0)) {
err = -EBUSY;
@@ -1587,15 +1551,19 @@ int intel_enable_engine_stats(struct intel_engine_cs *engine)
}
if (engine->stats.enabled++ == 0) {
- const struct execlist_port *port = execlists->port;
- unsigned int num_ports = execlists_num_ports(execlists);
+ struct i915_request * const *port;
+ struct i915_request *rq;
engine->stats.enabled_at = ktime_get();
/* XXX submission method oblivious? */
- while (num_ports-- && port_isset(port)) {
+ for (port = execlists->active; (rq = *port); port++)
engine->stats.active++;
- port++;
+
+ for (port = execlists->pending; (rq = *port); port++) {
+ /* Exclude any contexts already counted in active */
+ if (!intel_context_inflight_count(rq->context))
+ engine->stats.active++;
}
if (engine->stats.active)
@@ -1603,8 +1571,8 @@ int intel_enable_engine_stats(struct intel_engine_cs *engine)
}
unlock:
- write_sequnlock(&engine->stats.lock);
- spin_unlock_irqrestore(&engine->active.lock, flags);
+ write_sequnlock_irqrestore(&engine->stats.lock, flags);
+ execlists_active_unlock_bh(execlists);
return err;
}
@@ -1708,5 +1676,7 @@ intel_engine_find_active_request(struct intel_engine_cs *engine)
}
#if IS_ENABLED(CONFIG_DRM_I915_SELFTEST)
+#include "mock_engine.c"
+#include "selftest_engine.c"
#include "selftest_engine_cs.c"
#endif
diff --git a/drivers/gpu/drm/i915/gt/intel_engine_heartbeat.c b/drivers/gpu/drm/i915/gt/intel_engine_heartbeat.c
new file mode 100644
index 000000000000..6c6fd185457c
--- /dev/null
+++ b/drivers/gpu/drm/i915/gt/intel_engine_heartbeat.c
@@ -0,0 +1,242 @@
+/*
+ * SPDX-License-Identifier: MIT
+ *
+ * Copyright © 2019 Intel Corporation
+ */
+
+#include "i915_request.h"
+
+#include "intel_context.h"
+#include "intel_engine_heartbeat.h"
+#include "intel_engine_pm.h"
+#include "intel_engine.h"
+#include "intel_gt.h"
+#include "intel_reset.h"
+
+/*
+ * While the engine is active, we send a periodic pulse along the engine
+ * to check on its health and to flush any idle-barriers. If that request
+ * is stuck, and we fail to preempt it, we declare the engine hung and
+ * issue a reset -- in the hope that it restores progress.
+ */
+
+static bool next_heartbeat(struct intel_engine_cs *engine)
+{
+ long delay;
+
+ delay = READ_ONCE(engine->props.heartbeat_interval_ms);
+ if (!delay)
+ return false;
+
+ delay = msecs_to_jiffies_timeout(delay);
+ if (delay >= HZ)
+ delay = round_jiffies_up_relative(delay);
+ schedule_delayed_work(&engine->heartbeat.work, delay);
+
+ return true;
+}
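
A worked example of the delay computation, assuming CONFIG_HZ=250 (the exact jiffy counts vary with HZ):

/*
 * heartbeat_interval_ms = 2500:
 *   msecs_to_jiffies_timeout(2500) -> 626 jiffies (>= HZ)
 *   round_jiffies_up_relative(626) -> rounded up to the next whole
 *   second, so idle engines batch their heartbeat wakeups together.
 */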
+
+static void idle_pulse(struct intel_engine_cs *engine, struct i915_request *rq)
+{
+ engine->wakeref_serial = READ_ONCE(engine->serial) + 1;
+ i915_request_add_active_barriers(rq);
+}
+
+static void show_heartbeat(const struct i915_request *rq,
+ struct intel_engine_cs *engine)
+{
+ struct drm_printer p = drm_debug_printer("heartbeat");
+
+ intel_engine_dump(engine, &p,
+ "%s heartbeat {prio:%d} not ticking\n",
+ engine->name,
+ rq->sched.attr.priority);
+}
+
+static void heartbeat(struct work_struct *wrk)
+{
+ struct i915_sched_attr attr = {
+ .priority = I915_USER_PRIORITY(I915_PRIORITY_MIN),
+ };
+ struct intel_engine_cs *engine =
+ container_of(wrk, typeof(*engine), heartbeat.work.work);
+ struct intel_context *ce = engine->kernel_context;
+ struct i915_request *rq;
+
+ rq = engine->heartbeat.systole;
+ if (rq && i915_request_completed(rq)) {
+ i915_request_put(rq);
+ engine->heartbeat.systole = NULL;
+ }
+
+ if (!intel_engine_pm_get_if_awake(engine))
+ return;
+
+ if (intel_gt_is_wedged(engine->gt))
+ goto out;
+
+ if (engine->heartbeat.systole) {
+ if (engine->schedule &&
+ rq->sched.attr.priority < I915_PRIORITY_BARRIER) {
+ /*
+ * Gradually raise the priority of the heartbeat to
+ * give high priority work [which presumably desires
+ * low latency and no jitter] the chance to naturally
+ * complete before being preempted.
+ */
+ attr.priority = I915_PRIORITY_MASK;
+ if (rq->sched.attr.priority >= attr.priority)
+ attr.priority |= I915_USER_PRIORITY(I915_PRIORITY_HEARTBEAT);
+ if (rq->sched.attr.priority >= attr.priority)
+ attr.priority = I915_PRIORITY_BARRIER;
+
+ local_bh_disable();
+ engine->schedule(rq, &attr);
+ local_bh_enable();
+ } else {
+ if (IS_ENABLED(CONFIG_DRM_I915_DEBUG_GEM))
+ show_heartbeat(rq, engine);
+
+ intel_gt_handle_error(engine->gt, engine->mask,
+ I915_ERROR_CAPTURE,
+ "stopped heartbeat on %s",
+ engine->name);
+ }
+ goto out;
+ }
+
+ if (engine->wakeref_serial == engine->serial)
+ goto out;
+
+ mutex_lock(&ce->timeline->mutex);
+
+ intel_context_enter(ce);
+ rq = __i915_request_create(ce, GFP_NOWAIT | __GFP_NOWARN);
+ intel_context_exit(ce);
+ if (IS_ERR(rq))
+ goto unlock;
+
+ idle_pulse(engine, rq);
+ if (i915_modparams.enable_hangcheck)
+ engine->heartbeat.systole = i915_request_get(rq);
+
+ __i915_request_commit(rq);
+ __i915_request_queue(rq, &attr);
+
+unlock:
+ mutex_unlock(&ce->timeline->mutex);
+out:
+ if (!next_heartbeat(engine))
+ i915_request_put(fetch_and_zero(&engine->heartbeat.systole));
+ intel_engine_pm_put(engine);
+}
+
+void intel_engine_unpark_heartbeat(struct intel_engine_cs *engine)
+{
+ if (!IS_ACTIVE(CONFIG_DRM_I915_HEARTBEAT_INTERVAL))
+ return;
+
+ next_heartbeat(engine);
+}
+
+void intel_engine_park_heartbeat(struct intel_engine_cs *engine)
+{
+ if (cancel_delayed_work(&engine->heartbeat.work))
+ i915_request_put(fetch_and_zero(&engine->heartbeat.systole));
+}
+
+void intel_engine_init_heartbeat(struct intel_engine_cs *engine)
+{
+ INIT_DELAYED_WORK(&engine->heartbeat.work, heartbeat);
+}
+
+int intel_engine_set_heartbeat(struct intel_engine_cs *engine,
+ unsigned long delay)
+{
+ int err;
+
+ /* Send one last pulse before parking to clean up persistent hogs */
+ if (!delay && IS_ACTIVE(CONFIG_DRM_I915_PREEMPT_TIMEOUT)) {
+ err = intel_engine_pulse(engine);
+ if (err)
+ return err;
+ }
+
+ WRITE_ONCE(engine->props.heartbeat_interval_ms, delay);
+
+ if (intel_engine_pm_get_if_awake(engine)) {
+ if (delay)
+ intel_engine_unpark_heartbeat(engine);
+ else
+ intel_engine_park_heartbeat(engine);
+ intel_engine_pm_put(engine);
+ }
+
+ return 0;
+}
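
A sketch of a hypothetical caller, e.g. a sysfs/debugfs store handler parsing a millisecond value from userspace; heartbeat_store() and its signature are illustrative only:

static ssize_t heartbeat_store(struct intel_engine_cs *engine,
			       const char *buf, size_t len)
{
	unsigned long delay;
	int err;

	err = kstrtoul(buf, 0, &delay); /* interval in ms; 0 disables */
	if (err)
		return err;

	err = intel_engine_set_heartbeat(engine, delay);
	if (err)
		return err;

	return len;
}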
+
+int intel_engine_pulse(struct intel_engine_cs *engine)
+{
+ struct i915_sched_attr attr = { .priority = I915_PRIORITY_BARRIER };
+ struct intel_context *ce = engine->kernel_context;
+ struct i915_request *rq;
+ int err = 0;
+
+ if (!intel_engine_has_preemption(engine))
+ return -ENODEV;
+
+ if (!intel_engine_pm_get_if_awake(engine))
+ return 0;
+
+ if (mutex_lock_interruptible(&ce->timeline->mutex))
+ goto out_rpm;
+
+ intel_context_enter(ce);
+ rq = __i915_request_create(ce, GFP_NOWAIT | __GFP_NOWARN);
+ intel_context_exit(ce);
+ if (IS_ERR(rq)) {
+ err = PTR_ERR(rq);
+ goto out_unlock;
+ }
+
+ __set_bit(I915_FENCE_FLAG_SENTINEL, &rq->fence.flags);
+ idle_pulse(engine, rq);
+
+ __i915_request_commit(rq);
+ __i915_request_queue(rq, &attr);
+
+out_unlock:
+ mutex_unlock(&ce->timeline->mutex);
+out_rpm:
+ intel_engine_pm_put(engine);
+ return err;
+}
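
A hedged sketch of pulsing every engine on a GT, e.g. to flush idle barriers or evict a persistent hog; engines without preemption are simply skipped:

static int sketch_pulse_all(struct intel_gt *gt)
{
	struct intel_engine_cs *engine;
	enum intel_engine_id id;
	int err;

	for_each_engine(engine, gt, id) {
		err = intel_engine_pulse(engine);
		if (err == -ENODEV) /* no preemption on this engine */
			continue;
		if (err)
			return err;
	}

	return 0;
}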
+
+int intel_engine_flush_barriers(struct intel_engine_cs *engine)
+{
+ struct i915_request *rq;
+ int err = 0;
+
+ if (llist_empty(&engine->barrier_tasks))
+ return 0;
+
+ if (!intel_engine_pm_get_if_awake(engine))
+ return 0;
+
+ rq = i915_request_create(engine->kernel_context);
+ if (IS_ERR(rq)) {
+ err = PTR_ERR(rq);
+ goto out_rpm;
+ }
+
+ idle_pulse(engine, rq);
+ i915_request_add(rq);
+
+out_rpm:
+ intel_engine_pm_put(engine);
+ return err;
+}
+
+#if IS_ENABLED(CONFIG_DRM_I915_SELFTEST)
+#include "selftest_engine_heartbeat.c"
+#endif
diff --git a/drivers/gpu/drm/i915/gt/intel_engine_heartbeat.h b/drivers/gpu/drm/i915/gt/intel_engine_heartbeat.h
new file mode 100644
index 000000000000..a7b8c0f9e005
--- /dev/null
+++ b/drivers/gpu/drm/i915/gt/intel_engine_heartbeat.h
@@ -0,0 +1,23 @@
+/*
+ * SPDX-License-Identifier: MIT
+ *
+ * Copyright © 2019 Intel Corporation
+ */
+
+#ifndef INTEL_ENGINE_HEARTBEAT_H
+#define INTEL_ENGINE_HEARTBEAT_H
+
+struct intel_engine_cs;
+
+void intel_engine_init_heartbeat(struct intel_engine_cs *engine);
+
+int intel_engine_set_heartbeat(struct intel_engine_cs *engine,
+ unsigned long delay);
+
+void intel_engine_park_heartbeat(struct intel_engine_cs *engine);
+void intel_engine_unpark_heartbeat(struct intel_engine_cs *engine);
+
+int intel_engine_pulse(struct intel_engine_cs *engine);
+int intel_engine_flush_barriers(struct intel_engine_cs *engine);
+
+#endif /* INTEL_ENGINE_HEARTBEAT_H */
diff --git a/drivers/gpu/drm/i915/gt/intel_engine_pm.c b/drivers/gpu/drm/i915/gt/intel_engine_pm.c
index ae5b6baf6dff..ea90ab3e396e 100644
--- a/drivers/gpu/drm/i915/gt/intel_engine_pm.c
+++ b/drivers/gpu/drm/i915/gt/intel_engine_pm.c
@@ -6,19 +6,26 @@
#include "i915_drv.h"
+#include "intel_context.h"
#include "intel_engine.h"
+#include "intel_engine_heartbeat.h"
#include "intel_engine_pm.h"
+#include "intel_engine_pool.h"
+#include "intel_gt.h"
#include "intel_gt_pm.h"
+#include "intel_rc6.h"
+#include "intel_ring.h"
static int __engine_unpark(struct intel_wakeref *wf)
{
struct intel_engine_cs *engine =
container_of(wf, typeof(*engine), wakeref);
+ struct intel_context *ce;
void *map;
- GEM_TRACE("%s\n", engine->name);
+ ENGINE_TRACE(engine, "\n");
- intel_gt_pm_get(engine->i915);
+ intel_gt_pm_get(engine->gt);
/* Pin the default state for fast resets from atomic context. */
map = NULL;
@@ -28,45 +35,124 @@ static int __engine_unpark(struct intel_wakeref *wf)
if (!IS_ERR_OR_NULL(map))
engine->pinned_default_state = map;
+ /* Discard stale context state from across idling */
+ ce = engine->kernel_context;
+ if (ce) {
+ GEM_BUG_ON(test_bit(CONTEXT_VALID_BIT, &ce->flags));
+
+ /* First poison the image to verify we never fully trust it */
+ if (IS_ENABLED(CONFIG_DRM_I915_DEBUG_GEM) && ce->state) {
+ struct drm_i915_gem_object *obj = ce->state->obj;
+ int type = i915_coherent_map_type(engine->i915);
+
+ map = i915_gem_object_pin_map(obj, type);
+ if (!IS_ERR(map)) {
+ memset(map, CONTEXT_REDZONE, obj->base.size);
+ i915_gem_object_flush_map(obj);
+ i915_gem_object_unpin_map(obj);
+ }
+ }
+
+ ce->ops->reset(ce);
+ }
+
if (engine->unpark)
engine->unpark(engine);
- intel_engine_init_hangcheck(engine);
+ intel_engine_unpark_heartbeat(engine);
+ return 0;
+}
+
+#if IS_ENABLED(CONFIG_LOCKDEP)
+
+static inline unsigned long __timeline_mark_lock(struct intel_context *ce)
+{
+ unsigned long flags;
+
+ local_irq_save(flags);
+ mutex_acquire(&ce->timeline->mutex.dep_map, 2, 0, _THIS_IP_);
+
+ return flags;
+}
+
+static inline void __timeline_mark_unlock(struct intel_context *ce,
+ unsigned long flags)
+{
+ mutex_release(&ce->timeline->mutex.dep_map, _THIS_IP_);
+ local_irq_restore(flags);
+}
+
+#else
+
+static inline unsigned long __timeline_mark_lock(struct intel_context *ce)
+{
return 0;
}
-void intel_engine_pm_get(struct intel_engine_cs *engine)
+static inline void __timeline_mark_unlock(struct intel_context *ce,
+ unsigned long flags)
{
- intel_wakeref_get(&engine->i915->runtime_pm, &engine->wakeref, __engine_unpark);
}
-void intel_engine_park(struct intel_engine_cs *engine)
+#endif /* !IS_ENABLED(CONFIG_LOCKDEP) */
+
+static void duration(struct dma_fence *fence, struct dma_fence_cb *cb)
{
+ struct i915_request *rq = to_request(fence);
+
+ ewma__engine_latency_add(&rq->engine->latency,
+ ktime_us_delta(rq->fence.timestamp,
+ rq->duration.emitted));
+}
+
+static void
+__queue_and_release_pm(struct i915_request *rq,
+ struct intel_timeline *tl,
+ struct intel_engine_cs *engine)
+{
+ struct intel_gt_timelines *timelines = &engine->gt->timelines;
+
+ ENGINE_TRACE(engine, "\n");
+
/*
- * We are committed now to parking this engine, make sure there
- * will be no more interrupts arriving later and the engine
- * is truly idle.
+ * We have to serialise all potential retirement paths with our
+ * submission, as we don't want to underflow either the
+ * engine->wakeref.counter or our timeline->active_count.
+ *
+ * Equally, we cannot allow a new submission to start until
+ * after we finish queueing, nor could we allow that submitter
+ * to retire us before we are ready!
*/
- if (wait_for(intel_engine_is_idle(engine), 10)) {
- struct drm_printer p = drm_debug_printer(__func__);
+ spin_lock(&timelines->lock);
- dev_err(engine->i915->drm.dev,
- "%s is not idle before parking\n",
- engine->name);
- intel_engine_dump(engine, &p, NULL);
- }
+ /* Let intel_gt_retire_requests() retire us (acquired under lock) */
+ if (!atomic_fetch_inc(&tl->active_count))
+ list_add_tail(&tl->link, &timelines->active_list);
+
+ /* Hand the request over to HW (and onwards to engine_retire()) */
+ __i915_request_queue(rq, NULL);
+
+ /* Let new submissions commence (and maybe retire this timeline) */
+ __intel_wakeref_defer_park(&engine->wakeref);
+
+ spin_unlock(&timelines->lock);
}
static bool switch_to_kernel_context(struct intel_engine_cs *engine)
{
+ struct intel_context *ce = engine->kernel_context;
struct i915_request *rq;
+ unsigned long flags;
+ bool result = true;
- /* Already inside the kernel context, safe to power down. */
- if (engine->wakeref_serial == engine->serial)
+ /* GPU is pointing to the void, as good as in the kernel context. */
+ if (intel_gt_is_wedged(engine->gt))
return true;
- /* GPU is pointing to the void, as good as in the kernel context. */
- if (i915_reset_failed(engine->i915))
+ GEM_BUG_ON(!intel_context_is_barrier(ce));
+
+ /* Already inside the kernel context, safe to power down. */
+ if (engine->wakeref_serial == engine->serial)
return true;
/*
@@ -80,19 +166,70 @@ static bool switch_to_kernel_context(struct intel_engine_cs *engine)
* This should hold true as we can only park the engine after
* retiring the last request, thus all rings should be empty and
* all timelines idle.
+ *
+ * For unlocking, there are 2 other parties and the GPU who have a
+ * stake here.
+ *
+ * A new gpu user will be waiting on the engine-pm to start their
+ * engine_unpark. New waiters are predicated on engine->wakeref.count
+ * and so intel_wakeref_defer_park() acts like a mutex_unlock of the
+ * engine->wakeref.
+ *
+ * The other party is intel_gt_retire_requests(), which is walking the
+ * list of active timelines looking for completions. Meanwhile as soon
+ * as we call __i915_request_queue(), the GPU may complete our request.
+ * Ergo, if we put ourselves on the timelines.active_list
+ * (see intel_timeline_enter()) before we increment the
+ * engine->wakeref.count, we may see the request completion and retire
+ * it, causing an underflow of the engine->wakeref.
*/
- rq = __i915_request_create(engine->kernel_context, GFP_NOWAIT);
+ flags = __timeline_mark_lock(ce);
+ GEM_BUG_ON(atomic_read(&ce->timeline->active_count) < 0);
+
+ rq = __i915_request_create(ce, GFP_NOWAIT);
if (IS_ERR(rq))
/* Context switch failed, hope for the best! Maybe reset? */
- return true;
+ goto out_unlock;
/* Check again on the next retirement. */
engine->wakeref_serial = engine->serial + 1;
+ i915_request_add_active_barriers(rq);
- i915_request_add_barriers(rq);
- __i915_request_commit(rq);
+ /* Install ourselves as a preemption barrier */
+ rq->sched.attr.priority = I915_PRIORITY_BARRIER;
+ if (likely(!__i915_request_commit(rq))) { /* engine should be idle! */
+ /*
+ * Use an interrupt for precise measurement of duration,
+ * otherwise we rely on someone else retiring all the requests
+ * which may delay the signaling (i.e. we will likely wait
+ * for the background request retirement that runs every
+ * second or two).
+ */
+ BUILD_BUG_ON(sizeof(rq->duration) > sizeof(rq->submitq));
+ dma_fence_add_callback(&rq->fence, &rq->duration.cb, duration);
+ rq->duration.emitted = ktime_get();
+ }
+
+ /* Expose ourselves to the world */
+ __queue_and_release_pm(rq, ce->timeline, engine);
- return false;
+ result = false;
+out_unlock:
+ __timeline_mark_unlock(ce, flags);
+ return result;
+}
+
+static void call_idle_barriers(struct intel_engine_cs *engine)
+{
+ struct llist_node *node, *next;
+
+ llist_for_each_safe(node, next, llist_del_all(&engine->barrier_tasks)) {
+ struct dma_fence_cb *cb =
+ container_of((struct list_head *)node,
+ typeof(*cb), node);
+
+ cb->func(ERR_PTR(-EAGAIN), cb);
+ }
}
static int __engine_park(struct intel_wakeref *wf)
@@ -112,9 +249,13 @@ static int __engine_park(struct intel_wakeref *wf)
if (!switch_to_kernel_context(engine))
return -EBUSY;
- GEM_TRACE("%s\n", engine->name);
+ ENGINE_TRACE(engine, "\n");
+
+ call_idle_barriers(engine); /* cleanup after wedging */
+ intel_engine_park_heartbeat(engine);
intel_engine_disarm_breadcrumbs(engine);
+ intel_engine_pool_park(&engine->pool);
/* Must be reset upon idling, or we may miss the busy wakeup. */
GEM_BUG_ON(engine->execlists.queue_priority_hint != INT_MIN);
@@ -129,16 +270,24 @@ static int __engine_park(struct intel_wakeref *wf)
engine->execlists.no_priolist = false;
- intel_gt_pm_put(engine->i915);
+ /* While gt calls i915_vma_parked(), we have to break the lock cycle */
+ intel_gt_pm_put_async(engine->gt);
return 0;
}
-void intel_engine_pm_put(struct intel_engine_cs *engine)
-{
- intel_wakeref_put(&engine->i915->runtime_pm, &engine->wakeref, __engine_park);
-}
+static const struct intel_wakeref_ops wf_ops = {
+ .get = __engine_unpark,
+ .put = __engine_park,
+};
void intel_engine_init__pm(struct intel_engine_cs *engine)
{
- intel_wakeref_init(&engine->wakeref);
+ struct intel_runtime_pm *rpm = engine->uncore->rpm;
+
+ intel_wakeref_init(&engine->wakeref, rpm, &wf_ops);
+ intel_engine_init_heartbeat(engine);
}
+
+#if IS_ENABLED(CONFIG_DRM_I915_SELFTEST)
+#include "selftest_engine_pm.c"
+#endif
diff --git a/drivers/gpu/drm/i915/gt/intel_engine_pm.h b/drivers/gpu/drm/i915/gt/intel_engine_pm.h
index a11c893f64c6..e52c2b0cb245 100644
--- a/drivers/gpu/drm/i915/gt/intel_engine_pm.h
+++ b/drivers/gpu/drm/i915/gt/intel_engine_pm.h
@@ -7,21 +7,60 @@
#ifndef INTEL_ENGINE_PM_H
#define INTEL_ENGINE_PM_H
+#include "i915_request.h"
#include "intel_engine_types.h"
#include "intel_wakeref.h"
-struct drm_i915_private;
+static inline bool
+intel_engine_pm_is_awake(const struct intel_engine_cs *engine)
+{
+ return intel_wakeref_is_active(&engine->wakeref);
+}
-void intel_engine_pm_get(struct intel_engine_cs *engine);
-void intel_engine_pm_put(struct intel_engine_cs *engine);
+static inline void intel_engine_pm_get(struct intel_engine_cs *engine)
+{
+ intel_wakeref_get(&engine->wakeref);
+}
-static inline bool
-intel_engine_pm_get_if_awake(struct intel_engine_cs *engine)
+static inline bool intel_engine_pm_get_if_awake(struct intel_engine_cs *engine)
{
return intel_wakeref_get_if_active(&engine->wakeref);
}
-void intel_engine_park(struct intel_engine_cs *engine);
+static inline void intel_engine_pm_put(struct intel_engine_cs *engine)
+{
+ intel_wakeref_put(&engine->wakeref);
+}
+
+static inline void intel_engine_pm_put_async(struct intel_engine_cs *engine)
+{
+ intel_wakeref_put_async(&engine->wakeref);
+}
+
+static inline void intel_engine_pm_flush(struct intel_engine_cs *engine)
+{
+ intel_wakeref_unlock_wait(&engine->wakeref);
+}
+
+static inline struct i915_request *
+intel_engine_create_kernel_request(struct intel_engine_cs *engine)
+{
+ struct i915_request *rq;
+
+ /*
+ * The engine->kernel_context is special as it is used inside
+ * the engine-pm barrier (see __engine_park()), circumventing
+ * the usual mutexes and relying on the engine-pm barrier
+ * instead. So whenever we use the engine->kernel_context
+ * outside of the barrier, we must manually handle the
+ * engine wakeref to serialise with the use inside.
+ */
+ intel_engine_pm_get(engine);
+ rq = i915_request_create(engine->kernel_context);
+ intel_engine_pm_put(engine);
+
+ return rq;
+}
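
A hedged sketch of emitting work on the kernel context from outside the engine-pm barrier; the helper takes and releases the engine wakeref around the request construction, so the caller submits as normal:

static int sketch_emit_kernel_nop(struct intel_engine_cs *engine)
{
	struct i915_request *rq;

	rq = intel_engine_create_kernel_request(engine);
	if (IS_ERR(rq))
		return PTR_ERR(rq);

	i915_request_add(rq); /* ordinary submission from here on */
	return 0;
}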
void intel_engine_init__pm(struct intel_engine_cs *engine);
diff --git a/drivers/gpu/drm/i915/gt/intel_engine_pool.c b/drivers/gpu/drm/i915/gt/intel_engine_pool.c
new file mode 100644
index 000000000000..397186818305
--- /dev/null
+++ b/drivers/gpu/drm/i915/gt/intel_engine_pool.c
@@ -0,0 +1,190 @@
+/*
+ * SPDX-License-Identifier: MIT
+ *
+ * Copyright © 2014-2018 Intel Corporation
+ */
+
+#include "gem/i915_gem_object.h"
+
+#include "i915_drv.h"
+#include "intel_engine_pm.h"
+#include "intel_engine_pool.h"
+
+static struct intel_engine_cs *to_engine(struct intel_engine_pool *pool)
+{
+ return container_of(pool, struct intel_engine_cs, pool);
+}
+
+static struct list_head *
+bucket_for_size(struct intel_engine_pool *pool, size_t sz)
+{
+ int n;
+
+ /*
+ * Compute a power-of-two bucket, but throw everything of 8 pages
+ * or more into the same bucket: i.e. the buckets hold objects of
+ * (1 page, 2-3 pages, 4-7 pages, 8+ pages).
+ */
+ n = fls(sz >> PAGE_SHIFT) - 1;
+ if (n >= ARRAY_SIZE(pool->cache_list))
+ n = ARRAY_SIZE(pool->cache_list) - 1;
+
+ return &pool->cache_list[n];
+}
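
A worked example of the bucket mapping, assuming 4KiB pages:

/*
 *   sz =  4KiB -> fls(1)  - 1 = 0 -> cache_list[0]
 *   sz =  8KiB -> fls(2)  - 1 = 1 -> cache_list[1]
 *   sz = 20KiB -> fls(5)  - 1 = 2 -> cache_list[2]
 *   sz = 64KiB -> fls(16) - 1 = 4 -> clamped to cache_list[3]
 */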
+
+static void node_free(struct intel_engine_pool_node *node)
+{
+ i915_gem_object_put(node->obj);
+ i915_active_fini(&node->active);
+ kfree(node);
+}
+
+static int pool_active(struct i915_active *ref)
+{
+ struct intel_engine_pool_node *node =
+ container_of(ref, typeof(*node), active);
+ struct dma_resv *resv = node->obj->base.resv;
+ int err;
+
+ if (dma_resv_trylock(resv)) {
+ dma_resv_add_excl_fence(resv, NULL);
+ dma_resv_unlock(resv);
+ }
+
+ err = i915_gem_object_pin_pages(node->obj);
+ if (err)
+ return err;
+
+ /* Hide this pinned object from the shrinker until retired */
+ i915_gem_object_make_unshrinkable(node->obj);
+
+ return 0;
+}
+
+__i915_active_call
+static void pool_retire(struct i915_active *ref)
+{
+ struct intel_engine_pool_node *node =
+ container_of(ref, typeof(*node), active);
+ struct intel_engine_pool *pool = node->pool;
+ struct list_head *list = bucket_for_size(pool, node->obj->base.size);
+ unsigned long flags;
+
+ GEM_BUG_ON(!intel_engine_pm_is_awake(to_engine(pool)));
+
+ i915_gem_object_unpin_pages(node->obj);
+
+ /* Return this object to the shrinker pool */
+ i915_gem_object_make_purgeable(node->obj);
+
+ spin_lock_irqsave(&pool->lock, flags);
+ list_add(&node->link, list);
+ spin_unlock_irqrestore(&pool->lock, flags);
+}
+
+static struct intel_engine_pool_node *
+node_create(struct intel_engine_pool *pool, size_t sz)
+{
+ struct intel_engine_cs *engine = to_engine(pool);
+ struct intel_engine_pool_node *node;
+ struct drm_i915_gem_object *obj;
+
+ node = kmalloc(sizeof(*node),
+ GFP_KERNEL | __GFP_RETRY_MAYFAIL | __GFP_NOWARN);
+ if (!node)
+ return ERR_PTR(-ENOMEM);
+
+ node->pool = pool;
+ i915_active_init(&node->active, pool_active, pool_retire);
+
+ obj = i915_gem_object_create_internal(engine->i915, sz);
+ if (IS_ERR(obj)) {
+ i915_active_fini(&node->active);
+ kfree(node);
+ return ERR_CAST(obj);
+ }
+
+ i915_gem_object_set_readonly(obj);
+
+ node->obj = obj;
+ return node;
+}
+
+static struct intel_engine_pool *lookup_pool(struct intel_engine_cs *engine)
+{
+ if (intel_engine_is_virtual(engine))
+ engine = intel_virtual_engine_get_sibling(engine, 0);
+
+ GEM_BUG_ON(!engine);
+ return &engine->pool;
+}
+
+struct intel_engine_pool_node *
+intel_engine_get_pool(struct intel_engine_cs *engine, size_t size)
+{
+ struct intel_engine_pool *pool = lookup_pool(engine);
+ struct intel_engine_pool_node *node;
+ struct list_head *list;
+ unsigned long flags;
+ int ret;
+
+ GEM_BUG_ON(!intel_engine_pm_is_awake(to_engine(pool)));
+
+ size = PAGE_ALIGN(size);
+ list = bucket_for_size(pool, size);
+
+ spin_lock_irqsave(&pool->lock, flags);
+ list_for_each_entry(node, list, link) {
+ if (node->obj->base.size < size)
+ continue;
+ list_del(&node->link);
+ break;
+ }
+ spin_unlock_irqrestore(&pool->lock, flags);
+
+ if (&node->link == list) {
+ node = node_create(pool, size);
+ if (IS_ERR(node))
+ return node;
+ }
+
+ ret = i915_active_acquire(&node->active);
+ if (ret) {
+ node_free(node);
+ return ERR_PTR(ret);
+ }
+
+ return node;
+}
+
+void intel_engine_pool_init(struct intel_engine_pool *pool)
+{
+ int n;
+
+ spin_lock_init(&pool->lock);
+ for (n = 0; n < ARRAY_SIZE(pool->cache_list); n++)
+ INIT_LIST_HEAD(&pool->cache_list[n]);
+}
+
+void intel_engine_pool_park(struct intel_engine_pool *pool)
+{
+ int n;
+
+ for (n = 0; n < ARRAY_SIZE(pool->cache_list); n++) {
+ struct list_head *list = &pool->cache_list[n];
+ struct intel_engine_pool_node *node, *nn;
+
+ list_for_each_entry_safe(node, nn, list, link)
+ node_free(node);
+
+ INIT_LIST_HEAD(list);
+ }
+}
+
+void intel_engine_pool_fini(struct intel_engine_pool *pool)
+{
+ int n;
+
+ for (n = 0; n < ARRAY_SIZE(pool->cache_list); n++)
+ GEM_BUG_ON(!list_empty(&pool->cache_list[n]));
+}
diff --git a/drivers/gpu/drm/i915/gt/intel_engine_pool.h b/drivers/gpu/drm/i915/gt/intel_engine_pool.h
new file mode 100644
index 000000000000..1bd89cadc3b7
--- /dev/null
+++ b/drivers/gpu/drm/i915/gt/intel_engine_pool.h
@@ -0,0 +1,34 @@
+/*
+ * SPDX-License-Identifier: MIT
+ *
+ * Copyright © 2014-2018 Intel Corporation
+ */
+
+#ifndef INTEL_ENGINE_POOL_H
+#define INTEL_ENGINE_POOL_H
+
+#include "intel_engine_pool_types.h"
+#include "i915_active.h"
+#include "i915_request.h"
+
+struct intel_engine_pool_node *
+intel_engine_get_pool(struct intel_engine_cs *engine, size_t size);
+
+static inline int
+intel_engine_pool_mark_active(struct intel_engine_pool_node *node,
+ struct i915_request *rq)
+{
+ return i915_active_add_request(&node->active, rq);
+}
+
+static inline void
+intel_engine_pool_put(struct intel_engine_pool_node *node)
+{
+ i915_active_release(&node->active);
+}
+
+void intel_engine_pool_init(struct intel_engine_pool *pool);
+void intel_engine_pool_park(struct intel_engine_pool *pool);
+void intel_engine_pool_fini(struct intel_engine_pool *pool);
+
+#endif /* INTEL_ENGINE_POOL_H */
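
A hedged sketch of the expected calling convention, tying a pooled buffer to a request so that pool_retire() returns the node to its bucket when the request is retired; use_object() is a hypothetical consumer:

static int sketch_use_pool(struct i915_request *rq)
{
	struct intel_engine_pool_node *node;
	int err;

	node = intel_engine_get_pool(rq->engine, SZ_64K);
	if (IS_ERR(node))
		return PTR_ERR(node);

	err = intel_engine_pool_mark_active(node, rq);
	if (!err)
		use_object(node->obj); /* hypothetical consumer */

	intel_engine_pool_put(node); /* rq now keeps the node active */
	return err;
}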
diff --git a/drivers/gpu/drm/i915/gt/intel_engine_pool_types.h b/drivers/gpu/drm/i915/gt/intel_engine_pool_types.h
new file mode 100644
index 000000000000..e31ee361b76f
--- /dev/null
+++ b/drivers/gpu/drm/i915/gt/intel_engine_pool_types.h
@@ -0,0 +1,29 @@
+/*
+ * SPDX-License-Identifier: MIT
+ *
+ * Copyright © 2014-2018 Intel Corporation
+ */
+
+#ifndef INTEL_ENGINE_POOL_TYPES_H
+#define INTEL_ENGINE_POOL_TYPES_H
+
+#include <linux/list.h>
+#include <linux/spinlock.h>
+
+#include "i915_active_types.h"
+
+struct drm_i915_gem_object;
+
+struct intel_engine_pool {
+ spinlock_t lock;
+ struct list_head cache_list[4];
+};
+
+struct intel_engine_pool_node {
+ struct i915_active active;
+ struct drm_i915_gem_object *obj;
+ struct list_head link;
+ struct intel_engine_pool *pool;
+};
+
+#endif /* INTEL_ENGINE_POOL_TYPES_H */
diff --git a/drivers/gpu/drm/i915/gt/intel_engine_types.h b/drivers/gpu/drm/i915/gt/intel_engine_types.h
index 43e975a26016..92be41a6903c 100644
--- a/drivers/gpu/drm/i915/gt/intel_engine_types.h
+++ b/drivers/gpu/drm/i915/gt/intel_engine_types.h
@@ -7,23 +7,47 @@
#ifndef __INTEL_ENGINE_TYPES__
#define __INTEL_ENGINE_TYPES__
+#include <linux/average.h>
#include <linux/hashtable.h>
#include <linux/irq_work.h>
#include <linux/kref.h>
#include <linux/list.h>
#include <linux/llist.h>
+#include <linux/rbtree.h>
+#include <linux/timer.h>
#include <linux/types.h>
+#include <linux/workqueue.h>
#include "i915_gem.h"
-#include "i915_gem_batch_pool.h"
#include "i915_pmu.h"
#include "i915_priolist_types.h"
#include "i915_selftest.h"
-#include "i915_timeline_types.h"
+#include "intel_engine_pool_types.h"
#include "intel_sseu.h"
+#include "intel_timeline_types.h"
#include "intel_wakeref.h"
#include "intel_workarounds_types.h"
+/* Legacy HW Engine ID */
+
+#define RCS0_HW 0
+#define VCS0_HW 1
+#define BCS0_HW 2
+#define VECS0_HW 3
+#define VCS1_HW 4
+#define VCS2_HW 6
+#define VCS3_HW 7
+#define VECS1_HW 12
+
+/* Gen11+ HW Engine class + instance */
+#define RENDER_CLASS 0
+#define VIDEO_DECODE_CLASS 1
+#define VIDEO_ENHANCEMENT_CLASS 2
+#define COPY_ENGINE_CLASS 3
+#define OTHER_CLASS 4
+#define MAX_ENGINE_CLASS 4
+#define MAX_ENGINE_INSTANCE 3
+
#define I915_MAX_SLICES 3
#define I915_MAX_SUBSLICES 8
@@ -35,6 +59,8 @@ struct drm_i915_reg_table;
struct i915_gem_context;
struct i915_request;
struct i915_sched_attr;
+struct intel_gt;
+struct intel_ring;
struct intel_uncore;
typedef u8 intel_engine_mask_t;
@@ -53,44 +79,6 @@ struct intel_instdone {
u32 row[I915_MAX_SLICES][I915_MAX_SUBSLICES];
};
-struct intel_engine_hangcheck {
- u64 acthd;
- u32 last_ring;
- u32 last_head;
- unsigned long action_timestamp;
- struct intel_instdone instdone;
-};
-
-struct intel_ring {
- struct kref ref;
- struct i915_vma *vma;
- void *vaddr;
-
- struct i915_timeline *timeline;
- struct list_head request_list;
- struct list_head active_link;
-
- /*
- * As we have two types of rings, one global to the engine used
- * by ringbuffer submission and those that are exclusive to a
- * context used by execlists, we have to play safe and allow
- * atomic updates to the pin_count. However, the actual pinning
- * of the context is either done during initialisation for
- * ringbuffer submission or serialised as part of the context
- * pinning for execlists, and so we do not need a mutex ourselves
- * to serialise intel_ring_pin/intel_ring_unpin.
- */
- atomic_t pin_count;
-
- u32 head;
- u32 tail;
- u32 emit;
-
- u32 space;
- u32 size;
- u32 effective_size;
-};
-
/*
* we use a single page to load ctx workarounds so all of these
* values are referred in terms of dwords
@@ -129,8 +117,12 @@ enum intel_engine_id {
VECS1,
#define _VECS(n) (VECS0 + (n))
I915_NUM_ENGINES
+#define INVALID_ENGINE ((enum intel_engine_id)-1)
};
+/* A simple estimator for the round-trip latency of an engine */
+DECLARE_EWMA(_engine_latency, 6, 4)
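
For reference, DECLARE_EWMA(name, precision, weight_rcp) from <linux/average.h> generates struct ewma__engine_latency together with ewma__engine_latency_init/add/read. A hedged reading of the chosen parameters (precision 6, reciprocal weight 4):

/*
 * Each new sample carries 1/4 weight, i.e. roughly
 *   avg' = (3 * avg + sample) / 4
 * with the average kept internally in 2^6 fixed point;
 * ewma__engine_latency_read() shifts the fixed point back out.
 */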
+
struct st_preempt_hang {
struct completion completion;
unsigned int count;
@@ -150,6 +142,16 @@ struct intel_engine_execlists {
struct tasklet_struct tasklet;
/**
+ * @timer: kick the current context if its timeslice expires
+ */
+ struct timer_list timer;
+
+ /**
+ * @preempt: reset the current context if it fails to give way
+ */
+ struct timer_list preempt;
+
+ /**
* @default_priolist: priority list for I915_PRIORITY_NORMAL
*/
struct i915_priolist default_priolist;
@@ -172,51 +174,28 @@ struct intel_engine_execlists {
*/
u32 __iomem *ctrl_reg;
+#define EXECLIST_MAX_PORTS 2
/**
- * @port: execlist port states
+ * @active: the currently known context executing on HW
+ */
+ struct i915_request * const *active;
+ /**
+ * @inflight: the set of contexts submitted and acknowledged by HW
*
- * For each hardware ELSP (ExecList Submission Port) we keep
- * track of the last request and the number of times we submitted
- * that port to hw. We then count the number of times the hw reports
- * a context completion or preemption. As only one context can
- * be active on hw, we limit resubmission of context to port[0]. This
- * is called Lite Restore, of the context.
+ * The set of inflight contexts is managed by reading CS events
+ * from the HW. On a context-switch event (not preemption), we
+ * know the HW has transitioned from port0 to port1, and we
+ * advance our inflight/active tracking accordingly.
*/
- struct execlist_port {
- /**
- * @request_count: combined request and submission count
- */
- struct i915_request *request_count;
-#define EXECLIST_COUNT_BITS 2
-#define port_request(p) ptr_mask_bits((p)->request_count, EXECLIST_COUNT_BITS)
-#define port_count(p) ptr_unmask_bits((p)->request_count, EXECLIST_COUNT_BITS)
-#define port_pack(rq, count) ptr_pack_bits(rq, count, EXECLIST_COUNT_BITS)
-#define port_unpack(p, count) ptr_unpack_bits((p)->request_count, count, EXECLIST_COUNT_BITS)
-#define port_set(p, packed) ((p)->request_count = (packed))
-#define port_isset(p) ((p)->request_count)
-#define port_index(p, execlists) ((p) - (execlists)->port)
-
- /**
- * @context_id: context ID for port
- */
- GEM_DEBUG_DECL(u32 context_id);
-
-#define EXECLIST_MAX_PORTS 2
- } port[EXECLIST_MAX_PORTS];
-
+ struct i915_request *inflight[EXECLIST_MAX_PORTS + 1 /* sentinel */];
/**
- * @active: is the HW active? We consider the HW as active after
- * submitting any context for execution and until we have seen the
- * last context completion event. After that, we do not expect any
- * more events until we submit, and so can park the HW.
+ * @pending: the next set of contexts submitted to ELSP
*
- * As we have a small number of different sources from which we feed
- * the HW, we track the state of each inside a single bitfield.
+ * We store the array of contexts that we submit to HW (via ELSP) and
+ * promote them to the inflight array once HW has signaled the
+ * preemption or idle-to-active event.
*/
- unsigned int active;
-#define EXECLISTS_ACTIVE_USER 0
-#define EXECLISTS_ACTIVE_PREEMPT 1
-#define EXECLISTS_ACTIVE_HWACK 2
+ struct i915_request *pending[EXECLIST_MAX_PORTS + 1];
/**
* @port_mask: number of execlist ports - 1
@@ -224,6 +203,16 @@ struct intel_engine_execlists {
unsigned int port_mask;
/**
+ * @switch_priority_hint: Second context priority.
+ *
+ * We submit multiple contexts to the HW simultaneously and would
+ * like to occasionally switch between them to emulate timeslicing.
+ * To know when timeslicing is suitable, we track the priority of
+ * the context submitted second.
+ */
+ int switch_priority_hint;
+
+ /**
* @queue_priority_hint: Highest pending priority.
*
* When we add requests into the queue, or adjust the priority of
@@ -258,11 +247,6 @@ struct intel_engine_execlists {
u32 *csb_status;
/**
- * @preempt_complete_status: expected CSB upon completing preemption
- */
- u32 preempt_complete_status;
-
- /**
* @csb_size: context status buffer FIFO size
*/
u8 csb_size;
@@ -279,39 +263,52 @@ struct intel_engine_execlists {
struct intel_engine_cs {
struct drm_i915_private *i915;
+ struct intel_gt *gt;
struct intel_uncore *uncore;
char name[INTEL_ENGINE_CS_MAX_NAME];
enum intel_engine_id id;
+ enum intel_engine_id legacy_idx;
+
unsigned int hw_id;
unsigned int guc_id;
- intel_engine_mask_t mask;
- u8 uabi_class;
+ intel_engine_mask_t mask;
u8 class;
u8 instance;
+
+ u16 uabi_class;
+ u16 uabi_instance;
+
+ u32 uabi_capabilities;
u32 context_size;
u32 mmio_base;
- u32 uabi_capabilities;
+ unsigned int context_tag;
+#define NUM_CONTEXT_TAG roundup_pow_of_two(2 * EXECLIST_MAX_PORTS)
- struct intel_sseu sseu;
+ struct rb_node uabi_node;
- struct intel_ring *buffer;
+ struct intel_sseu sseu;
struct {
spinlock_t lock;
struct list_head requests;
+ struct list_head hold; /* ready requests, but on hold */
} active;
struct llist_head barrier_tasks;
struct intel_context *kernel_context; /* pinned */
- struct intel_context *preempt_context; /* pinned; optional */
intel_engine_mask_t saturated; /* submitting semaphores too late? */
+ struct {
+ struct delayed_work work;
+ struct i915_request *systole;
+ } heartbeat;
+
unsigned long serial;
unsigned long wakeref_serial;
@@ -319,6 +316,18 @@ struct intel_engine_cs {
struct drm_i915_gem_object *default_state;
void *pinned_default_state;
+ struct {
+ struct intel_ring *ring;
+ struct intel_timeline *timeline;
+ } legacy;
+
+ /*
+ * We track the average duration of the idle pulse on parking the
+ * engine to keep an estimate of how fast the engine is
+ * under ideal conditions.
+ */
+ struct ewma__engine_latency latency;
+
/* Rather than have every client wait upon all user interrupts,
* with the herd waking after every interrupt and each doing the
* heavyweight seqno dance, we delegate the task (of being the
@@ -375,7 +384,7 @@ struct intel_engine_cs {
* when the command parser is enabled. Prevents the client from
* modifying the batch contents after software parsing.
*/
- struct i915_gem_batch_pool batch_pool;
+ struct intel_engine_pool pool;
struct intel_hw_status_page status_page;
struct i915_ctx_workarounds wa_ctx;
@@ -392,7 +401,10 @@ struct intel_engine_cs {
struct {
void (*prepare)(struct intel_engine_cs *engine);
- void (*reset)(struct intel_engine_cs *engine, bool stalled);
+
+ void (*rewind)(struct intel_engine_cs *engine, bool stalled);
+ void (*cancel)(struct intel_engine_cs *engine);
+
void (*finish)(struct intel_engine_cs *engine);
} reset;
@@ -404,7 +416,6 @@ struct intel_engine_cs {
const struct intel_context_ops *cops;
int (*request_alloc)(struct i915_request *rq);
- int (*init_context)(struct i915_request *rq);
int (*emit_flush)(struct i915_request *request, u32 mode);
#define EMIT_INVALIDATE BIT(0)
@@ -443,29 +454,29 @@ struct intel_engine_cs {
void (*schedule)(struct i915_request *request,
const struct i915_sched_attr *attr);
- /*
- * Cancel all requests on the hardware, or queued for execution.
- * This should only cancel the ready requests that have been
- * submitted to the engine (via the engine->submit_request callback).
- * This is called when marking the device as wedged.
- */
- void (*cancel_requests)(struct intel_engine_cs *engine);
-
- void (*destroy)(struct intel_engine_cs *engine);
+ void (*release)(struct intel_engine_cs *engine);
struct intel_engine_execlists execlists;
+ /*
+ * Keep track of completed timelines on this engine for early
+ * retirement with the goal of quickly enabling powersaving as
+ * soon as the engine is idle.
+ */
+ struct intel_timeline *retire;
+ struct work_struct retire_work;
+
/* status_notifier: list of callbacks for context-switch changes */
struct atomic_notifier_head context_status_notifier;
- struct intel_engine_hangcheck hangcheck;
-
-#define I915_ENGINE_NEEDS_CMD_PARSER BIT(0)
+#define I915_ENGINE_USING_CMD_PARSER BIT(0)
#define I915_ENGINE_SUPPORTS_STATS BIT(1)
#define I915_ENGINE_HAS_PREEMPTION BIT(2)
#define I915_ENGINE_HAS_SEMAPHORES BIT(3)
#define I915_ENGINE_NEEDS_BREADCRUMB_TASKLET BIT(4)
#define I915_ENGINE_IS_VIRTUAL BIT(5)
+#define I915_ENGINE_HAS_RELATIVE_MMIO BIT(6)
+#define I915_ENGINE_REQUIRES_CMD_PARSER BIT(7)
unsigned int flags;
/*
@@ -523,12 +534,25 @@ struct intel_engine_cs {
*/
ktime_t total;
} stats;
+
+ struct {
+ unsigned long heartbeat_interval_ms;
+ unsigned long preempt_timeout_ms;
+ unsigned long stop_timeout_ms;
+ unsigned long timeslice_duration_ms;
+ } props;
};
static inline bool
-intel_engine_needs_cmd_parser(const struct intel_engine_cs *engine)
+intel_engine_using_cmd_parser(const struct intel_engine_cs *engine)
+{
+ return engine->flags & I915_ENGINE_USING_CMD_PARSER;
+}
+
+static inline bool
+intel_engine_requires_cmd_parser(const struct intel_engine_cs *engine)
{
- return engine->flags & I915_ENGINE_NEEDS_CMD_PARSER;
+ return engine->flags & I915_ENGINE_REQUIRES_CMD_PARSER;
}
static inline bool
@@ -561,20 +585,24 @@ intel_engine_is_virtual(const struct intel_engine_cs *engine)
return engine->flags & I915_ENGINE_IS_VIRTUAL;
}
-#define instdone_slice_mask(dev_priv__) \
- (IS_GEN(dev_priv__, 7) ? \
- 1 : RUNTIME_INFO(dev_priv__)->sseu.slice_mask)
+static inline bool
+intel_engine_has_relative_mmio(const struct intel_engine_cs * const engine)
+{
+ return engine->flags & I915_ENGINE_HAS_RELATIVE_MMIO;
+}
-#define instdone_subslice_mask(dev_priv__) \
- (IS_GEN(dev_priv__, 7) ? \
- 1 : RUNTIME_INFO(dev_priv__)->sseu.subslice_mask[0])
+#define instdone_has_slice(dev_priv___, sseu___, slice___) \
+ ((IS_GEN(dev_priv___, 7) ? 1 : ((sseu___)->slice_mask)) & BIT(slice___))
-#define for_each_instdone_slice_subslice(dev_priv__, slice__, subslice__) \
- for ((slice__) = 0, (subslice__) = 0; \
- (slice__) < I915_MAX_SLICES; \
- (subslice__) = ((subslice__) + 1) < I915_MAX_SUBSLICES ? (subslice__) + 1 : 0, \
- (slice__) += ((subslice__) == 0)) \
- for_each_if((BIT(slice__) & instdone_slice_mask(dev_priv__)) && \
- (BIT(subslice__) & instdone_subslice_mask(dev_priv__)))
+#define instdone_has_subslice(dev_priv__, sseu__, slice__, subslice__) \
+ (IS_GEN(dev_priv__, 7) ? (1 & BIT(subslice__)) : \
+ intel_sseu_has_subslice(sseu__, 0, subslice__))
+#define for_each_instdone_slice_subslice(dev_priv_, sseu_, slice_, subslice_) \
+ for ((slice_) = 0, (subslice_) = 0; (slice_) < I915_MAX_SLICES; \
+ (subslice_) = ((subslice_) + 1) % I915_MAX_SUBSLICES, \
+ (slice_) += ((subslice_) == 0)) \
+ for_each_if((instdone_has_slice(dev_priv_, sseu_, slice_)) && \
+ (instdone_has_subslice(dev_priv_, sseu_, slice_, \
+ subslice_)))
#endif /* __INTEL_ENGINE_TYPES_H__ */
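A minimal usage sketch of the reworked iterator, assuming a hypothetical caller (read_instdone_sketch and its pr_info body are illustrative only): the sseu pointer is now supplied by the caller instead of being re-derived from dev_priv on every pass.

static void read_instdone_sketch(struct drm_i915_private *i915,
				 const struct sseu_dev_info *sseu)
{
	int slice, subslice;

	/* Visits only the slice/subslice pairs present in the given sseu. */
	for_each_instdone_slice_subslice(i915, sseu, slice, subslice)
		pr_info("instdone sample: slice %d subslice %d\n",
			slice, subslice);
}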
diff --git a/drivers/gpu/drm/i915/gt/intel_engine_user.c b/drivers/gpu/drm/i915/gt/intel_engine_user.c
new file mode 100644
index 000000000000..9e7f12bef828
--- /dev/null
+++ b/drivers/gpu/drm/i915/gt/intel_engine_user.c
@@ -0,0 +1,299 @@
+/*
+ * SPDX-License-Identifier: MIT
+ *
+ * Copyright © 2019 Intel Corporation
+ */
+
+#include <linux/list.h>
+#include <linux/list_sort.h>
+#include <linux/llist.h>
+
+#include "i915_drv.h"
+#include "intel_engine.h"
+#include "intel_engine_user.h"
+#include "intel_gt.h"
+
+struct intel_engine_cs *
+intel_engine_lookup_user(struct drm_i915_private *i915, u8 class, u8 instance)
+{
+ struct rb_node *p = i915->uabi_engines.rb_node;
+
+ while (p) {
+ struct intel_engine_cs *it =
+ rb_entry(p, typeof(*it), uabi_node);
+
+ if (class < it->uabi_class)
+ p = p->rb_left;
+ else if (class > it->uabi_class ||
+ instance > it->uabi_instance)
+ p = p->rb_right;
+ else if (instance < it->uabi_instance)
+ p = p->rb_left;
+ else
+ return it;
+ }
+
+ return NULL;
+}
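A minimal lookup sketch, assuming a hypothetical caller that wants the first video engine; the error policy is illustrative:

	struct intel_engine_cs *engine;

	engine = intel_engine_lookup_user(i915, I915_ENGINE_CLASS_VIDEO, 0);
	if (!engine)
		return -EINVAL; /* no vcs0 exposed on this device */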
+
+void intel_engine_add_user(struct intel_engine_cs *engine)
+{
+ llist_add((struct llist_node *)&engine->uabi_node,
+ (struct llist_head *)&engine->i915->uabi_engines);
+}
+
+static const u8 uabi_classes[] = {
+ [RENDER_CLASS] = I915_ENGINE_CLASS_RENDER,
+ [COPY_ENGINE_CLASS] = I915_ENGINE_CLASS_COPY,
+ [VIDEO_DECODE_CLASS] = I915_ENGINE_CLASS_VIDEO,
+ [VIDEO_ENHANCEMENT_CLASS] = I915_ENGINE_CLASS_VIDEO_ENHANCE,
+};
+
+static int engine_cmp(void *priv, struct list_head *A, struct list_head *B)
+{
+ const struct intel_engine_cs *a =
+ container_of((struct rb_node *)A, typeof(*a), uabi_node);
+ const struct intel_engine_cs *b =
+ container_of((struct rb_node *)B, typeof(*b), uabi_node);
+
+ if (uabi_classes[a->class] < uabi_classes[b->class])
+ return -1;
+ if (uabi_classes[a->class] > uabi_classes[b->class])
+ return 1;
+
+ if (a->instance < b->instance)
+ return -1;
+ if (a->instance > b->instance)
+ return 1;
+
+ return 0;
+}
+
+static struct llist_node *get_engines(struct drm_i915_private *i915)
+{
+ return llist_del_all((struct llist_head *)&i915->uabi_engines);
+}
+
+static void sort_engines(struct drm_i915_private *i915,
+ struct list_head *engines)
+{
+ struct llist_node *pos, *next;
+
+ llist_for_each_safe(pos, next, get_engines(i915)) {
+ struct intel_engine_cs *engine =
+ container_of((struct rb_node *)pos, typeof(*engine),
+ uabi_node);
+ list_add((struct list_head *)&engine->uabi_node, engines);
+ }
+ list_sort(NULL, engines, engine_cmp);
+}
+
+static void set_scheduler_caps(struct drm_i915_private *i915)
+{
+ static const struct {
+ u8 engine;
+ u8 sched;
+ } map[] = {
+#define MAP(x, y) { ilog2(I915_ENGINE_##x), ilog2(I915_SCHEDULER_CAP_##y) }
+ MAP(HAS_PREEMPTION, PREEMPTION),
+ MAP(HAS_SEMAPHORES, SEMAPHORES),
+ MAP(SUPPORTS_STATS, ENGINE_BUSY_STATS),
+#undef MAP
+ };
+ struct intel_engine_cs *engine;
+ u32 enabled, disabled;
+
+ enabled = 0;
+ disabled = 0;
+ for_each_uabi_engine(engine, i915) { /* all engines must agree! */
+ int i;
+
+ if (engine->schedule)
+ enabled |= (I915_SCHEDULER_CAP_ENABLED |
+ I915_SCHEDULER_CAP_PRIORITY);
+ else
+ disabled |= (I915_SCHEDULER_CAP_ENABLED |
+ I915_SCHEDULER_CAP_PRIORITY);
+
+ for (i = 0; i < ARRAY_SIZE(map); i++) {
+ if (engine->flags & BIT(map[i].engine))
+ enabled |= BIT(map[i].sched);
+ else
+ disabled |= BIT(map[i].sched);
+ }
+ }
+
+ i915->caps.scheduler = enabled & ~disabled;
+ if (!(i915->caps.scheduler & I915_SCHEDULER_CAP_ENABLED))
+ i915->caps.scheduler = 0;
+}
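A worked illustration of the two masks, assuming a hypothetical pair of engines (capability names abbreviated from I915_SCHEDULER_CAP_*): if engine A has ->schedule and HAS_PREEMPTION while engine B has ->schedule only, then

	enabled  = ENABLED | PRIORITY | PREEMPTION
	disabled = PREEMPTION
	caps     = enabled & ~disabled = ENABLED | PRIORITY

so a capability is advertised only when every uabi engine agrees.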
+
+const char *intel_engine_class_repr(u8 class)
+{
+ static const char * const uabi_names[] = {
+ [RENDER_CLASS] = "rcs",
+ [COPY_ENGINE_CLASS] = "bcs",
+ [VIDEO_DECODE_CLASS] = "vcs",
+ [VIDEO_ENHANCEMENT_CLASS] = "vecs",
+ };
+
+ if (class >= ARRAY_SIZE(uabi_names) || !uabi_names[class])
+ return "xxx";
+
+ return uabi_names[class];
+}
+
+struct legacy_ring {
+ struct intel_gt *gt;
+ u8 class;
+ u8 instance;
+};
+
+static int legacy_ring_idx(const struct legacy_ring *ring)
+{
+ static const struct {
+ u8 base, max;
+ } map[] = {
+ [RENDER_CLASS] = { RCS0, 1 },
+ [COPY_ENGINE_CLASS] = { BCS0, 1 },
+ [VIDEO_DECODE_CLASS] = { VCS0, I915_MAX_VCS },
+ [VIDEO_ENHANCEMENT_CLASS] = { VECS0, I915_MAX_VECS },
+ };
+
+ if (GEM_DEBUG_WARN_ON(ring->class >= ARRAY_SIZE(map)))
+ return INVALID_ENGINE;
+
+ if (GEM_DEBUG_WARN_ON(ring->instance >= map[ring->class].max))
+ return INVALID_ENGINE;
+
+ return map[ring->class].base + ring->instance;
+}
+
+static void add_legacy_ring(struct legacy_ring *ring,
+ struct intel_engine_cs *engine)
+{
+ if (engine->gt != ring->gt || engine->class != ring->class) {
+ ring->gt = engine->gt;
+ ring->class = engine->class;
+ ring->instance = 0;
+ }
+
+ engine->legacy_idx = legacy_ring_idx(ring);
+ if (engine->legacy_idx != INVALID_ENGINE)
+ ring->instance++;
+}
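A worked example of the legacy mapping, assuming a hypothetical engine set { rcs0, vcs0, vcs1 }:

	rcs0 -> RCS0
	vcs0 -> VCS0
	vcs1 -> VCS0 + 1

An instance at or beyond map[class].max yields INVALID_ENGINE and, because add_legacy_ring() only increments ring->instance on success, does not consume a legacy slot.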
+
+void intel_engines_driver_register(struct drm_i915_private *i915)
+{
+ struct legacy_ring ring = {};
+ u8 uabi_instances[4] = {};
+ struct list_head *it, *next;
+ struct rb_node **p, *prev;
+ LIST_HEAD(engines);
+
+ sort_engines(i915, &engines);
+
+ prev = NULL;
+ p = &i915->uabi_engines.rb_node;
+ list_for_each_safe(it, next, &engines) {
+ struct intel_engine_cs *engine =
+ container_of((struct rb_node *)it, typeof(*engine),
+ uabi_node);
+ char old[sizeof(engine->name)];
+
+ if (intel_gt_has_init_error(engine->gt))
+ continue; /* ignore incomplete engines */
+
+ GEM_BUG_ON(engine->class >= ARRAY_SIZE(uabi_classes));
+ engine->uabi_class = uabi_classes[engine->class];
+
+ GEM_BUG_ON(engine->uabi_class >= ARRAY_SIZE(uabi_instances));
+ engine->uabi_instance = uabi_instances[engine->uabi_class]++;
+
+ /* Replace the internal name with the final user facing name */
+ memcpy(old, engine->name, sizeof(engine->name));
+ scnprintf(engine->name, sizeof(engine->name), "%s%u",
+ intel_engine_class_repr(engine->class),
+ engine->uabi_instance);
+ DRM_DEBUG_DRIVER("renamed %s to %s\n", old, engine->name);
+
+ rb_link_node(&engine->uabi_node, prev, p);
+ rb_insert_color(&engine->uabi_node, &i915->uabi_engines);
+
+ GEM_BUG_ON(intel_engine_lookup_user(i915,
+ engine->uabi_class,
+ engine->uabi_instance) != engine);
+
+ /* Fix up the mapping to match default execbuf::user_map[] */
+ add_legacy_ring(&ring, engine);
+
+ prev = &engine->uabi_node;
+ p = &prev->rb_right;
+ }
+
+ if (IS_ENABLED(CONFIG_DRM_I915_SELFTESTS) &&
+ IS_ENABLED(CONFIG_DRM_I915_DEBUG_GEM)) {
+ struct intel_engine_cs *engine;
+ unsigned int isolation;
+ int class, inst;
+ int errors = 0;
+
+ for (class = 0; class < ARRAY_SIZE(uabi_instances); class++) {
+ for (inst = 0; inst < uabi_instances[class]; inst++) {
+ engine = intel_engine_lookup_user(i915,
+ class, inst);
+ if (!engine) {
+ pr_err("UABI engine not found for { class:%d, instance:%d }\n",
+ class, inst);
+ errors++;
+ continue;
+ }
+
+ if (engine->uabi_class != class ||
+ engine->uabi_instance != inst) {
+ pr_err("Wrong UABI engine:%s { class:%d, instance:%d } found for { class:%d, instance:%d }\n",
+ engine->name,
+ engine->uabi_class,
+ engine->uabi_instance,
+ class, inst);
+ errors++;
+ continue;
+ }
+ }
+ }
+
+ /*
+ * Make sure that classes with multiple engine instances all
+ * share the same basic configuration.
+ */
+ isolation = intel_engines_has_context_isolation(i915);
+ for_each_uabi_engine(engine, i915) {
+ unsigned int bit = BIT(engine->uabi_class);
+ unsigned int expected = engine->default_state ? bit : 0;
+
+ if ((isolation & bit) != expected) {
+ pr_err("mismatching default context state for class %d on engine %s\n",
+ engine->uabi_class, engine->name);
+ errors++;
+ }
+ }
+
+ if (WARN(errors, "Invalid UABI engine mapping found"))
+ i915->uabi_engines = RB_ROOT;
+ }
+
+ set_scheduler_caps(i915);
+}
+
+unsigned int intel_engines_has_context_isolation(struct drm_i915_private *i915)
+{
+ struct intel_engine_cs *engine;
+ unsigned int which;
+
+ which = 0;
+ for_each_uabi_engine(engine, i915)
+ if (engine->default_state)
+ which |= BIT(engine->uabi_class);
+
+ return which;
+}
diff --git a/drivers/gpu/drm/i915/gt/intel_engine_user.h b/drivers/gpu/drm/i915/gt/intel_engine_user.h
new file mode 100644
index 000000000000..f845ea1cbfaa
--- /dev/null
+++ b/drivers/gpu/drm/i915/gt/intel_engine_user.h
@@ -0,0 +1,25 @@
+/*
+ * SPDX-License-Identifier: MIT
+ *
+ * Copyright © 2019 Intel Corporation
+ */
+
+#ifndef INTEL_ENGINE_USER_H
+#define INTEL_ENGINE_USER_H
+
+#include <linux/types.h>
+
+struct drm_i915_private;
+struct intel_engine_cs;
+
+struct intel_engine_cs *
+intel_engine_lookup_user(struct drm_i915_private *i915, u8 class, u8 instance);
+
+unsigned int intel_engines_has_context_isolation(struct drm_i915_private *i915);
+
+void intel_engine_add_user(struct intel_engine_cs *engine);
+void intel_engines_driver_register(struct drm_i915_private *i915);
+
+const char *intel_engine_class_repr(u8 class);
+
+#endif /* INTEL_ENGINE_USER_H */
diff --git a/drivers/gpu/drm/i915/gt/intel_ggtt.c b/drivers/gpu/drm/i915/gt/intel_ggtt.c
new file mode 100644
index 000000000000..531d501be01f
--- /dev/null
+++ b/drivers/gpu/drm/i915/gt/intel_ggtt.c
@@ -0,0 +1,1486 @@
+// SPDX-License-Identifier: MIT
+/*
+ * Copyright © 2020 Intel Corporation
+ */
+
+#include <linux/stop_machine.h>
+
+#include <asm/set_memory.h>
+#include <asm/smp.h>
+
+#include "intel_gt.h"
+#include "i915_drv.h"
+#include "i915_scatterlist.h"
+#include "i915_vgpu.h"
+
+#include "intel_gtt.h"
+
+static int
+i915_get_ggtt_vma_pages(struct i915_vma *vma);
+
+static void i915_ggtt_color_adjust(const struct drm_mm_node *node,
+ unsigned long color,
+ u64 *start,
+ u64 *end)
+{
+ if (i915_node_color_differs(node, color))
+ *start += I915_GTT_PAGE_SIZE;
+
+ /*
+ * Also leave a space between the unallocated reserved node after the
+ * GTT and any objects within the GTT, i.e. we use the color adjustment
+ * to insert a guard page to prevent prefetches crossing over the
+ * GTT boundary.
+ */
+ node = list_next_entry(node, node_list);
+ if (node->color != color)
+ *end -= I915_GTT_PAGE_SIZE;
+}
+
+static int ggtt_init_hw(struct i915_ggtt *ggtt)
+{
+ struct drm_i915_private *i915 = ggtt->vm.i915;
+
+ i915_address_space_init(&ggtt->vm, VM_CLASS_GGTT);
+
+ ggtt->vm.is_ggtt = true;
+
+ /* Only VLV supports read-only GGTT mappings */
+ ggtt->vm.has_read_only = IS_VALLEYVIEW(i915);
+
+ if (!HAS_LLC(i915) && !HAS_PPGTT(i915))
+ ggtt->vm.mm.color_adjust = i915_ggtt_color_adjust;
+
+ if (ggtt->mappable_end) {
+ if (!io_mapping_init_wc(&ggtt->iomap,
+ ggtt->gmadr.start,
+ ggtt->mappable_end)) {
+ ggtt->vm.cleanup(&ggtt->vm);
+ return -EIO;
+ }
+
+ ggtt->mtrr = arch_phys_wc_add(ggtt->gmadr.start,
+ ggtt->mappable_end);
+ }
+
+ i915_ggtt_init_fences(ggtt);
+
+ return 0;
+}
+
+/**
+ * i915_ggtt_init_hw - Initialize GGTT hardware
+ * @i915: i915 device
+ */
+int i915_ggtt_init_hw(struct drm_i915_private *i915)
+{
+ int ret;
+
+ stash_init(&i915->mm.wc_stash);
+
+ /*
+ * Note that we use page colouring to enforce a guard page at the
+ * end of the address space. This is required as the CS may prefetch
+ * beyond the end of the batch buffer, across the page boundary,
+ * and beyond the end of the GTT if we do not provide a guard.
+ */
+ ret = ggtt_init_hw(&i915->ggtt);
+ if (ret)
+ return ret;
+
+ return 0;
+}
+
+/*
+ * Certain Gen5 chipsets require idling the GPU before
+ * unmapping anything from the GTT when VT-d is enabled.
+ */
+static bool needs_idle_maps(struct drm_i915_private *i915)
+{
+ /*
+ * Query intel_iommu to see if we need the workaround. Presumably that
+ * was loaded first.
+ */
+ return IS_GEN(i915, 5) && IS_MOBILE(i915) && intel_vtd_active();
+}
+
+static void ggtt_suspend_mappings(struct i915_ggtt *ggtt)
+{
+ struct drm_i915_private *i915 = ggtt->vm.i915;
+
+ /*
+ * Don't bother messing with faults pre GEN6 as we have little
+ * documentation supporting that it's a good idea.
+ */
+ if (INTEL_GEN(i915) < 6)
+ return;
+
+ intel_gt_check_and_clear_faults(ggtt->vm.gt);
+
+ ggtt->vm.clear_range(&ggtt->vm, 0, ggtt->vm.total);
+
+ ggtt->invalidate(ggtt);
+}
+
+void i915_gem_suspend_gtt_mappings(struct drm_i915_private *i915)
+{
+ ggtt_suspend_mappings(&i915->ggtt);
+}
+
+void gen6_ggtt_invalidate(struct i915_ggtt *ggtt)
+{
+ struct intel_uncore *uncore = ggtt->vm.gt->uncore;
+
+ spin_lock_irq(&uncore->lock);
+ intel_uncore_write_fw(uncore, GFX_FLSH_CNTL_GEN6, GFX_FLSH_CNTL_EN);
+ intel_uncore_read_fw(uncore, GFX_FLSH_CNTL_GEN6);
+ spin_unlock_irq(&uncore->lock);
+}
+
+static void gen8_ggtt_invalidate(struct i915_ggtt *ggtt)
+{
+ struct intel_uncore *uncore = ggtt->vm.gt->uncore;
+
+ /*
+ * Note that as an uncached mmio write, this will flush the
+ * WCB of the writes into the GGTT before it triggers the invalidate.
+ */
+ intel_uncore_write_fw(uncore, GFX_FLSH_CNTL_GEN6, GFX_FLSH_CNTL_EN);
+}
+
+static void guc_ggtt_invalidate(struct i915_ggtt *ggtt)
+{
+ struct intel_uncore *uncore = ggtt->vm.gt->uncore;
+ struct drm_i915_private *i915 = ggtt->vm.i915;
+
+ gen8_ggtt_invalidate(ggtt);
+
+ if (INTEL_GEN(i915) >= 12)
+ intel_uncore_write_fw(uncore, GEN12_GUC_TLB_INV_CR,
+ GEN12_GUC_TLB_INV_CR_INVALIDATE);
+ else
+ intel_uncore_write_fw(uncore, GEN8_GTCR, GEN8_GTCR_INVALIDATE);
+}
+
+static void gmch_ggtt_invalidate(struct i915_ggtt *ggtt)
+{
+ intel_gtt_chipset_flush();
+}
+
+static void gen8_set_pte(void __iomem *addr, gen8_pte_t pte)
+{
+ writeq(pte, addr);
+}
+
+static void gen8_ggtt_insert_page(struct i915_address_space *vm,
+ dma_addr_t addr,
+ u64 offset,
+ enum i915_cache_level level,
+ u32 unused)
+{
+ struct i915_ggtt *ggtt = i915_vm_to_ggtt(vm);
+ gen8_pte_t __iomem *pte =
+ (gen8_pte_t __iomem *)ggtt->gsm + offset / I915_GTT_PAGE_SIZE;
+
+ gen8_set_pte(pte, gen8_pte_encode(addr, level, 0));
+
+ ggtt->invalidate(ggtt);
+}
+
+static void gen8_ggtt_insert_entries(struct i915_address_space *vm,
+ struct i915_vma *vma,
+ enum i915_cache_level level,
+ u32 flags)
+{
+ struct i915_ggtt *ggtt = i915_vm_to_ggtt(vm);
+ struct sgt_iter sgt_iter;
+ gen8_pte_t __iomem *gtt_entries;
+ const gen8_pte_t pte_encode = gen8_pte_encode(0, level, 0);
+ dma_addr_t addr;
+
+ /*
+ * Note that we ignore PTE_READ_ONLY here. The caller must be careful
+ * not to allow the user to override access to a read only page.
+ */
+
+ gtt_entries = (gen8_pte_t __iomem *)ggtt->gsm;
+ gtt_entries += vma->node.start / I915_GTT_PAGE_SIZE;
+ for_each_sgt_daddr(addr, sgt_iter, vma->pages)
+ gen8_set_pte(gtt_entries++, pte_encode | addr);
+
+ /*
+ * We want to flush the TLBs only after we're certain all the PTE
+ * updates have finished.
+ */
+ ggtt->invalidate(ggtt);
+}
+
+static void gen6_ggtt_insert_page(struct i915_address_space *vm,
+ dma_addr_t addr,
+ u64 offset,
+ enum i915_cache_level level,
+ u32 flags)
+{
+ struct i915_ggtt *ggtt = i915_vm_to_ggtt(vm);
+ gen6_pte_t __iomem *pte =
+ (gen6_pte_t __iomem *)ggtt->gsm + offset / I915_GTT_PAGE_SIZE;
+
+ iowrite32(vm->pte_encode(addr, level, flags), pte);
+
+ ggtt->invalidate(ggtt);
+}
+
+/*
+ * Binds an object into the global gtt with the specified cache level.
+ * The object will be accessible to the GPU via commands whose operands
+ * reference offsets within the global GTT as well as accessible by the GPU
+ * through the GMADR mapped BAR (i915->mm.gtt->gtt).
+ */
+static void gen6_ggtt_insert_entries(struct i915_address_space *vm,
+ struct i915_vma *vma,
+ enum i915_cache_level level,
+ u32 flags)
+{
+ struct i915_ggtt *ggtt = i915_vm_to_ggtt(vm);
+ gen6_pte_t __iomem *entries = (gen6_pte_t __iomem *)ggtt->gsm;
+ unsigned int i = vma->node.start / I915_GTT_PAGE_SIZE;
+ struct sgt_iter iter;
+ dma_addr_t addr;
+
+ for_each_sgt_daddr(addr, iter, vma->pages)
+ iowrite32(vm->pte_encode(addr, level, flags), &entries[i++]);
+
+ /*
+ * We want to flush the TLBs only after we're certain all the PTE
+ * updates have finished.
+ */
+ ggtt->invalidate(ggtt);
+}
+
+static void nop_clear_range(struct i915_address_space *vm,
+ u64 start, u64 length)
+{
+}
+
+static void gen8_ggtt_clear_range(struct i915_address_space *vm,
+ u64 start, u64 length)
+{
+ struct i915_ggtt *ggtt = i915_vm_to_ggtt(vm);
+ unsigned int first_entry = start / I915_GTT_PAGE_SIZE;
+ unsigned int num_entries = length / I915_GTT_PAGE_SIZE;
+ const gen8_pte_t scratch_pte = vm->scratch[0].encode;
+ gen8_pte_t __iomem *gtt_base =
+ (gen8_pte_t __iomem *)ggtt->gsm + first_entry;
+ const int max_entries = ggtt_total_entries(ggtt) - first_entry;
+ int i;
+
+ if (WARN(num_entries > max_entries,
+ "First entry = %d; Num entries = %d (max=%d)\n",
+ first_entry, num_entries, max_entries))
+ num_entries = max_entries;
+
+ for (i = 0; i < num_entries; i++)
+ gen8_set_pte(&gtt_base[i], scratch_pte);
+}
+
+static void bxt_vtd_ggtt_wa(struct i915_address_space *vm)
+{
+ /*
+ * Make sure the internal GAM fifo has been cleared of all GTT
+ * writes before exiting stop_machine(). This guarantees that
+ * any aperture accesses waiting to start in another process
+ * cannot back up behind the GTT writes causing a hang.
+ * The register can be any arbitrary GAM register.
+ */
+ intel_uncore_posting_read_fw(vm->gt->uncore, GFX_FLSH_CNTL_GEN6);
+}
+
+struct insert_page {
+ struct i915_address_space *vm;
+ dma_addr_t addr;
+ u64 offset;
+ enum i915_cache_level level;
+};
+
+static int bxt_vtd_ggtt_insert_page__cb(void *_arg)
+{
+ struct insert_page *arg = _arg;
+
+ gen8_ggtt_insert_page(arg->vm, arg->addr, arg->offset, arg->level, 0);
+ bxt_vtd_ggtt_wa(arg->vm);
+
+ return 0;
+}
+
+static void bxt_vtd_ggtt_insert_page__BKL(struct i915_address_space *vm,
+ dma_addr_t addr,
+ u64 offset,
+ enum i915_cache_level level,
+ u32 unused)
+{
+ struct insert_page arg = { vm, addr, offset, level };
+
+ stop_machine(bxt_vtd_ggtt_insert_page__cb, &arg, NULL);
+}
+
+struct insert_entries {
+ struct i915_address_space *vm;
+ struct i915_vma *vma;
+ enum i915_cache_level level;
+ u32 flags;
+};
+
+static int bxt_vtd_ggtt_insert_entries__cb(void *_arg)
+{
+ struct insert_entries *arg = _arg;
+
+ gen8_ggtt_insert_entries(arg->vm, arg->vma, arg->level, arg->flags);
+ bxt_vtd_ggtt_wa(arg->vm);
+
+ return 0;
+}
+
+static void bxt_vtd_ggtt_insert_entries__BKL(struct i915_address_space *vm,
+ struct i915_vma *vma,
+ enum i915_cache_level level,
+ u32 flags)
+{
+ struct insert_entries arg = { vm, vma, level, flags };
+
+ stop_machine(bxt_vtd_ggtt_insert_entries__cb, &arg, NULL);
+}
+
+struct clear_range {
+ struct i915_address_space *vm;
+ u64 start;
+ u64 length;
+};
+
+static int bxt_vtd_ggtt_clear_range__cb(void *_arg)
+{
+ struct clear_range *arg = _arg;
+
+ gen8_ggtt_clear_range(arg->vm, arg->start, arg->length);
+ bxt_vtd_ggtt_wa(arg->vm);
+
+ return 0;
+}
+
+static void bxt_vtd_ggtt_clear_range__BKL(struct i915_address_space *vm,
+ u64 start,
+ u64 length)
+{
+ struct clear_range arg = { vm, start, length };
+
+ stop_machine(bxt_vtd_ggtt_clear_range__cb, &arg, NULL);
+}
+
+static void gen6_ggtt_clear_range(struct i915_address_space *vm,
+ u64 start, u64 length)
+{
+ struct i915_ggtt *ggtt = i915_vm_to_ggtt(vm);
+ unsigned int first_entry = start / I915_GTT_PAGE_SIZE;
+ unsigned int num_entries = length / I915_GTT_PAGE_SIZE;
+ gen6_pte_t scratch_pte, __iomem *gtt_base =
+ (gen6_pte_t __iomem *)ggtt->gsm + first_entry;
+ const int max_entries = ggtt_total_entries(ggtt) - first_entry;
+ int i;
+
+ if (WARN(num_entries > max_entries,
+ "First entry = %d; Num entries = %d (max=%d)\n",
+ first_entry, num_entries, max_entries))
+ num_entries = max_entries;
+
+ scratch_pte = vm->scratch[0].encode;
+ for (i = 0; i < num_entries; i++)
+ iowrite32(scratch_pte, &gtt_base[i]);
+}
+
+static void i915_ggtt_insert_page(struct i915_address_space *vm,
+ dma_addr_t addr,
+ u64 offset,
+ enum i915_cache_level cache_level,
+ u32 unused)
+{
+ unsigned int flags = (cache_level == I915_CACHE_NONE) ?
+ AGP_USER_MEMORY : AGP_USER_CACHED_MEMORY;
+
+ intel_gtt_insert_page(addr, offset >> PAGE_SHIFT, flags);
+}
+
+static void i915_ggtt_insert_entries(struct i915_address_space *vm,
+ struct i915_vma *vma,
+ enum i915_cache_level cache_level,
+ u32 unused)
+{
+ unsigned int flags = (cache_level == I915_CACHE_NONE) ?
+ AGP_USER_MEMORY : AGP_USER_CACHED_MEMORY;
+
+ intel_gtt_insert_sg_entries(vma->pages, vma->node.start >> PAGE_SHIFT,
+ flags);
+}
+
+static void i915_ggtt_clear_range(struct i915_address_space *vm,
+ u64 start, u64 length)
+{
+ intel_gtt_clear_range(start >> PAGE_SHIFT, length >> PAGE_SHIFT);
+}
+
+static int ggtt_bind_vma(struct i915_vma *vma,
+ enum i915_cache_level cache_level,
+ u32 flags)
+{
+ struct drm_i915_gem_object *obj = vma->obj;
+ u32 pte_flags;
+
+ /* Applicable to VLV (gen8+ do not support RO in the GGTT) */
+ pte_flags = 0;
+ if (i915_gem_object_is_readonly(obj))
+ pte_flags |= PTE_READ_ONLY;
+
+ vma->vm->insert_entries(vma->vm, vma, cache_level, pte_flags);
+
+ vma->page_sizes.gtt = I915_GTT_PAGE_SIZE;
+
+ /*
+ * Without an aliasing PPGTT there's no difference between
+ * GLOBAL/LOCAL_BIND; it's all the same PTEs. Hence unconditionally
+ * upgrade to both bound if we bind either, to avoid double-binding.
+ */
+ atomic_or(I915_VMA_GLOBAL_BIND | I915_VMA_LOCAL_BIND, &vma->flags);
+
+ return 0;
+}
+
+static void ggtt_unbind_vma(struct i915_vma *vma)
+{
+ vma->vm->clear_range(vma->vm, vma->node.start, vma->size);
+}
+
+static int ggtt_reserve_guc_top(struct i915_ggtt *ggtt)
+{
+ u64 size;
+ int ret;
+
+ if (!USES_GUC(ggtt->vm.i915))
+ return 0;
+
+ GEM_BUG_ON(ggtt->vm.total <= GUC_GGTT_TOP);
+ size = ggtt->vm.total - GUC_GGTT_TOP;
+
+ ret = i915_gem_gtt_reserve(&ggtt->vm, &ggtt->uc_fw, size,
+ GUC_GGTT_TOP, I915_COLOR_UNEVICTABLE,
+ PIN_NOEVICT);
+ if (ret)
+ DRM_DEBUG_DRIVER("Failed to reserve top of GGTT for GuC\n");
+
+ return ret;
+}
+
+static void ggtt_release_guc_top(struct i915_ggtt *ggtt)
+{
+ if (drm_mm_node_allocated(&ggtt->uc_fw))
+ drm_mm_remove_node(&ggtt->uc_fw);
+}
+
+static void cleanup_init_ggtt(struct i915_ggtt *ggtt)
+{
+ ggtt_release_guc_top(ggtt);
+ if (drm_mm_node_allocated(&ggtt->error_capture))
+ drm_mm_remove_node(&ggtt->error_capture);
+ mutex_destroy(&ggtt->error_mutex);
+}
+
+static int init_ggtt(struct i915_ggtt *ggtt)
+{
+ /*
+ * Let GEM Manage all of the aperture.
+ *
+ * However, leave one page at the end still bound to the scratch page.
+ * There are a number of places where the hardware apparently prefetches
+ * past the end of the object, and we've seen multiple hangs with the
+ * GPU head pointer stuck in a batchbuffer bound at the last page of the
+ * aperture. One page should be enough to keep any prefetching inside
+ * of the aperture.
+ */
+ unsigned long hole_start, hole_end;
+ struct drm_mm_node *entry;
+ int ret;
+
+ /*
+ * GuC requires all resources that we're sharing with it to be placed in
+ * non-WOPCM memory. If GuC is not present or not in use, we still need
+ * a small bias as ring wraparound at offset 0 sometimes hangs. No idea
+ * why.
+ */
+ ggtt->pin_bias = max_t(u32, I915_GTT_PAGE_SIZE,
+ intel_wopcm_guc_size(&ggtt->vm.i915->wopcm));
+
+ ret = intel_vgt_balloon(ggtt);
+ if (ret)
+ return ret;
+
+ mutex_init(&ggtt->error_mutex);
+ if (ggtt->mappable_end) {
+ /* Reserve a mappable slot for our lockless error capture */
+ ret = drm_mm_insert_node_in_range(&ggtt->vm.mm,
+ &ggtt->error_capture,
+ PAGE_SIZE, 0,
+ I915_COLOR_UNEVICTABLE,
+ 0, ggtt->mappable_end,
+ DRM_MM_INSERT_LOW);
+ if (ret)
+ return ret;
+ }
+
+ /*
+ * The upper portion of the GuC address space has a sizeable hole
+ * (several MB) that is inaccessible by GuC. Reserve this range within
+ * GGTT as it can comfortably hold GuC/HuC firmware images.
+ */
+ ret = ggtt_reserve_guc_top(ggtt);
+ if (ret)
+ goto err;
+
+ /* Clear any non-preallocated blocks */
+ drm_mm_for_each_hole(entry, &ggtt->vm.mm, hole_start, hole_end) {
+ DRM_DEBUG_KMS("clearing unused GTT space: [%lx, %lx]\n",
+ hole_start, hole_end);
+ ggtt->vm.clear_range(&ggtt->vm, hole_start,
+ hole_end - hole_start);
+ }
+
+ /* And finally clear the reserved guard page */
+ ggtt->vm.clear_range(&ggtt->vm, ggtt->vm.total - PAGE_SIZE, PAGE_SIZE);
+
+ return 0;
+
+err:
+ cleanup_init_ggtt(ggtt);
+ return ret;
+}
+
+static int aliasing_gtt_bind_vma(struct i915_vma *vma,
+ enum i915_cache_level cache_level,
+ u32 flags)
+{
+ u32 pte_flags;
+ int ret;
+
+ /* Currently applicable only to VLV */
+ pte_flags = 0;
+ if (i915_gem_object_is_readonly(vma->obj))
+ pte_flags |= PTE_READ_ONLY;
+
+ if (flags & I915_VMA_LOCAL_BIND) {
+ struct i915_ppgtt *alias = i915_vm_to_ggtt(vma->vm)->alias;
+
+ if (flags & I915_VMA_ALLOC) {
+ ret = alias->vm.allocate_va_range(&alias->vm,
+ vma->node.start,
+ vma->size);
+ if (ret)
+ return ret;
+
+ set_bit(I915_VMA_ALLOC_BIT, __i915_vma_flags(vma));
+ }
+
+ GEM_BUG_ON(!test_bit(I915_VMA_ALLOC_BIT,
+ __i915_vma_flags(vma)));
+ alias->vm.insert_entries(&alias->vm, vma,
+ cache_level, pte_flags);
+ }
+
+ if (flags & I915_VMA_GLOBAL_BIND)
+ vma->vm->insert_entries(vma->vm, vma, cache_level, pte_flags);
+
+ return 0;
+}
+
+static void aliasing_gtt_unbind_vma(struct i915_vma *vma)
+{
+ if (i915_vma_is_bound(vma, I915_VMA_GLOBAL_BIND)) {
+ struct i915_address_space *vm = vma->vm;
+
+ vm->clear_range(vm, vma->node.start, vma->size);
+ }
+
+ if (test_and_clear_bit(I915_VMA_ALLOC_BIT, __i915_vma_flags(vma))) {
+ struct i915_address_space *vm =
+ &i915_vm_to_ggtt(vma->vm)->alias->vm;
+
+ vm->clear_range(vm, vma->node.start, vma->size);
+ }
+}
+
+static int init_aliasing_ppgtt(struct i915_ggtt *ggtt)
+{
+ struct i915_ppgtt *ppgtt;
+ int err;
+
+ ppgtt = i915_ppgtt_create(ggtt->vm.gt);
+ if (IS_ERR(ppgtt))
+ return PTR_ERR(ppgtt);
+
+ if (GEM_WARN_ON(ppgtt->vm.total < ggtt->vm.total)) {
+ err = -ENODEV;
+ goto err_ppgtt;
+ }
+
+ /*
+ * Note we only pre-allocate as far as the end of the global
+ * GTT. On 48b / 4-level page-tables, the difference is very,
+ * very significant! We have to preallocate as GVT/vgpu does
+ * not like the page directory disappearing.
+ */
+ err = ppgtt->vm.allocate_va_range(&ppgtt->vm, 0, ggtt->vm.total);
+ if (err)
+ goto err_ppgtt;
+
+ ggtt->alias = ppgtt;
+ ggtt->vm.bind_async_flags |= ppgtt->vm.bind_async_flags;
+
+ GEM_BUG_ON(ggtt->vm.vma_ops.bind_vma != ggtt_bind_vma);
+ ggtt->vm.vma_ops.bind_vma = aliasing_gtt_bind_vma;
+
+ GEM_BUG_ON(ggtt->vm.vma_ops.unbind_vma != ggtt_unbind_vma);
+ ggtt->vm.vma_ops.unbind_vma = aliasing_gtt_unbind_vma;
+
+ return 0;
+
+err_ppgtt:
+ i915_vm_put(&ppgtt->vm);
+ return err;
+}
+
+static void fini_aliasing_ppgtt(struct i915_ggtt *ggtt)
+{
+ struct i915_ppgtt *ppgtt;
+
+ ppgtt = fetch_and_zero(&ggtt->alias);
+ if (!ppgtt)
+ return;
+
+ i915_vm_put(&ppgtt->vm);
+
+ ggtt->vm.vma_ops.bind_vma = ggtt_bind_vma;
+ ggtt->vm.vma_ops.unbind_vma = ggtt_unbind_vma;
+}
+
+int i915_init_ggtt(struct drm_i915_private *i915)
+{
+ int ret;
+
+ ret = init_ggtt(&i915->ggtt);
+ if (ret)
+ return ret;
+
+ if (INTEL_PPGTT(i915) == INTEL_PPGTT_ALIASING) {
+ ret = init_aliasing_ppgtt(&i915->ggtt);
+ if (ret)
+ cleanup_init_ggtt(&i915->ggtt);
+ }
+
+ return 0;
+}
+
+static void ggtt_cleanup_hw(struct i915_ggtt *ggtt)
+{
+ struct i915_vma *vma, *vn;
+
+ atomic_set(&ggtt->vm.open, 0);
+
+ rcu_barrier(); /* flush the RCU'ed __i915_vm_release */
+ flush_workqueue(ggtt->vm.i915->wq);
+
+ mutex_lock(&ggtt->vm.mutex);
+
+ list_for_each_entry_safe(vma, vn, &ggtt->vm.bound_list, vm_link)
+ WARN_ON(__i915_vma_unbind(vma));
+
+ if (drm_mm_node_allocated(&ggtt->error_capture))
+ drm_mm_remove_node(&ggtt->error_capture);
+ mutex_destroy(&ggtt->error_mutex);
+
+ ggtt_release_guc_top(ggtt);
+ intel_vgt_deballoon(ggtt);
+
+ ggtt->vm.cleanup(&ggtt->vm);
+
+ mutex_unlock(&ggtt->vm.mutex);
+ i915_address_space_fini(&ggtt->vm);
+
+ arch_phys_wc_del(ggtt->mtrr);
+
+ if (ggtt->iomap.size)
+ io_mapping_fini(&ggtt->iomap);
+}
+
+/**
+ * i915_ggtt_driver_release - Clean up GGTT hardware initialization
+ * @i915: i915 device
+ */
+void i915_ggtt_driver_release(struct drm_i915_private *i915)
+{
+ struct pagevec *pvec;
+
+ fini_aliasing_ppgtt(&i915->ggtt);
+
+ ggtt_cleanup_hw(&i915->ggtt);
+
+ pvec = &i915->mm.wc_stash.pvec;
+ if (pvec->nr) {
+ set_pages_array_wb(pvec->pages, pvec->nr);
+ __pagevec_release(pvec);
+ }
+}
+
+static unsigned int gen6_get_total_gtt_size(u16 snb_gmch_ctl)
+{
+ snb_gmch_ctl >>= SNB_GMCH_GGMS_SHIFT;
+ snb_gmch_ctl &= SNB_GMCH_GGMS_MASK;
+ return snb_gmch_ctl << 20;
+}
+
+static unsigned int gen8_get_total_gtt_size(u16 bdw_gmch_ctl)
+{
+ bdw_gmch_ctl >>= BDW_GMCH_GGMS_SHIFT;
+ bdw_gmch_ctl &= BDW_GMCH_GGMS_MASK;
+ if (bdw_gmch_ctl)
+ bdw_gmch_ctl = 1 << bdw_gmch_ctl;
+
+#ifdef CONFIG_X86_32
+ /* Limit 32b platforms to a 2GB GGTT: 4 << 20 / pte size * I915_GTT_PAGE_SIZE */
+ if (bdw_gmch_ctl > 4)
+ bdw_gmch_ctl = 4;
+#endif
+
+ return bdw_gmch_ctl << 20;
+}
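A worked decode of the size calculation that consumes this value (numbers illustrative): a raw GGMS field of 3 becomes 1 << 3 = 8, i.e. 8 MiB of PTE space; at 8 bytes per gen8 PTE that is 1M entries, so ggtt->vm.total ends up as 1M * I915_GTT_PAGE_SIZE = 4 GiB.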
+
+static unsigned int chv_get_total_gtt_size(u16 gmch_ctrl)
+{
+ gmch_ctrl >>= SNB_GMCH_GGMS_SHIFT;
+ gmch_ctrl &= SNB_GMCH_GGMS_MASK;
+
+ if (gmch_ctrl)
+ return 1 << (20 + gmch_ctrl);
+
+ return 0;
+}
+
+static int ggtt_probe_common(struct i915_ggtt *ggtt, u64 size)
+{
+ struct drm_i915_private *i915 = ggtt->vm.i915;
+ struct pci_dev *pdev = i915->drm.pdev;
+ phys_addr_t phys_addr;
+ int ret;
+
+ /* For modern GENs the PTEs and register space are split in the BAR */
+ phys_addr = pci_resource_start(pdev, 0) + pci_resource_len(pdev, 0) / 2;
+
+ /*
+ * On BXT+/CNL+ writes larger than 64 bit to the GTT pagetable range
+ * will be dropped. For WC mappings in general we have 64 byte burst
+ * writes when the WC buffer is flushed, so we can't use it, but have to
+ * resort to an uncached mapping. The WC issue is easily caught by the
+ * readback check when writing GTT PTE entries.
+ */
+ if (IS_GEN9_LP(i915) || INTEL_GEN(i915) >= 10)
+ ggtt->gsm = ioremap(phys_addr, size);
+ else
+ ggtt->gsm = ioremap_wc(phys_addr, size);
+ if (!ggtt->gsm) {
+ DRM_ERROR("Failed to map the ggtt page table\n");
+ return -ENOMEM;
+ }
+
+ ret = setup_scratch_page(&ggtt->vm, GFP_DMA32);
+ if (ret) {
+ DRM_ERROR("Scratch setup failed\n");
+ /* iounmap will also get called at remove, but meh */
+ iounmap(ggtt->gsm);
+ return ret;
+ }
+
+ ggtt->vm.scratch[0].encode =
+ ggtt->vm.pte_encode(px_dma(&ggtt->vm.scratch[0]),
+ I915_CACHE_NONE, 0);
+
+ return 0;
+}
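An illustration of the BAR split above, assuming a hypothetical 16 MiB BAR0: the register space occupies the first 8 MiB and the GSM (the GGTT page table) the second 8 MiB, so phys_addr points at pci_resource_start(pdev, 0) + 8 MiB and the ioremap covers only the PTE half.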
+
+int ggtt_set_pages(struct i915_vma *vma)
+{
+ int ret;
+
+ GEM_BUG_ON(vma->pages);
+
+ ret = i915_get_ggtt_vma_pages(vma);
+ if (ret)
+ return ret;
+
+ vma->page_sizes = vma->obj->mm.page_sizes;
+
+ return 0;
+}
+
+static void gen6_gmch_remove(struct i915_address_space *vm)
+{
+ struct i915_ggtt *ggtt = i915_vm_to_ggtt(vm);
+
+ iounmap(ggtt->gsm);
+ cleanup_scratch_page(vm);
+}
+
+static struct resource pci_resource(struct pci_dev *pdev, int bar)
+{
+ return (struct resource)DEFINE_RES_MEM(pci_resource_start(pdev, bar),
+ pci_resource_len(pdev, bar));
+}
+
+static int gen8_gmch_probe(struct i915_ggtt *ggtt)
+{
+ struct drm_i915_private *i915 = ggtt->vm.i915;
+ struct pci_dev *pdev = i915->drm.pdev;
+ unsigned int size;
+ u16 snb_gmch_ctl;
+ int err;
+
+ /* TODO: We're not aware of mappable constraints on gen8 yet */
+ if (!IS_DGFX(i915)) {
+ ggtt->gmadr = pci_resource(pdev, 2);
+ ggtt->mappable_end = resource_size(&ggtt->gmadr);
+ }
+
+ err = pci_set_dma_mask(pdev, DMA_BIT_MASK(39));
+ if (!err)
+ err = pci_set_consistent_dma_mask(pdev, DMA_BIT_MASK(39));
+ if (err)
+ DRM_ERROR("Can't set DMA mask/consistent mask (%d)\n", err);
+
+ pci_read_config_word(pdev, SNB_GMCH_CTRL, &snb_gmch_ctl);
+ if (IS_CHERRYVIEW(i915))
+ size = chv_get_total_gtt_size(snb_gmch_ctl);
+ else
+ size = gen8_get_total_gtt_size(snb_gmch_ctl);
+
+ ggtt->vm.total = (size / sizeof(gen8_pte_t)) * I915_GTT_PAGE_SIZE;
+ ggtt->vm.cleanup = gen6_gmch_remove;
+ ggtt->vm.insert_page = gen8_ggtt_insert_page;
+ ggtt->vm.clear_range = nop_clear_range;
+ if (intel_scanout_needs_vtd_wa(i915))
+ ggtt->vm.clear_range = gen8_ggtt_clear_range;
+
+ ggtt->vm.insert_entries = gen8_ggtt_insert_entries;
+
+ /* Serialize GTT updates with aperture access on BXT if VT-d is on. */
+ if (intel_ggtt_update_needs_vtd_wa(i915) ||
+ IS_CHERRYVIEW(i915) /* fails with concurrent use/update */) {
+ ggtt->vm.insert_entries = bxt_vtd_ggtt_insert_entries__BKL;
+ ggtt->vm.insert_page = bxt_vtd_ggtt_insert_page__BKL;
+ if (ggtt->vm.clear_range != nop_clear_range)
+ ggtt->vm.clear_range = bxt_vtd_ggtt_clear_range__BKL;
+ }
+
+ ggtt->invalidate = gen8_ggtt_invalidate;
+
+ ggtt->vm.vma_ops.bind_vma = ggtt_bind_vma;
+ ggtt->vm.vma_ops.unbind_vma = ggtt_unbind_vma;
+ ggtt->vm.vma_ops.set_pages = ggtt_set_pages;
+ ggtt->vm.vma_ops.clear_pages = clear_pages;
+
+ ggtt->vm.pte_encode = gen8_pte_encode;
+
+ setup_private_pat(ggtt->vm.gt->uncore);
+
+ return ggtt_probe_common(ggtt, size);
+}
+
+static u64 snb_pte_encode(dma_addr_t addr,
+ enum i915_cache_level level,
+ u32 flags)
+{
+ gen6_pte_t pte = GEN6_PTE_ADDR_ENCODE(addr) | GEN6_PTE_VALID;
+
+ switch (level) {
+ case I915_CACHE_L3_LLC:
+ case I915_CACHE_LLC:
+ pte |= GEN6_PTE_CACHE_LLC;
+ break;
+ case I915_CACHE_NONE:
+ pte |= GEN6_PTE_UNCACHED;
+ break;
+ default:
+ MISSING_CASE(level);
+ }
+
+ return pte;
+}
+
+static u64 ivb_pte_encode(dma_addr_t addr,
+ enum i915_cache_level level,
+ u32 flags)
+{
+ gen6_pte_t pte = GEN6_PTE_ADDR_ENCODE(addr) | GEN6_PTE_VALID;
+
+ switch (level) {
+ case I915_CACHE_L3_LLC:
+ pte |= GEN7_PTE_CACHE_L3_LLC;
+ break;
+ case I915_CACHE_LLC:
+ pte |= GEN6_PTE_CACHE_LLC;
+ break;
+ case I915_CACHE_NONE:
+ pte |= GEN6_PTE_UNCACHED;
+ break;
+ default:
+ MISSING_CASE(level);
+ }
+
+ return pte;
+}
+
+static u64 byt_pte_encode(dma_addr_t addr,
+ enum i915_cache_level level,
+ u32 flags)
+{
+ gen6_pte_t pte = GEN6_PTE_ADDR_ENCODE(addr) | GEN6_PTE_VALID;
+
+ if (!(flags & PTE_READ_ONLY))
+ pte |= BYT_PTE_WRITEABLE;
+
+ if (level != I915_CACHE_NONE)
+ pte |= BYT_PTE_SNOOPED_BY_CPU_CACHES;
+
+ return pte;
+}
+
+static u64 hsw_pte_encode(dma_addr_t addr,
+ enum i915_cache_level level,
+ u32 flags)
+{
+ gen6_pte_t pte = HSW_PTE_ADDR_ENCODE(addr) | GEN6_PTE_VALID;
+
+ if (level != I915_CACHE_NONE)
+ pte |= HSW_WB_LLC_AGE3;
+
+ return pte;
+}
+
+static u64 iris_pte_encode(dma_addr_t addr,
+ enum i915_cache_level level,
+ u32 flags)
+{
+ gen6_pte_t pte = HSW_PTE_ADDR_ENCODE(addr) | GEN6_PTE_VALID;
+
+ switch (level) {
+ case I915_CACHE_NONE:
+ break;
+ case I915_CACHE_WT:
+ pte |= HSW_WT_ELLC_LLC_AGE3;
+ break;
+ default:
+ pte |= HSW_WB_ELLC_LLC_AGE3;
+ break;
+ }
+
+ return pte;
+}
+
+static int gen6_gmch_probe(struct i915_ggtt *ggtt)
+{
+ struct drm_i915_private *i915 = ggtt->vm.i915;
+ struct pci_dev *pdev = i915->drm.pdev;
+ unsigned int size;
+ u16 snb_gmch_ctl;
+ int err;
+
+ ggtt->gmadr = pci_resource(pdev, 2);
+ ggtt->mappable_end = resource_size(&ggtt->gmadr);
+
+ /*
+ * 64/512MB is the current min/max we actually know of, but this is
+ * just a coarse sanity check.
+ */
+ if (ggtt->mappable_end < (64<<20) || ggtt->mappable_end > (512<<20)) {
+ DRM_ERROR("Unknown GMADR size (%pa)\n", &ggtt->mappable_end);
+ return -ENXIO;
+ }
+
+ err = pci_set_dma_mask(pdev, DMA_BIT_MASK(40));
+ if (!err)
+ err = pci_set_consistent_dma_mask(pdev, DMA_BIT_MASK(40));
+ if (err)
+ DRM_ERROR("Can't set DMA mask/consistent mask (%d)\n", err);
+ pci_read_config_word(pdev, SNB_GMCH_CTRL, &snb_gmch_ctl);
+
+ size = gen6_get_total_gtt_size(snb_gmch_ctl);
+ ggtt->vm.total = (size / sizeof(gen6_pte_t)) * I915_GTT_PAGE_SIZE;
+
+ ggtt->vm.clear_range = nop_clear_range;
+ if (!HAS_FULL_PPGTT(i915) || intel_scanout_needs_vtd_wa(i915))
+ ggtt->vm.clear_range = gen6_ggtt_clear_range;
+ ggtt->vm.insert_page = gen6_ggtt_insert_page;
+ ggtt->vm.insert_entries = gen6_ggtt_insert_entries;
+ ggtt->vm.cleanup = gen6_gmch_remove;
+
+ ggtt->invalidate = gen6_ggtt_invalidate;
+
+ if (HAS_EDRAM(i915))
+ ggtt->vm.pte_encode = iris_pte_encode;
+ else if (IS_HASWELL(i915))
+ ggtt->vm.pte_encode = hsw_pte_encode;
+ else if (IS_VALLEYVIEW(i915))
+ ggtt->vm.pte_encode = byt_pte_encode;
+ else if (INTEL_GEN(i915) >= 7)
+ ggtt->vm.pte_encode = ivb_pte_encode;
+ else
+ ggtt->vm.pte_encode = snb_pte_encode;
+
+ ggtt->vm.vma_ops.bind_vma = ggtt_bind_vma;
+ ggtt->vm.vma_ops.unbind_vma = ggtt_unbind_vma;
+ ggtt->vm.vma_ops.set_pages = ggtt_set_pages;
+ ggtt->vm.vma_ops.clear_pages = clear_pages;
+
+ return ggtt_probe_common(ggtt, size);
+}
+
+static void i915_gmch_remove(struct i915_address_space *vm)
+{
+ intel_gmch_remove();
+}
+
+static int i915_gmch_probe(struct i915_ggtt *ggtt)
+{
+ struct drm_i915_private *i915 = ggtt->vm.i915;
+ phys_addr_t gmadr_base;
+ int ret;
+
+ ret = intel_gmch_probe(i915->bridge_dev, i915->drm.pdev, NULL);
+ if (!ret) {
+ DRM_ERROR("failed to set up gmch\n");
+ return -EIO;
+ }
+
+ intel_gtt_get(&ggtt->vm.total, &gmadr_base, &ggtt->mappable_end);
+
+ ggtt->gmadr =
+ (struct resource)DEFINE_RES_MEM(gmadr_base, ggtt->mappable_end);
+
+ ggtt->do_idle_maps = needs_idle_maps(i915);
+ ggtt->vm.insert_page = i915_ggtt_insert_page;
+ ggtt->vm.insert_entries = i915_ggtt_insert_entries;
+ ggtt->vm.clear_range = i915_ggtt_clear_range;
+ ggtt->vm.cleanup = i915_gmch_remove;
+
+ ggtt->invalidate = gmch_ggtt_invalidate;
+
+ ggtt->vm.vma_ops.bind_vma = ggtt_bind_vma;
+ ggtt->vm.vma_ops.unbind_vma = ggtt_unbind_vma;
+ ggtt->vm.vma_ops.set_pages = ggtt_set_pages;
+ ggtt->vm.vma_ops.clear_pages = clear_pages;
+
+ if (unlikely(ggtt->do_idle_maps))
+ dev_notice(i915->drm.dev,
+ "Applying Ironlake quirks for intel_iommu\n");
+
+ return 0;
+}
+
+static int ggtt_probe_hw(struct i915_ggtt *ggtt, struct intel_gt *gt)
+{
+ struct drm_i915_private *i915 = gt->i915;
+ int ret;
+
+ ggtt->vm.gt = gt;
+ ggtt->vm.i915 = i915;
+ ggtt->vm.dma = &i915->drm.pdev->dev;
+
+ if (INTEL_GEN(i915) <= 5)
+ ret = i915_gmch_probe(ggtt);
+ else if (INTEL_GEN(i915) < 8)
+ ret = gen6_gmch_probe(ggtt);
+ else
+ ret = gen8_gmch_probe(ggtt);
+ if (ret)
+ return ret;
+
+ if ((ggtt->vm.total - 1) >> 32) {
+ DRM_ERROR("We never expected a Global GTT with more than 32bits"
+ " of address space! Found %lldM!\n",
+ ggtt->vm.total >> 20);
+ ggtt->vm.total = 1ULL << 32;
+ ggtt->mappable_end =
+ min_t(u64, ggtt->mappable_end, ggtt->vm.total);
+ }
+
+ if (ggtt->mappable_end > ggtt->vm.total) {
+ DRM_ERROR("mappable aperture extends past end of GGTT,"
+ " aperture=%pa, total=%llx\n",
+ &ggtt->mappable_end, ggtt->vm.total);
+ ggtt->mappable_end = ggtt->vm.total;
+ }
+
+ /* GMADR is the PCI mmio aperture into the global GTT. */
+ DRM_DEBUG_DRIVER("GGTT size = %lluM\n", ggtt->vm.total >> 20);
+ DRM_DEBUG_DRIVER("GMADR size = %lluM\n", (u64)ggtt->mappable_end >> 20);
+ DRM_DEBUG_DRIVER("DSM size = %lluM\n",
+ (u64)resource_size(&intel_graphics_stolen_res) >> 20);
+
+ return 0;
+}
+
+/**
+ * i915_ggtt_probe_hw - Probe GGTT hardware location
+ * @i915: i915 device
+ */
+int i915_ggtt_probe_hw(struct drm_i915_private *i915)
+{
+ int ret;
+
+ ret = ggtt_probe_hw(&i915->ggtt, &i915->gt);
+ if (ret)
+ return ret;
+
+ if (intel_vtd_active())
+ dev_info(i915->drm.dev, "VT-d active for gfx access\n");
+
+ return 0;
+}
+
+int i915_ggtt_enable_hw(struct drm_i915_private *i915)
+{
+ if (INTEL_GEN(i915) < 6 && !intel_enable_gtt())
+ return -EIO;
+
+ return 0;
+}
+
+void i915_ggtt_enable_guc(struct i915_ggtt *ggtt)
+{
+ GEM_BUG_ON(ggtt->invalidate != gen8_ggtt_invalidate);
+
+ ggtt->invalidate = guc_ggtt_invalidate;
+
+ ggtt->invalidate(ggtt);
+}
+
+void i915_ggtt_disable_guc(struct i915_ggtt *ggtt)
+{
+ /* XXX Temporary pardon for error unload */
+ if (ggtt->invalidate == gen8_ggtt_invalidate)
+ return;
+
+ /* We should only be called after i915_ggtt_enable_guc() */
+ GEM_BUG_ON(ggtt->invalidate != guc_ggtt_invalidate);
+
+ ggtt->invalidate = gen8_ggtt_invalidate;
+
+ ggtt->invalidate(ggtt);
+}
+
+static void ggtt_restore_mappings(struct i915_ggtt *ggtt)
+{
+ struct i915_vma *vma;
+ bool flush = false;
+ int open;
+
+ intel_gt_check_and_clear_faults(ggtt->vm.gt);
+
+ mutex_lock(&ggtt->vm.mutex);
+
+ /* First fill our portion of the GTT with scratch pages */
+ ggtt->vm.clear_range(&ggtt->vm, 0, ggtt->vm.total);
+
+ /* Skip rewriting PTE on VMA unbind. */
+ open = atomic_xchg(&ggtt->vm.open, 0);
+
+ /* clflush objects bound into the GGTT and rebind them. */
+ list_for_each_entry(vma, &ggtt->vm.bound_list, vm_link) {
+ struct drm_i915_gem_object *obj = vma->obj;
+
+ if (!i915_vma_is_bound(vma, I915_VMA_GLOBAL_BIND))
+ continue;
+
+ clear_bit(I915_VMA_GLOBAL_BIND_BIT, __i915_vma_flags(vma));
+ WARN_ON(i915_vma_bind(vma,
+ obj ? obj->cache_level : 0,
+ PIN_GLOBAL, NULL));
+ if (obj) { /* only used during resume => exclusive access */
+ flush |= fetch_and_zero(&obj->write_domain);
+ obj->read_domains |= I915_GEM_DOMAIN_GTT;
+ }
+ }
+
+ atomic_set(&ggtt->vm.open, open);
+ ggtt->invalidate(ggtt);
+
+ mutex_unlock(&ggtt->vm.mutex);
+
+ if (flush)
+ wbinvd_on_all_cpus();
+}
+
+void i915_gem_restore_gtt_mappings(struct drm_i915_private *i915)
+{
+ struct i915_ggtt *ggtt = &i915->ggtt;
+
+ ggtt_restore_mappings(ggtt);
+
+ if (INTEL_GEN(i915) >= 8)
+ setup_private_pat(ggtt->vm.gt->uncore);
+}
+
+static struct scatterlist *
+rotate_pages(struct drm_i915_gem_object *obj, unsigned int offset,
+ unsigned int width, unsigned int height,
+ unsigned int stride,
+ struct sg_table *st, struct scatterlist *sg)
+{
+ unsigned int column, row;
+ unsigned int src_idx;
+
+ for (column = 0; column < width; column++) {
+ src_idx = stride * (height - 1) + column + offset;
+ for (row = 0; row < height; row++) {
+ st->nents++;
+ /*
+ * We don't need the pages, but need to initialize
+ * the entries so the sg list can be happily traversed.
+ * The only things we need are the DMA addresses.
+ */
+ sg_set_page(sg, NULL, I915_GTT_PAGE_SIZE, 0);
+ sg_dma_address(sg) =
+ i915_gem_object_get_dma_address(obj, src_idx);
+ sg_dma_len(sg) = I915_GTT_PAGE_SIZE;
+ sg = sg_next(sg);
+ src_idx -= stride;
+ }
+ }
+
+ return sg;
+}
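A worked trace of the index walk, assuming a hypothetical 2x2 view (width = height = stride = 2, offset = 0): column 0 emits source pages 2 then 0, column 1 emits 3 then 1. Each column is read bottom-up, which is what produces the 90 degree rotated layout in the GTT.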
+
+static noinline struct sg_table *
+intel_rotate_pages(struct intel_rotation_info *rot_info,
+ struct drm_i915_gem_object *obj)
+{
+ unsigned int size = intel_rotation_info_size(rot_info);
+ struct sg_table *st;
+ struct scatterlist *sg;
+ int ret = -ENOMEM;
+ int i;
+
+ /* Allocate target SG list. */
+ st = kmalloc(sizeof(*st), GFP_KERNEL);
+ if (!st)
+ goto err_st_alloc;
+
+ ret = sg_alloc_table(st, size, GFP_KERNEL);
+ if (ret)
+ goto err_sg_alloc;
+
+ st->nents = 0;
+ sg = st->sgl;
+
+ for (i = 0 ; i < ARRAY_SIZE(rot_info->plane); i++) {
+ sg = rotate_pages(obj, rot_info->plane[i].offset,
+ rot_info->plane[i].width, rot_info->plane[i].height,
+ rot_info->plane[i].stride, st, sg);
+ }
+
+ return st;
+
+err_sg_alloc:
+ kfree(st);
+err_st_alloc:
+
+ DRM_DEBUG_DRIVER("Failed to create rotated mapping for object size %zu! (%ux%u tiles, %u pages)\n",
+ obj->base.size, rot_info->plane[0].width, rot_info->plane[0].height, size);
+
+ return ERR_PTR(ret);
+}
+
+static struct scatterlist *
+remap_pages(struct drm_i915_gem_object *obj, unsigned int offset,
+ unsigned int width, unsigned int height,
+ unsigned int stride,
+ struct sg_table *st, struct scatterlist *sg)
+{
+ unsigned int row;
+
+ for (row = 0; row < height; row++) {
+ unsigned int left = width * I915_GTT_PAGE_SIZE;
+
+ while (left) {
+ dma_addr_t addr;
+ unsigned int length;
+
+ /*
+ * We don't need the pages, but need to initialize
+ * the entries so the sg list can be happily traversed.
+ * The only things we need are the DMA addresses.
+ */
+
+ addr = i915_gem_object_get_dma_address_len(obj, offset, &length);
+
+ length = min(left, length);
+
+ st->nents++;
+
+ sg_set_page(sg, NULL, length, 0);
+ sg_dma_address(sg) = addr;
+ sg_dma_len(sg) = length;
+ sg = sg_next(sg);
+
+ offset += length / I915_GTT_PAGE_SIZE;
+ left -= length;
+ }
+
+ offset += stride - width;
+ }
+
+ return sg;
+}
+
+static noinline struct sg_table *
+intel_remap_pages(struct intel_remapped_info *rem_info,
+ struct drm_i915_gem_object *obj)
+{
+ unsigned int size = intel_remapped_info_size(rem_info);
+ struct sg_table *st;
+ struct scatterlist *sg;
+ int ret = -ENOMEM;
+ int i;
+
+ /* Allocate target SG list. */
+ st = kmalloc(sizeof(*st), GFP_KERNEL);
+ if (!st)
+ goto err_st_alloc;
+
+ ret = sg_alloc_table(st, size, GFP_KERNEL);
+ if (ret)
+ goto err_sg_alloc;
+
+ st->nents = 0;
+ sg = st->sgl;
+
+ for (i = 0 ; i < ARRAY_SIZE(rem_info->plane); i++) {
+ sg = remap_pages(obj, rem_info->plane[i].offset,
+ rem_info->plane[i].width, rem_info->plane[i].height,
+ rem_info->plane[i].stride, st, sg);
+ }
+
+ i915_sg_trim(st);
+
+ return st;
+
+err_sg_alloc:
+ kfree(st);
+err_st_alloc:
+
+ DRM_DEBUG_DRIVER("Failed to create remapped mapping for object size %zu! (%ux%u tiles, %u pages)\n",
+ obj->base.size, rem_info->plane[0].width, rem_info->plane[0].height, size);
+
+ return ERR_PTR(ret);
+}
+
+static noinline struct sg_table *
+intel_partial_pages(const struct i915_ggtt_view *view,
+ struct drm_i915_gem_object *obj)
+{
+ struct sg_table *st;
+ struct scatterlist *sg, *iter;
+ unsigned int count = view->partial.size;
+ unsigned int offset;
+ int ret = -ENOMEM;
+
+ st = kmalloc(sizeof(*st), GFP_KERNEL);
+ if (!st)
+ goto err_st_alloc;
+
+ ret = sg_alloc_table(st, count, GFP_KERNEL);
+ if (ret)
+ goto err_sg_alloc;
+
+ iter = i915_gem_object_get_sg(obj, view->partial.offset, &offset);
+ GEM_BUG_ON(!iter);
+
+ sg = st->sgl;
+ st->nents = 0;
+ do {
+ unsigned int len;
+
+ len = min(iter->length - (offset << PAGE_SHIFT),
+ count << PAGE_SHIFT);
+ sg_set_page(sg, NULL, len, 0);
+ sg_dma_address(sg) =
+ sg_dma_address(iter) + (offset << PAGE_SHIFT);
+ sg_dma_len(sg) = len;
+
+ st->nents++;
+ count -= len >> PAGE_SHIFT;
+ if (count == 0) {
+ sg_mark_end(sg);
+ i915_sg_trim(st); /* Drop any unused tail entries. */
+
+ return st;
+ }
+
+ sg = __sg_next(sg);
+ iter = __sg_next(iter);
+ offset = 0;
+ } while (1);
+
+err_sg_alloc:
+ kfree(st);
+err_st_alloc:
+ return ERR_PTR(ret);
+}
+
+static int
+i915_get_ggtt_vma_pages(struct i915_vma *vma)
+{
+ int ret;
+
+ /*
+ * The vma->pages are only valid within the lifespan of the borrowed
+ * obj->mm.pages. When the obj->mm.pages sg_table is regenerated, so
+ * must be the vma->pages. A simple rule is that vma->pages must only
+ * be accessed when the obj->mm.pages are pinned.
+ */
+ GEM_BUG_ON(!i915_gem_object_has_pinned_pages(vma->obj));
+
+ switch (vma->ggtt_view.type) {
+ default:
+ GEM_BUG_ON(vma->ggtt_view.type);
+ /* fall through */
+ case I915_GGTT_VIEW_NORMAL:
+ vma->pages = vma->obj->mm.pages;
+ return 0;
+
+ case I915_GGTT_VIEW_ROTATED:
+ vma->pages =
+ intel_rotate_pages(&vma->ggtt_view.rotated, vma->obj);
+ break;
+
+ case I915_GGTT_VIEW_REMAPPED:
+ vma->pages =
+ intel_remap_pages(&vma->ggtt_view.remapped, vma->obj);
+ break;
+
+ case I915_GGTT_VIEW_PARTIAL:
+ vma->pages = intel_partial_pages(&vma->ggtt_view, vma->obj);
+ break;
+ }
+
+ ret = 0;
+ if (IS_ERR(vma->pages)) {
+ ret = PTR_ERR(vma->pages);
+ vma->pages = NULL;
+ DRM_ERROR("Failed to get pages for VMA view type %u (%d)!\n",
+ vma->ggtt_view.type, ret);
+ }
+ return ret;
+}
diff --git a/drivers/gpu/drm/i915/gt/intel_gpu_commands.h b/drivers/gpu/drm/i915/gt/intel_gpu_commands.h
index eec31e36aca7..51b8718513bc 100644
--- a/drivers/gpu/drm/i915/gt/intel_gpu_commands.h
+++ b/drivers/gpu/drm/i915/gt/intel_gpu_commands.h
@@ -7,6 +7,15 @@
#ifndef _INTEL_GPU_COMMANDS_H_
#define _INTEL_GPU_COMMANDS_H_
+#include <linux/bitops.h>
+
+/*
+ * Target address alignments required for GPU access e.g.
+ * MI_STORE_DWORD_IMM.
+ */
+#define alignof_dword 4
+#define alignof_qword 8
+
/*
* Instruction field definitions used by the command parser
*/
@@ -105,6 +114,7 @@
#define MI_SEMAPHORE_SIGNAL MI_INSTR(0x1b, 0) /* GEN8+ */
#define MI_SEMAPHORE_TARGET(engine) ((engine)<<15)
#define MI_SEMAPHORE_WAIT MI_INSTR(0x1c, 2) /* GEN8+ */
+#define MI_SEMAPHORE_WAIT_TOKEN MI_INSTR(0x1c, 3) /* GEN12+ */
#define MI_SEMAPHORE_POLL (1 << 15)
#define MI_SEMAPHORE_SAD_GT_SDD (0 << 12)
#define MI_SEMAPHORE_SAD_GTE_SDD (1 << 12)
@@ -112,6 +122,8 @@
#define MI_SEMAPHORE_SAD_LTE_SDD (3 << 12)
#define MI_SEMAPHORE_SAD_EQ_SDD (4 << 12)
#define MI_SEMAPHORE_SAD_NEQ_SDD (5 << 12)
+#define MI_SEMAPHORE_TOKEN_MASK REG_GENMASK(9, 5)
+#define MI_SEMAPHORE_TOKEN_SHIFT 5
#define MI_STORE_DWORD_IMM MI_INSTR(0x20, 1)
#define MI_STORE_DWORD_IMM_GEN4 MI_INSTR(0x20, 2)
#define MI_MEM_VIRTUAL (1 << 22) /* 945,g33,965 */
@@ -125,7 +137,10 @@
 * address/value pairs. Don't overdo it, though: x <= 2^4 must hold!
*/
#define MI_LOAD_REGISTER_IMM(x) MI_INSTR(0x22, 2*(x)-1)
+/* Gen11+. addr = base + (ctx_restore ? offset & GENMASK(12,2) : offset) */
+#define MI_LRI_CS_MMIO (1<<19)
#define MI_LRI_FORCE_POSTED (1<<12)
+#define MI_LOAD_REGISTER_IMM_MAX_REGS (126)
#define MI_STORE_REGISTER_MEM MI_INSTR(0x24, 1)
#define MI_STORE_REGISTER_MEM_GEN8 MI_INSTR(0x24, 2)
#define MI_SRM_LRM_GLOBAL_GTT (1<<22)
@@ -140,6 +155,7 @@
#define MI_FLUSH_DW_USE_PPGTT (0<<2)
#define MI_LOAD_REGISTER_MEM MI_INSTR(0x29, 1)
#define MI_LOAD_REGISTER_MEM_GEN8 MI_INSTR(0x29, 2)
+#define MI_LOAD_REGISTER_REG MI_INSTR(0x2A, 1)
#define MI_BATCH_BUFFER MI_INSTR(0x30, 1)
#define MI_BATCH_NON_SECURE (1)
/* for snb/ivb/vlv this also means "batch in ppgtt" when ppgtt is enabled. */
@@ -149,7 +165,8 @@
#define MI_BATCH_BUFFER_START MI_INSTR(0x31, 0)
#define MI_BATCH_GTT (2<<6) /* aliased with (1<<7) on gen4 */
#define MI_BATCH_BUFFER_START_GEN8 MI_INSTR(0x31, 1)
-#define MI_BATCH_RESOURCE_STREAMER (1<<10)
+#define MI_BATCH_RESOURCE_STREAMER REG_BIT(10)
+#define MI_BATCH_PREDICATE REG_BIT(15) /* HSW+ on RCS only */
/*
* 3D instructions used by the kernel
@@ -179,11 +196,12 @@
#define GFX_OP_DRAWRECT_INFO ((0x3<<29)|(0x1d<<24)|(0x80<<16)|(0x3))
#define GFX_OP_DRAWRECT_INFO_I965 ((0x7900<<16)|0x2)
-#define COLOR_BLT_CMD (2<<29 | 0x40<<22 | (5-2))
+#define COLOR_BLT_CMD (2 << 29 | 0x40 << 22 | (5 - 2))
#define XY_COLOR_BLT_CMD (2 << 29 | 0x50 << 22)
-#define SRC_COPY_BLT_CMD ((2<<29)|(0x43<<22)|4)
-#define XY_SRC_COPY_BLT_CMD ((2<<29)|(0x53<<22)|6)
-#define XY_MONO_SRC_COPY_IMM_BLT ((2<<29)|(0x71<<22)|5)
+#define SRC_COPY_BLT_CMD (2 << 29 | 0x43 << 22)
+#define GEN9_XY_FAST_COPY_BLT_CMD (2 << 29 | 0x42 << 22)
+#define XY_SRC_COPY_BLT_CMD (2 << 29 | 0x53 << 22)
+#define XY_MONO_SRC_COPY_IMM_BLT (2 << 29 | 0x71 << 22 | 5)
#define BLT_WRITE_A (2<<20)
#define BLT_WRITE_RGB (1<<20)
#define BLT_WRITE_RGBA (BLT_WRITE_RGB | BLT_WRITE_A)
@@ -200,6 +218,8 @@
#define DISPLAY_PLANE_A (0<<20)
#define DISPLAY_PLANE_B (1<<20)
#define GFX_OP_PIPE_CONTROL(len) ((0x3<<29)|(0x3<<27)|(0x2<<24)|((len)-2))
+#define PIPE_CONTROL_COMMAND_CACHE_INVALIDATE (1<<29) /* gen11+ */
+#define PIPE_CONTROL_TILE_CACHE_FLUSH (1<<28) /* gen11+ */
#define PIPE_CONTROL_FLUSH_L3 (1<<27)
#define PIPE_CONTROL_GLOBAL_GTT_IVB (1<<24) /* gen7+ */
#define PIPE_CONTROL_MMIO_WRITE (1<<23)
@@ -207,6 +227,7 @@
#define PIPE_CONTROL_CS_STALL (1<<20)
#define PIPE_CONTROL_TLB_INVALIDATE (1<<18)
#define PIPE_CONTROL_MEDIA_STATE_CLEAR (1<<16)
+#define PIPE_CONTROL_WRITE_TIMESTAMP (3<<14)
#define PIPE_CONTROL_QW_WRITE (1<<14)
#define PIPE_CONTROL_POST_SYNC_OP_MASK (3<<14)
#define PIPE_CONTROL_DEPTH_STALL (1<<13)
@@ -214,7 +235,9 @@
#define PIPE_CONTROL_RENDER_TARGET_CACHE_FLUSH (1<<12) /* gen6+ */
#define PIPE_CONTROL_INSTRUCTION_CACHE_INVALIDATE (1<<11) /* MBZ on ILK */
#define PIPE_CONTROL_TEXTURE_CACHE_INVALIDATE (1<<10) /* GM45+ only */
+#define PIPE_CONTROL_L3_RO_CACHE_INVALIDATE REG_BIT(10) /* gen12 */
#define PIPE_CONTROL_INDIRECT_STATE_DISABLE (1<<9)
+#define PIPE_CONTROL_HDC_PIPELINE_FLUSH REG_BIT(9) /* gen12 */
#define PIPE_CONTROL_NOTIFY (1<<8)
#define PIPE_CONTROL_FLUSH_ENABLE (1<<7) /* gen7+ */
#define PIPE_CONTROL_DC_FLUSH_ENABLE (1<<5)
@@ -225,6 +248,29 @@
#define PIPE_CONTROL_DEPTH_CACHE_FLUSH (1<<0)
#define PIPE_CONTROL_GLOBAL_GTT (1<<2) /* in addr dword */
+#define MI_MATH(x) MI_INSTR(0x1a, (x) - 1)
+#define MI_MATH_INSTR(opcode, op1, op2) ((opcode) << 20 | (op1) << 10 | (op2))
+/* Opcodes for MI_MATH_INSTR */
+#define MI_MATH_NOOP MI_MATH_INSTR(0x000, 0x0, 0x0)
+#define MI_MATH_LOAD(op1, op2) MI_MATH_INSTR(0x080, op1, op2)
+#define MI_MATH_LOADINV(op1, op2) MI_MATH_INSTR(0x480, op1, op2)
+#define MI_MATH_LOAD0(op1) MI_MATH_INSTR(0x081, op1)
+#define MI_MATH_LOAD1(op1) MI_MATH_INSTR(0x481, op1)
+#define MI_MATH_ADD MI_MATH_INSTR(0x100, 0x0, 0x0)
+#define MI_MATH_SUB MI_MATH_INSTR(0x101, 0x0, 0x0)
+#define MI_MATH_AND MI_MATH_INSTR(0x102, 0x0, 0x0)
+#define MI_MATH_OR MI_MATH_INSTR(0x103, 0x0, 0x0)
+#define MI_MATH_XOR MI_MATH_INSTR(0x104, 0x0, 0x0)
+#define MI_MATH_STORE(op1, op2) MI_MATH_INSTR(0x180, op1, op2)
+#define MI_MATH_STOREINV(op1, op2) MI_MATH_INSTR(0x580, op1, op2)
+/* Registers used as operands in MI_MATH_INSTR */
+#define MI_MATH_REG(x) (x)
+#define MI_MATH_REG_SRCA 0x20
+#define MI_MATH_REG_SRCB 0x21
+#define MI_MATH_REG_ACCU 0x31
+#define MI_MATH_REG_ZF 0x32
+#define MI_MATH_REG_CF 0x33
+
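A typical MI_MATH sequence, assuming hypothetical GPR numbers: compute r2 = r0 + r1 on the command-streamer ALU.

	*cs++ = MI_MATH(4);
	*cs++ = MI_MATH_LOAD(MI_MATH_REG_SRCA, MI_MATH_REG(0));
	*cs++ = MI_MATH_LOAD(MI_MATH_REG_SRCB, MI_MATH_REG(1));
	*cs++ = MI_MATH_ADD;
	*cs++ = MI_MATH_STORE(MI_MATH_REG(2), MI_MATH_REG_ACCU);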
/*
* Commands used only by the command parser
*/
@@ -241,7 +287,6 @@
#define MI_CLFLUSH MI_INSTR(0x27, 0)
#define MI_REPORT_PERF_COUNT MI_INSTR(0x28, 0)
#define MI_REPORT_PERF_COUNT_GGTT (1<<0)
-#define MI_LOAD_REGISTER_REG MI_INSTR(0x2A, 0)
#define MI_RS_STORE_DATA_IMM MI_INSTR(0x2B, 0)
#define MI_LOAD_URB_MEM MI_INSTR(0x2C, 0)
#define MI_STORE_URB_MEM MI_INSTR(0x2D, 0)
@@ -276,4 +321,31 @@
#define COLOR_BLT ((0x2<<29)|(0x40<<22))
#define SRC_COPY_BLT ((0x2<<29)|(0x43<<22))
+/*
+ * Used to convert any address to canonical form.
+ * Starting from gen8, some commands (e.g. STATE_BASE_ADDRESS,
+ * MI_LOAD_REGISTER_MEM and others, see Broadwell PRM Vol2a) require the
+ * addresses to be in a canonical form:
+ * "GraphicsAddress[63:48] are ignored by the HW and assumed to be in correct
+ * canonical form [63:48] == [47]."
+ */
+#define GEN8_HIGH_ADDRESS_BIT 47
+static inline u64 gen8_canonical_addr(u64 address)
+{
+ return sign_extend64(address, GEN8_HIGH_ADDRESS_BIT);
+}
+
+static inline u64 gen8_noncanonical_addr(u64 address)
+{
+ return address & GENMASK_ULL(GEN8_HIGH_ADDRESS_BIT, 0);
+}
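A worked example of the round trip, values illustrative:

	gen8_canonical_addr(0x0000800000000000)    == 0xffff800000000000
	gen8_noncanonical_addr(0xffff800000000000) == 0x0000800000000000

Bits 63:48 become a copy of bit 47, and masking them off recovers the original 48-bit address.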
+
+static inline u32 *__gen6_emit_bb_start(u32 *cs, u32 addr, unsigned int flags)
+{
+ *cs++ = MI_BATCH_BUFFER_START | flags;
+ *cs++ = addr;
+
+ return cs;
+}
+
#endif /* _INTEL_GPU_COMMANDS_H_ */
diff --git a/drivers/gpu/drm/i915/gt/intel_gt.c b/drivers/gpu/drm/i915/gt/intel_gt.c
new file mode 100644
index 000000000000..da2b6e2ae692
--- /dev/null
+++ b/drivers/gpu/drm/i915/gt/intel_gt.c
@@ -0,0 +1,658 @@
+// SPDX-License-Identifier: MIT
+/*
+ * Copyright © 2019 Intel Corporation
+ */
+
+#include "debugfs_gt.h"
+#include "i915_drv.h"
+#include "intel_context.h"
+#include "intel_gt.h"
+#include "intel_gt_pm.h"
+#include "intel_gt_requests.h"
+#include "intel_mocs.h"
+#include "intel_rc6.h"
+#include "intel_renderstate.h"
+#include "intel_rps.h"
+#include "intel_uncore.h"
+#include "intel_pm.h"
+
+void intel_gt_init_early(struct intel_gt *gt, struct drm_i915_private *i915)
+{
+ gt->i915 = i915;
+ gt->uncore = &i915->uncore;
+
+ spin_lock_init(&gt->irq_lock);
+
+ INIT_LIST_HEAD(&gt->closed_vma);
+ spin_lock_init(&gt->closed_lock);
+
+ intel_gt_init_reset(gt);
+ intel_gt_init_requests(gt);
+ intel_gt_init_timelines(gt);
+ intel_gt_pm_init_early(gt);
+
+ intel_rps_init_early(&gt->rps);
+ intel_uc_init_early(&gt->uc);
+}
+
+void intel_gt_init_hw_early(struct intel_gt *gt, struct i915_ggtt *ggtt)
+{
+ gt->ggtt = ggtt;
+}
+
+static void init_unused_ring(struct intel_gt *gt, u32 base)
+{
+ struct intel_uncore *uncore = gt->uncore;
+
+ intel_uncore_write(uncore, RING_CTL(base), 0);
+ intel_uncore_write(uncore, RING_HEAD(base), 0);
+ intel_uncore_write(uncore, RING_TAIL(base), 0);
+ intel_uncore_write(uncore, RING_START(base), 0);
+}
+
+static void init_unused_rings(struct intel_gt *gt)
+{
+ struct drm_i915_private *i915 = gt->i915;
+
+ if (IS_I830(i915)) {
+ init_unused_ring(gt, PRB1_BASE);
+ init_unused_ring(gt, SRB0_BASE);
+ init_unused_ring(gt, SRB1_BASE);
+ init_unused_ring(gt, SRB2_BASE);
+ init_unused_ring(gt, SRB3_BASE);
+ } else if (IS_GEN(i915, 2)) {
+ init_unused_ring(gt, SRB0_BASE);
+ init_unused_ring(gt, SRB1_BASE);
+ } else if (IS_GEN(i915, 3)) {
+ init_unused_ring(gt, PRB1_BASE);
+ init_unused_ring(gt, PRB2_BASE);
+ }
+}
+
+int intel_gt_init_hw(struct intel_gt *gt)
+{
+ struct drm_i915_private *i915 = gt->i915;
+ struct intel_uncore *uncore = gt->uncore;
+ int ret;
+
+ gt->last_init_time = ktime_get();
+
+ /* Double layer security blanket, see i915_gem_init() */
+ intel_uncore_forcewake_get(uncore, FORCEWAKE_ALL);
+
+ if (HAS_EDRAM(i915) && INTEL_GEN(i915) < 9)
+ intel_uncore_rmw(uncore, HSW_IDICR, 0, IDIHASHMSK(0xf));
+
+ if (IS_HASWELL(i915))
+ intel_uncore_write(uncore,
+ MI_PREDICATE_RESULT_2,
+ IS_HSW_GT3(i915) ?
+ LOWER_SLICE_ENABLED : LOWER_SLICE_DISABLED);
+
+ /* Apply the GT workarounds... */
+ intel_gt_apply_workarounds(gt);
+ /* ...and determine whether they are sticking. */
+ intel_gt_verify_workarounds(gt, "init");
+
+ intel_gt_init_swizzling(gt);
+
+ /*
+ * At least 830 can leave some of the unused rings
+ * "active" (ie. head != tail) after resume which
+ * will prevent c3 entry. Makes sure all unused rings
+ * are totally idle.
+ */
+ init_unused_rings(gt);
+
+ ret = i915_ppgtt_init_hw(gt);
+ if (ret) {
+ DRM_ERROR("Enabling PPGTT failed (%d)\n", ret);
+ goto out;
+ }
+
+ /* We can't enable contexts until all firmware is loaded */
+ ret = intel_uc_init_hw(&gt->uc);
+ if (ret) {
+ i915_probe_error(i915, "Enabling uc failed (%d)\n", ret);
+ goto out;
+ }
+
+ intel_mocs_init(gt);
+
+out:
+ intel_uncore_forcewake_put(uncore, FORCEWAKE_ALL);
+ return ret;
+}
+
+static void rmw_set(struct intel_uncore *uncore, i915_reg_t reg, u32 set)
+{
+ intel_uncore_rmw(uncore, reg, 0, set);
+}
+
+static void rmw_clear(struct intel_uncore *uncore, i915_reg_t reg, u32 clr)
+{
+ intel_uncore_rmw(uncore, reg, clr, 0);
+}
+
+static void clear_register(struct intel_uncore *uncore, i915_reg_t reg)
+{
+ intel_uncore_rmw(uncore, reg, 0, 0);
+}
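+
+/*
+ * Note: clear_register() is a read-modify-write that neither clears
+ * nor sets any bits, i.e. it writes the current value straight back.
+ * For the error registers below this presumably relies on their
+ * write-to-clear behaviour: writing the latched bits back clears them.
+ */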
+
+static void gen8_clear_engine_error_register(struct intel_engine_cs *engine)
+{
+ GEN6_RING_FAULT_REG_RMW(engine, RING_FAULT_VALID, 0);
+ GEN6_RING_FAULT_REG_POSTING_READ(engine);
+}
+
+void
+intel_gt_clear_error_registers(struct intel_gt *gt,
+ intel_engine_mask_t engine_mask)
+{
+ struct drm_i915_private *i915 = gt->i915;
+ struct intel_uncore *uncore = gt->uncore;
+ u32 eir;
+
+ if (!IS_GEN(i915, 2))
+ clear_register(uncore, PGTBL_ER);
+
+ if (INTEL_GEN(i915) < 4)
+ clear_register(uncore, IPEIR(RENDER_RING_BASE));
+ else
+ clear_register(uncore, IPEIR_I965);
+
+ clear_register(uncore, EIR);
+ eir = intel_uncore_read(uncore, EIR);
+ if (eir) {
+ /*
+ * Some errors might have become stuck;
+ * mask them.
+ */
+ DRM_DEBUG_DRIVER("EIR stuck: 0x%08x, masking\n", eir);
+ rmw_set(uncore, EMR, eir);
+ intel_uncore_write(uncore, GEN2_IIR,
+ I915_MASTER_ERROR_INTERRUPT);
+ }
+
+ if (INTEL_GEN(i915) >= 12) {
+ rmw_clear(uncore, GEN12_RING_FAULT_REG, RING_FAULT_VALID);
+ intel_uncore_posting_read(uncore, GEN12_RING_FAULT_REG);
+ } else if (INTEL_GEN(i915) >= 8) {
+ rmw_clear(uncore, GEN8_RING_FAULT_REG, RING_FAULT_VALID);
+ intel_uncore_posting_read(uncore, GEN8_RING_FAULT_REG);
+ } else if (INTEL_GEN(i915) >= 6) {
+ struct intel_engine_cs *engine;
+ enum intel_engine_id id;
+
+ for_each_engine_masked(engine, gt, engine_mask, id)
+ gen8_clear_engine_error_register(engine);
+ }
+}
+
+static void gen6_check_faults(struct intel_gt *gt)
+{
+ struct intel_engine_cs *engine;
+ enum intel_engine_id id;
+ u32 fault;
+
+ for_each_engine(engine, gt, id) {
+ fault = GEN6_RING_FAULT_REG_READ(engine);
+ if (fault & RING_FAULT_VALID) {
+ DRM_DEBUG_DRIVER("Unexpected fault\n"
+ "\tAddr: 0x%08lx\n"
+ "\tAddress space: %s\n"
+ "\tSource ID: %d\n"
+ "\tType: %d\n",
+ fault & PAGE_MASK,
+ fault & RING_FAULT_GTTSEL_MASK ?
+ "GGTT" : "PPGTT",
+ RING_FAULT_SRCID(fault),
+ RING_FAULT_FAULT_TYPE(fault));
+ }
+ }
+}
+
+static void gen8_check_faults(struct intel_gt *gt)
+{
+ struct intel_uncore *uncore = gt->uncore;
+ i915_reg_t fault_reg, fault_data0_reg, fault_data1_reg;
+ u32 fault;
+
+ if (INTEL_GEN(gt->i915) >= 12) {
+ fault_reg = GEN12_RING_FAULT_REG;
+ fault_data0_reg = GEN12_FAULT_TLB_DATA0;
+ fault_data1_reg = GEN12_FAULT_TLB_DATA1;
+ } else {
+ fault_reg = GEN8_RING_FAULT_REG;
+ fault_data0_reg = GEN8_FAULT_TLB_DATA0;
+ fault_data1_reg = GEN8_FAULT_TLB_DATA1;
+ }
+
+ fault = intel_uncore_read(uncore, fault_reg);
+ if (fault & RING_FAULT_VALID) {
+ u32 fault_data0, fault_data1;
+ u64 fault_addr;
+
+ fault_data0 = intel_uncore_read(uncore, fault_data0_reg);
+ fault_data1 = intel_uncore_read(uncore, fault_data1_reg);
+
+ fault_addr = ((u64)(fault_data1 & FAULT_VA_HIGH_BITS) << 44) |
+ ((u64)fault_data0 << 12);
+
+ DRM_DEBUG_DRIVER("Unexpected fault\n"
+ "\tAddr: 0x%08x_%08x\n"
+ "\tAddress space: %s\n"
+ "\tEngine ID: %d\n"
+ "\tSource ID: %d\n"
+ "\tType: %d\n",
+ upper_32_bits(fault_addr),
+ lower_32_bits(fault_addr),
+ fault_data1 & FAULT_GTT_SEL ? "GGTT" : "PPGTT",
+ GEN8_RING_FAULT_ENGINE_ID(fault),
+ RING_FAULT_SRCID(fault),
+ RING_FAULT_FAULT_TYPE(fault));
+ }
+}
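+
+/*
+ * Note on the address reconstruction above: FAULT_TLB_DATA0 appears to
+ * hold the page-aligned low bits of the faulting VA (hence the << 12),
+ * while the FAULT_VA_HIGH_BITS field of FAULT_TLB_DATA1 supplies the
+ * bits above 44. For example, data1 & FAULT_VA_HIGH_BITS == 0x8 with
+ * data0 == 0x12345 decodes to a fault address of 0x800012345000.
+ */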
+
+void intel_gt_check_and_clear_faults(struct intel_gt *gt)
+{
+ struct drm_i915_private *i915 = gt->i915;
+
+ /* From GEN8 onwards we only have one 'All Engine Fault Register' */
+ if (INTEL_GEN(i915) >= 8)
+ gen8_check_faults(gt);
+ else if (INTEL_GEN(i915) >= 6)
+ gen6_check_faults(gt);
+ else
+ return;
+
+ intel_gt_clear_error_registers(gt, ALL_ENGINES);
+}
+
+void intel_gt_flush_ggtt_writes(struct intel_gt *gt)
+{
+ struct intel_uncore *uncore = gt->uncore;
+ intel_wakeref_t wakeref;
+
+ /*
+ * No actual flushing is required for the GTT write domain for reads
+ * from the GTT domain. Writes to it "immediately" go to main memory
+ * as far as we know, so there's no chipset flush. It also doesn't
+ * land in the GPU render cache.
+ *
+ * However, we do have to enforce the order so that all writes through
+ * the GTT land before any writes to the device, such as updates to
+ * the GATT itself.
+ *
+ * We also have to wait a bit for the writes to land from the GTT.
+ * An uncached read (i.e. mmio) seems to be ideal for the round-trip
+ * timing. This issue has only been observed when switching quickly
+ * between GTT writes and CPU reads from inside the kernel on recent hw,
+ * and it appears to only affect discrete GTT blocks (i.e. on LLC
+ * system agents we could not reproduce this behaviour, that is
+ * until Cannonlake came along!).
+ */
+
+ wmb();
+
+ if (INTEL_INFO(gt->i915)->has_coherent_ggtt)
+ return;
+
+ intel_gt_chipset_flush(gt);
+
+ with_intel_runtime_pm_if_in_use(uncore->rpm, wakeref) {
+ unsigned long flags;
+
+ spin_lock_irqsave(&uncore->lock, flags);
+ intel_uncore_posting_read_fw(uncore,
+ RING_HEAD(RENDER_RING_BASE));
+ spin_unlock_irqrestore(&uncore->lock, flags);
+ }
+}
+
+void intel_gt_chipset_flush(struct intel_gt *gt)
+{
+ wmb();
+ if (INTEL_GEN(gt->i915) < 6)
+ intel_gtt_chipset_flush();
+}
+
+void intel_gt_driver_register(struct intel_gt *gt)
+{
+ intel_rps_driver_register(&gt->rps);
+
+ debugfs_gt_register(gt);
+}
+
+static int intel_gt_init_scratch(struct intel_gt *gt, unsigned int size)
+{
+ struct drm_i915_private *i915 = gt->i915;
+ struct drm_i915_gem_object *obj;
+ struct i915_vma *vma;
+ int ret;
+
+ obj = i915_gem_object_create_stolen(i915, size);
+ if (IS_ERR(obj))
+ obj = i915_gem_object_create_internal(i915, size);
+ if (IS_ERR(obj)) {
+ DRM_ERROR("Failed to allocate scratch page\n");
+ return PTR_ERR(obj);
+ }
+
+ vma = i915_vma_instance(obj, &gt->ggtt->vm, NULL);
+ if (IS_ERR(vma)) {
+ ret = PTR_ERR(vma);
+ goto err_unref;
+ }
+
+ ret = i915_vma_pin(vma, 0, 0, PIN_GLOBAL | PIN_HIGH);
+ if (ret)
+ goto err_unref;
+
+ gt->scratch = i915_vma_make_unshrinkable(vma);
+
+ return 0;
+
+err_unref:
+ i915_gem_object_put(obj);
+ return ret;
+}
+
+static void intel_gt_fini_scratch(struct intel_gt *gt)
+{
+ i915_vma_unpin_and_release(&gt->scratch, 0);
+}
+
+static struct i915_address_space *kernel_vm(struct intel_gt *gt)
+{
+ if (INTEL_PPGTT(gt->i915) > INTEL_PPGTT_ALIASING)
+ return &i915_ppgtt_create(gt)->vm;
+ else
+ return i915_vm_get(&gt->ggtt->vm);
+}
+
+static int __intel_context_flush_retire(struct intel_context *ce)
+{
+ struct intel_timeline *tl;
+
+ tl = intel_context_timeline_lock(ce);
+ if (IS_ERR(tl))
+ return PTR_ERR(tl);
+
+ intel_context_timeline_unlock(tl);
+ return 0;
+}
+
+static int __engines_record_defaults(struct intel_gt *gt)
+{
+ struct i915_request *requests[I915_NUM_ENGINES] = {};
+ struct intel_engine_cs *engine;
+ enum intel_engine_id id;
+ int err = 0;
+
+ /*
+ * As we reset the gpu during very early sanitisation, the current
+ * register state on the GPU should reflect its default values.
+ * We load a context onto the hw (with restore-inhibit), then switch
+ * over to a second context to save that default register state. We
+ * can then prime every new context with that state so they all start
+ * from the same default HW values.
+ */
+
+ for_each_engine(engine, gt, id) {
+ struct intel_renderstate so;
+ struct intel_context *ce;
+ struct i915_request *rq;
+
+ /* We must be able to switch to something! */
+ GEM_BUG_ON(!engine->kernel_context);
+
+ err = intel_renderstate_init(&so, engine);
+ if (err)
+ goto out;
+
+ ce = intel_context_create(engine);
+ if (IS_ERR(ce)) {
+ err = PTR_ERR(ce);
+ goto out;
+ }
+
+ rq = intel_context_create_request(ce);
+ if (IS_ERR(rq)) {
+ err = PTR_ERR(rq);
+ intel_context_put(ce);
+ goto out;
+ }
+
+ err = intel_engine_emit_ctx_wa(rq);
+ if (err)
+ goto err_rq;
+
+ err = intel_renderstate_emit(&so, rq);
+ if (err)
+ goto err_rq;
+
+err_rq:
+ requests[id] = i915_request_get(rq);
+ i915_request_add(rq);
+ intel_renderstate_fini(&so);
+ if (err)
+ goto out;
+ }
+
+ /* Flush the default context image to memory, and enable powersaving. */
+ if (intel_gt_wait_for_idle(gt, I915_GEM_IDLE_TIMEOUT) == -ETIME) {
+ err = -EIO;
+ goto out;
+ }
+
+ for (id = 0; id < ARRAY_SIZE(requests); id++) {
+ struct i915_request *rq;
+ struct i915_vma *state;
+ void *vaddr;
+
+ rq = requests[id];
+ if (!rq)
+ continue;
+
+ GEM_BUG_ON(!test_bit(CONTEXT_ALLOC_BIT, &rq->context->flags));
+ state = rq->context->state;
+ if (!state)
+ continue;
+
+ /* Serialise with retirement on another CPU */
+ GEM_BUG_ON(!i915_request_completed(rq));
+ err = __intel_context_flush_retire(rq->context);
+ if (err)
+ goto out;
+
+ /* We want to be able to unbind the state from the GGTT */
+ GEM_BUG_ON(intel_context_is_pinned(rq->context));
+
+ /*
+ * As we will hold a reference to the logical state, it will
+ * not be torn down with the context, and importantly the
+ * object will hold onto its vma (making it possible for a
+ * stray GTT write to corrupt our defaults). Unmap the vma
+ * from the GTT to prevent such accidents and reclaim the
+ * space.
+ */
+ err = i915_vma_unbind(state);
+ if (err)
+ goto out;
+
+ i915_gem_object_lock(state->obj);
+ err = i915_gem_object_set_to_cpu_domain(state->obj, false);
+ i915_gem_object_unlock(state->obj);
+ if (err)
+ goto out;
+
+ i915_gem_object_set_cache_coherency(state->obj, I915_CACHE_LLC);
+
+ /* Check we can acquire the image of the context state */
+ vaddr = i915_gem_object_pin_map(state->obj, I915_MAP_FORCE_WB);
+ if (IS_ERR(vaddr)) {
+ err = PTR_ERR(vaddr);
+ goto out;
+ }
+
+ rq->engine->default_state = i915_gem_object_get(state->obj);
+ i915_gem_object_unpin_map(state->obj);
+ }
+
+out:
+ /*
+ * If we have to abandon now, we expect the engines to be idle
+ * and ready to be torn-down. The quickest way we can accomplish
+ * this is by declaring ourselves wedged.
+ */
+ if (err)
+ intel_gt_set_wedged(gt);
+
+ for (id = 0; id < ARRAY_SIZE(requests); id++) {
+ struct intel_context *ce;
+ struct i915_request *rq;
+
+ rq = requests[id];
+ if (!rq)
+ continue;
+
+ ce = rq->context;
+ i915_request_put(rq);
+ intel_context_put(ce);
+ }
+ return err;
+}
+
+static int __engines_verify_workarounds(struct intel_gt *gt)
+{
+ struct intel_engine_cs *engine;
+ enum intel_engine_id id;
+ int err = 0;
+
+ if (!IS_ENABLED(CONFIG_DRM_I915_DEBUG_GEM))
+ return 0;
+
+ for_each_engine(engine, gt, id) {
+ if (intel_engine_verify_workarounds(engine, "load"))
+ err = -EIO;
+ }
+
+ return err;
+}
+
+static void __intel_gt_disable(struct intel_gt *gt)
+{
+ intel_gt_set_wedged_on_init(gt);
+
+ intel_gt_suspend_prepare(gt);
+ intel_gt_suspend_late(gt);
+
+ GEM_BUG_ON(intel_gt_pm_is_awake(gt));
+}
+
+int intel_gt_init(struct intel_gt *gt)
+{
+ int err;
+
+ err = i915_inject_probe_error(gt->i915, -ENODEV);
+ if (err)
+ return err;
+
+ /*
+ * This is just a security blanket to placate dragons.
+ * On some systems, we very sporadically observe that the first TLBs
+ * used by the CS may be stale, despite us poking the TLB reset. If
+ * we hold the forcewake during initialisation these problems
+ * just magically go away.
+ */
+ intel_uncore_forcewake_get(gt->uncore, FORCEWAKE_ALL);
+
+ err = intel_gt_init_scratch(gt, IS_GEN(gt->i915, 2) ? SZ_256K : SZ_4K);
+ if (err)
+ goto out_fw;
+
+ intel_gt_pm_init(gt);
+
+ gt->vm = kernel_vm(gt);
+ if (!gt->vm) {
+ err = -ENOMEM;
+ goto err_pm;
+ }
+
+ err = intel_engines_init(gt);
+ if (err)
+ goto err_engines;
+
+ intel_uc_init(&gt->uc);
+
+ err = intel_gt_resume(gt);
+ if (err)
+ goto err_uc_init;
+
+ err = __engines_record_defaults(gt);
+ if (err)
+ goto err_gt;
+
+ err = __engines_verify_workarounds(gt);
+ if (err)
+ goto err_gt;
+
+ err = i915_inject_probe_error(gt->i915, -EIO);
+ if (err)
+ goto err_gt;
+
+ goto out_fw;
+err_gt:
+ __intel_gt_disable(gt);
+ intel_uc_fini_hw(&gt->uc);
+err_uc_init:
+ intel_uc_fini(&gt->uc);
+err_engines:
+ intel_engines_release(gt);
+ i915_vm_put(fetch_and_zero(&gt->vm));
+err_pm:
+ intel_gt_pm_fini(gt);
+ intel_gt_fini_scratch(gt);
+out_fw:
+ if (err)
+ intel_gt_set_wedged_on_init(gt);
+ intel_uncore_forcewake_put(gt->uncore, FORCEWAKE_ALL);
+ return err;
+}
+
+void intel_gt_driver_remove(struct intel_gt *gt)
+{
+ __intel_gt_disable(gt);
+
+ intel_uc_fini_hw(&gt->uc);
+ intel_uc_fini(&gt->uc);
+
+ intel_engines_release(gt);
+}
+
+void intel_gt_driver_unregister(struct intel_gt *gt)
+{
+ intel_rps_driver_unregister(&gt->rps);
+}
+
+void intel_gt_driver_release(struct intel_gt *gt)
+{
+ struct i915_address_space *vm;
+
+ vm = fetch_and_zero(&gt->vm);
+ if (vm) /* FIXME being called twice on error paths :( */
+ i915_vm_put(vm);
+
+ intel_gt_pm_fini(gt);
+ intel_gt_fini_scratch(gt);
+}
+
+void intel_gt_driver_late_release(struct intel_gt *gt)
+{
+ intel_uc_driver_late_release(&gt->uc);
+ intel_gt_fini_requests(gt);
+ intel_gt_fini_reset(gt);
+ intel_gt_fini_timelines(gt);
+ intel_engines_free(gt);
+}
diff --git a/drivers/gpu/drm/i915/gt/intel_gt.h b/drivers/gpu/drm/i915/gt/intel_gt.h
new file mode 100644
index 000000000000..1dac441cb8f4
--- /dev/null
+++ b/drivers/gpu/drm/i915/gt/intel_gt.h
@@ -0,0 +1,71 @@
+/* SPDX-License-Identifier: MIT */
+/*
+ * Copyright © 2019 Intel Corporation
+ */
+
+#ifndef __INTEL_GT__
+#define __INTEL_GT__
+
+#include "intel_engine_types.h"
+#include "intel_gt_types.h"
+#include "intel_reset.h"
+
+struct drm_i915_private;
+
+#define GT_TRACE(gt, fmt, ...) do { \
+ const struct intel_gt *gt__ __maybe_unused = (gt); \
+ GEM_TRACE("%s " fmt, dev_name(gt__->i915->drm.dev), \
+ ##__VA_ARGS__); \
+} while (0)
+
+static inline struct intel_gt *uc_to_gt(struct intel_uc *uc)
+{
+ return container_of(uc, struct intel_gt, uc);
+}
+
+static inline struct intel_gt *guc_to_gt(struct intel_guc *guc)
+{
+ return container_of(guc, struct intel_gt, uc.guc);
+}
+
+static inline struct intel_gt *huc_to_gt(struct intel_huc *huc)
+{
+ return container_of(huc, struct intel_gt, uc.huc);
+}
+
+void intel_gt_init_early(struct intel_gt *gt, struct drm_i915_private *i915);
+void intel_gt_init_hw_early(struct intel_gt *gt, struct i915_ggtt *ggtt);
+int __must_check intel_gt_init_hw(struct intel_gt *gt);
+int intel_gt_init(struct intel_gt *gt);
+void intel_gt_driver_register(struct intel_gt *gt);
+
+void intel_gt_driver_unregister(struct intel_gt *gt);
+void intel_gt_driver_remove(struct intel_gt *gt);
+void intel_gt_driver_release(struct intel_gt *gt);
+
+void intel_gt_driver_late_release(struct intel_gt *gt);
+
+void intel_gt_check_and_clear_faults(struct intel_gt *gt);
+void intel_gt_clear_error_registers(struct intel_gt *gt,
+ intel_engine_mask_t engine_mask);
+
+void intel_gt_flush_ggtt_writes(struct intel_gt *gt);
+void intel_gt_chipset_flush(struct intel_gt *gt);
+
+static inline u32 intel_gt_scratch_offset(const struct intel_gt *gt,
+ enum intel_gt_scratch_field field)
+{
+ return i915_ggtt_offset(gt->scratch) + field;
+}
+
+static inline bool intel_gt_is_wedged(const struct intel_gt *gt)
+{
+ return __intel_reset_failed(&gt->reset);
+}
+
+static inline bool intel_gt_has_init_error(const struct intel_gt *gt)
+{
+ return test_bit(I915_WEDGED_ON_INIT, &gt->reset.flags);
+}
+
+#endif /* __INTEL_GT__ */
diff --git a/drivers/gpu/drm/i915/gt/intel_gt_irq.c b/drivers/gpu/drm/i915/gt/intel_gt_irq.c
new file mode 100644
index 000000000000..f796bdf1ed30
--- /dev/null
+++ b/drivers/gpu/drm/i915/gt/intel_gt_irq.c
@@ -0,0 +1,456 @@
+/*
+ * SPDX-License-Identifier: MIT
+ *
+ * Copyright © 2019 Intel Corporation
+ */
+
+#include <linux/sched/clock.h>
+
+#include "i915_drv.h"
+#include "i915_irq.h"
+#include "intel_gt.h"
+#include "intel_gt_irq.h"
+#include "intel_uncore.h"
+#include "intel_rps.h"
+
+static void guc_irq_handler(struct intel_guc *guc, u16 iir)
+{
+ if (iir & GUC_INTR_GUC2HOST)
+ intel_guc_to_host_event_handler(guc);
+}
+
+static void
+cs_irq_handler(struct intel_engine_cs *engine, u32 iir)
+{
+ bool tasklet = false;
+
+ if (iir & GT_CONTEXT_SWITCH_INTERRUPT)
+ tasklet = true;
+
+ if (iir & GT_RENDER_USER_INTERRUPT) {
+ intel_engine_signal_breadcrumbs(engine);
+ tasklet |= intel_engine_needs_breadcrumb_tasklet(engine);
+ }
+
+ if (tasklet)
+ tasklet_hi_schedule(&engine->execlists.tasklet);
+}
+
+static u32
+gen11_gt_engine_identity(struct intel_gt *gt,
+ const unsigned int bank, const unsigned int bit)
+{
+ void __iomem * const regs = gt->uncore->regs;
+ u32 timeout_ts;
+ u32 ident;
+
+ lockdep_assert_held(&gt->irq_lock);
+
+ raw_reg_write(regs, GEN11_IIR_REG_SELECTOR(bank), BIT(bit));
+
+ /*
+ * NB: Specs do not specify how long to spin wait,
+ * so we do ~100us as an educated guess.
+ */
+ timeout_ts = (local_clock() >> 10) + 100;
+ do {
+ ident = raw_reg_read(regs, GEN11_INTR_IDENTITY_REG(bank));
+ } while (!(ident & GEN11_INTR_DATA_VALID) &&
+ !time_after32(local_clock() >> 10, timeout_ts));
+
+ if (unlikely(!(ident & GEN11_INTR_DATA_VALID))) {
+ DRM_ERROR("INTR_IDENTITY_REG%u:%u 0x%08x not valid!\n",
+ bank, bit, ident);
+ return 0;
+ }
+
+ raw_reg_write(regs, GEN11_INTR_IDENTITY_REG(bank),
+ GEN11_INTR_DATA_VALID);
+
+ return ident;
+}
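+
+/*
+ * Note: local_clock() returns nanoseconds, so the >> 10 above is a
+ * cheap approximate division by 1000; the spin-wait deadline is
+ * therefore roughly 100us, matching the educated guess in the comment.
+ */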
+
+static void
+gen11_other_irq_handler(struct intel_gt *gt, const u8 instance,
+ const u16 iir)
+{
+ if (instance == OTHER_GUC_INSTANCE)
+ return guc_irq_handler(&gt->uc.guc, iir);
+
+ if (instance == OTHER_GTPM_INSTANCE)
+ return gen11_rps_irq_handler(&gt->rps, iir);
+
+ WARN_ONCE(1, "unhandled other interrupt instance=0x%x, iir=0x%x\n",
+ instance, iir);
+}
+
+static void
+gen11_engine_irq_handler(struct intel_gt *gt, const u8 class,
+ const u8 instance, const u16 iir)
+{
+ struct intel_engine_cs *engine;
+
+ if (instance <= MAX_ENGINE_INSTANCE)
+ engine = gt->engine_class[class][instance];
+ else
+ engine = NULL;
+
+ if (likely(engine))
+ return cs_irq_handler(engine, iir);
+
+ WARN_ONCE(1, "unhandled engine interrupt class=0x%x, instance=0x%x\n",
+ class, instance);
+}
+
+static void
+gen11_gt_identity_handler(struct intel_gt *gt, const u32 identity)
+{
+ const u8 class = GEN11_INTR_ENGINE_CLASS(identity);
+ const u8 instance = GEN11_INTR_ENGINE_INSTANCE(identity);
+ const u16 intr = GEN11_INTR_ENGINE_INTR(identity);
+
+ if (unlikely(!intr))
+ return;
+
+ if (class <= COPY_ENGINE_CLASS)
+ return gen11_engine_irq_handler(gt, class, instance, intr);
+
+ if (class == OTHER_CLASS)
+ return gen11_other_irq_handler(gt, instance, intr);
+
+ WARN_ONCE(1, "unknown interrupt class=0x%x, instance=0x%x, intr=0x%x\n",
+ class, instance, intr);
+}
+
+static void
+gen11_gt_bank_handler(struct intel_gt *gt, const unsigned int bank)
+{
+ void __iomem * const regs = gt->uncore->regs;
+ unsigned long intr_dw;
+ unsigned int bit;
+
+ lockdep_assert_held(&gt->irq_lock);
+
+ intr_dw = raw_reg_read(regs, GEN11_GT_INTR_DW(bank));
+
+ for_each_set_bit(bit, &intr_dw, 32) {
+ const u32 ident = gen11_gt_engine_identity(gt, bank, bit);
+
+ gen11_gt_identity_handler(gt, ident);
+ }
+
+ /* Clear must come after the shared IIR has been serviced for the engine */
+ raw_reg_write(regs, GEN11_GT_INTR_DW(bank), intr_dw);
+}
+
+void gen11_gt_irq_handler(struct intel_gt *gt, const u32 master_ctl)
+{
+ unsigned int bank;
+
+ spin_lock(&gt->irq_lock);
+
+ for (bank = 0; bank < 2; bank++) {
+ if (master_ctl & GEN11_GT_DW_IRQ(bank))
+ gen11_gt_bank_handler(gt, bank);
+ }
+
+ spin_unlock(&gt->irq_lock);
+}
+
+bool gen11_gt_reset_one_iir(struct intel_gt *gt,
+ const unsigned int bank, const unsigned int bit)
+{
+ void __iomem * const regs = gt->uncore->regs;
+ u32 dw;
+
+ lockdep_assert_held(&gt->irq_lock);
+
+ dw = raw_reg_read(regs, GEN11_GT_INTR_DW(bank));
+ if (dw & BIT(bit)) {
+ /*
+ * According to the BSpec, DW_IIR bits cannot be cleared without
+ * first servicing the Selector & Shared IIR registers.
+ */
+ gen11_gt_engine_identity(gt, bank, bit);
+
+ /*
+ * We locked GT INT DW by reading it. If we want to (try
+ * to) recover from this successfully, we need to clear
+ * our bit, otherwise we are locking the register for
+ * everybody.
+ */
+ raw_reg_write(regs, GEN11_GT_INTR_DW(bank), BIT(bit));
+
+ return true;
+ }
+
+ return false;
+}
+
+void gen11_gt_irq_reset(struct intel_gt *gt)
+{
+ struct intel_uncore *uncore = gt->uncore;
+
+ /* Disable RCS, BCS, VCS and VECS class interrupts. */
+ intel_uncore_write(uncore, GEN11_RENDER_COPY_INTR_ENABLE, 0);
+ intel_uncore_write(uncore, GEN11_VCS_VECS_INTR_ENABLE, 0);
+
+ /* Restore the irq masks on the RCS, BCS, VCS and VECS engines. */
+ intel_uncore_write(uncore, GEN11_RCS0_RSVD_INTR_MASK, ~0);
+ intel_uncore_write(uncore, GEN11_BCS_RSVD_INTR_MASK, ~0);
+ intel_uncore_write(uncore, GEN11_VCS0_VCS1_INTR_MASK, ~0);
+ intel_uncore_write(uncore, GEN11_VCS2_VCS3_INTR_MASK, ~0);
+ intel_uncore_write(uncore, GEN11_VECS0_VECS1_INTR_MASK, ~0);
+
+ intel_uncore_write(uncore, GEN11_GPM_WGBOXPERF_INTR_ENABLE, 0);
+ intel_uncore_write(uncore, GEN11_GPM_WGBOXPERF_INTR_MASK, ~0);
+ intel_uncore_write(uncore, GEN11_GUC_SG_INTR_ENABLE, 0);
+ intel_uncore_write(uncore, GEN11_GUC_SG_INTR_MASK, ~0);
+}
+
+void gen11_gt_irq_postinstall(struct intel_gt *gt)
+{
+ const u32 irqs = GT_RENDER_USER_INTERRUPT | GT_CONTEXT_SWITCH_INTERRUPT;
+ struct intel_uncore *uncore = gt->uncore;
+ const u32 dmask = irqs << 16 | irqs;
+ const u32 smask = irqs << 16;
+
+ BUILD_BUG_ON(irqs & 0xffff0000);
+
+ /* Enable RCS, BCS, VCS and VECS class interrupts. */
+ intel_uncore_write(uncore, GEN11_RENDER_COPY_INTR_ENABLE, dmask);
+ intel_uncore_write(uncore, GEN11_VCS_VECS_INTR_ENABLE, dmask);
+
+ /* Unmask irqs on RCS, BCS, VCS and VECS engines. */
+ intel_uncore_write(uncore, GEN11_RCS0_RSVD_INTR_MASK, ~smask);
+ intel_uncore_write(uncore, GEN11_BCS_RSVD_INTR_MASK, ~smask);
+ intel_uncore_write(uncore, GEN11_VCS0_VCS1_INTR_MASK, ~dmask);
+ intel_uncore_write(uncore, GEN11_VCS2_VCS3_INTR_MASK, ~dmask);
+ intel_uncore_write(uncore, GEN11_VECS0_VECS1_INTR_MASK, ~dmask);
+
+ /*
+ * RPS interrupts will get enabled/disabled on demand when RPS itself
+ * is enabled/disabled.
+ */
+ gt->pm_ier = 0x0;
+ gt->pm_imr = ~gt->pm_ier;
+ intel_uncore_write(uncore, GEN11_GPM_WGBOXPERF_INTR_ENABLE, 0);
+ intel_uncore_write(uncore, GEN11_GPM_WGBOXPERF_INTR_MASK, ~0);
+
+ /* Same thing for GuC interrupts */
+ intel_uncore_write(uncore, GEN11_GUC_SG_INTR_ENABLE, 0);
+ intel_uncore_write(uncore, GEN11_GUC_SG_INTR_MASK, ~0);
+}
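+
+/*
+ * Note on the masks above: each gen11 engine interrupt mask register
+ * appears to cover two engine instances, one per 16-bit half. dmask
+ * (irqs << 16 | irqs) therefore programs both halves, while smask
+ * (irqs << 16) touches only the upper half, which is used for the
+ * RCS0/BCS registers where the other half is reserved.
+ */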
+
+void gen5_gt_irq_handler(struct intel_gt *gt, u32 gt_iir)
+{
+ if (gt_iir & GT_RENDER_USER_INTERRUPT)
+ intel_engine_signal_breadcrumbs(gt->engine_class[RENDER_CLASS][0]);
+ if (gt_iir & ILK_BSD_USER_INTERRUPT)
+ intel_engine_signal_breadcrumbs(gt->engine_class[VIDEO_DECODE_CLASS][0]);
+}
+
+static void gen7_parity_error_irq_handler(struct intel_gt *gt, u32 iir)
+{
+ if (!HAS_L3_DPF(gt->i915))
+ return;
+
+ spin_lock(&gt->irq_lock);
+ gen5_gt_disable_irq(gt, GT_PARITY_ERROR(gt->i915));
+ spin_unlock(&gt->irq_lock);
+
+ if (iir & GT_RENDER_L3_PARITY_ERROR_INTERRUPT_S1)
+ gt->i915->l3_parity.which_slice |= 1 << 1;
+
+ if (iir & GT_RENDER_L3_PARITY_ERROR_INTERRUPT)
+ gt->i915->l3_parity.which_slice |= 1 << 0;
+
+ schedule_work(&gt->i915->l3_parity.error_work);
+}
+
+void gen6_gt_irq_handler(struct intel_gt *gt, u32 gt_iir)
+{
+ if (gt_iir & GT_RENDER_USER_INTERRUPT)
+ intel_engine_signal_breadcrumbs(gt->engine_class[RENDER_CLASS][0]);
+ if (gt_iir & GT_BSD_USER_INTERRUPT)
+ intel_engine_signal_breadcrumbs(gt->engine_class[VIDEO_DECODE_CLASS][0]);
+ if (gt_iir & GT_BLT_USER_INTERRUPT)
+ intel_engine_signal_breadcrumbs(gt->engine_class[COPY_ENGINE_CLASS][0]);
+
+ if (gt_iir & (GT_BLT_CS_ERROR_INTERRUPT |
+ GT_BSD_CS_ERROR_INTERRUPT |
+ GT_RENDER_CS_MASTER_ERROR_INTERRUPT))
+ DRM_DEBUG("Command parser error, gt_iir 0x%08x\n", gt_iir);
+
+ if (gt_iir & GT_PARITY_ERROR(gt->i915))
+ gen7_parity_error_irq_handler(gt, gt_iir);
+}
+
+void gen8_gt_irq_ack(struct intel_gt *gt, u32 master_ctl, u32 gt_iir[4])
+{
+ void __iomem * const regs = gt->uncore->regs;
+
+ if (master_ctl & (GEN8_GT_RCS_IRQ | GEN8_GT_BCS_IRQ)) {
+ gt_iir[0] = raw_reg_read(regs, GEN8_GT_IIR(0));
+ if (likely(gt_iir[0]))
+ raw_reg_write(regs, GEN8_GT_IIR(0), gt_iir[0]);
+ }
+
+ if (master_ctl & (GEN8_GT_VCS0_IRQ | GEN8_GT_VCS1_IRQ)) {
+ gt_iir[1] = raw_reg_read(regs, GEN8_GT_IIR(1));
+ if (likely(gt_iir[1]))
+ raw_reg_write(regs, GEN8_GT_IIR(1), gt_iir[1]);
+ }
+
+ if (master_ctl & (GEN8_GT_PM_IRQ | GEN8_GT_GUC_IRQ)) {
+ gt_iir[2] = raw_reg_read(regs, GEN8_GT_IIR(2));
+ if (likely(gt_iir[2]))
+ raw_reg_write(regs, GEN8_GT_IIR(2), gt_iir[2]);
+ }
+
+ if (master_ctl & GEN8_GT_VECS_IRQ) {
+ gt_iir[3] = raw_reg_read(regs, GEN8_GT_IIR(3));
+ if (likely(gt_iir[3]))
+ raw_reg_write(regs, GEN8_GT_IIR(3), gt_iir[3]);
+ }
+}
+
+void gen8_gt_irq_handler(struct intel_gt *gt, u32 master_ctl, u32 gt_iir[4])
+{
+ if (master_ctl & (GEN8_GT_RCS_IRQ | GEN8_GT_BCS_IRQ)) {
+ cs_irq_handler(gt->engine_class[RENDER_CLASS][0],
+ gt_iir[0] >> GEN8_RCS_IRQ_SHIFT);
+ cs_irq_handler(gt->engine_class[COPY_ENGINE_CLASS][0],
+ gt_iir[0] >> GEN8_BCS_IRQ_SHIFT);
+ }
+
+ if (master_ctl & (GEN8_GT_VCS0_IRQ | GEN8_GT_VCS1_IRQ)) {
+ cs_irq_handler(gt->engine_class[VIDEO_DECODE_CLASS][0],
+ gt_iir[1] >> GEN8_VCS0_IRQ_SHIFT);
+ cs_irq_handler(gt->engine_class[VIDEO_DECODE_CLASS][1],
+ gt_iir[1] >> GEN8_VCS1_IRQ_SHIFT);
+ }
+
+ if (master_ctl & GEN8_GT_VECS_IRQ) {
+ cs_irq_handler(gt->engine_class[VIDEO_ENHANCEMENT_CLASS][0],
+ gt_iir[3] >> GEN8_VECS_IRQ_SHIFT);
+ }
+
+ if (master_ctl & (GEN8_GT_PM_IRQ | GEN8_GT_GUC_IRQ)) {
+ gen6_rps_irq_handler(&gt->rps, gt_iir[2]);
+ guc_irq_handler(&gt->uc.guc, gt_iir[2] >> 16);
+ }
+}
+
+void gen8_gt_irq_reset(struct intel_gt *gt)
+{
+ struct intel_uncore *uncore = gt->uncore;
+
+ GEN8_IRQ_RESET_NDX(uncore, GT, 0);
+ GEN8_IRQ_RESET_NDX(uncore, GT, 1);
+ GEN8_IRQ_RESET_NDX(uncore, GT, 2);
+ GEN8_IRQ_RESET_NDX(uncore, GT, 3);
+}
+
+void gen8_gt_irq_postinstall(struct intel_gt *gt)
+{
+ struct intel_uncore *uncore = gt->uncore;
+
+ /* These are interrupts we'll toggle with the ring mask register */
+ u32 gt_interrupts[] = {
+ (GT_RENDER_USER_INTERRUPT << GEN8_RCS_IRQ_SHIFT |
+ GT_CONTEXT_SWITCH_INTERRUPT << GEN8_RCS_IRQ_SHIFT |
+ GT_RENDER_USER_INTERRUPT << GEN8_BCS_IRQ_SHIFT |
+ GT_CONTEXT_SWITCH_INTERRUPT << GEN8_BCS_IRQ_SHIFT),
+
+ (GT_RENDER_USER_INTERRUPT << GEN8_VCS0_IRQ_SHIFT |
+ GT_CONTEXT_SWITCH_INTERRUPT << GEN8_VCS0_IRQ_SHIFT |
+ GT_RENDER_USER_INTERRUPT << GEN8_VCS1_IRQ_SHIFT |
+ GT_CONTEXT_SWITCH_INTERRUPT << GEN8_VCS1_IRQ_SHIFT),
+
+ 0,
+
+ (GT_RENDER_USER_INTERRUPT << GEN8_VECS_IRQ_SHIFT |
+ GT_CONTEXT_SWITCH_INTERRUPT << GEN8_VECS_IRQ_SHIFT)
+ };
+
+ gt->pm_ier = 0x0;
+ gt->pm_imr = ~gt->pm_ier;
+ GEN8_IRQ_INIT_NDX(uncore, GT, 0, ~gt_interrupts[0], gt_interrupts[0]);
+ GEN8_IRQ_INIT_NDX(uncore, GT, 1, ~gt_interrupts[1], gt_interrupts[1]);
+ /*
+ * RPS interrupts will get enabled/disabled on demand when RPS itself
+ * is enabled/disabled. The same will be the case for GuC interrupts.
+ */
+ GEN8_IRQ_INIT_NDX(uncore, GT, 2, gt->pm_imr, gt->pm_ier);
+ GEN8_IRQ_INIT_NDX(uncore, GT, 3, ~gt_interrupts[3], gt_interrupts[3]);
+}
+
+static void gen5_gt_update_irq(struct intel_gt *gt,
+ u32 interrupt_mask,
+ u32 enabled_irq_mask)
+{
+ lockdep_assert_held(&gt->irq_lock);
+
+ GEM_BUG_ON(enabled_irq_mask & ~interrupt_mask);
+
+ gt->gt_imr &= ~interrupt_mask;
+ gt->gt_imr |= (~enabled_irq_mask & interrupt_mask);
+ intel_uncore_write(gt->uncore, GTIMR, gt->gt_imr);
+}
+
+void gen5_gt_enable_irq(struct intel_gt *gt, u32 mask)
+{
+ gen5_gt_update_irq(gt, mask, mask);
+ intel_uncore_posting_read_fw(gt->uncore, GTIMR);
+}
+
+void gen5_gt_disable_irq(struct intel_gt *gt, u32 mask)
+{
+ gen5_gt_update_irq(gt, mask, 0);
+}
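+
+/*
+ * Reminder on IMR polarity: a set bit in GTIMR masks (disables) that
+ * interrupt. gen5_gt_enable_irq() thus clears bits via the update
+ * helper (and flushes the write with a posting read), while
+ * gen5_gt_disable_irq() sets them.
+ */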
+
+void gen5_gt_irq_reset(struct intel_gt *gt)
+{
+ struct intel_uncore *uncore = gt->uncore;
+
+ GEN3_IRQ_RESET(uncore, GT);
+ if (INTEL_GEN(gt->i915) >= 6)
+ GEN3_IRQ_RESET(uncore, GEN6_PM);
+}
+
+void gen5_gt_irq_postinstall(struct intel_gt *gt)
+{
+ struct intel_uncore *uncore = gt->uncore;
+ u32 pm_irqs = 0;
+ u32 gt_irqs = 0;
+
+ gt->gt_imr = ~0;
+ if (HAS_L3_DPF(gt->i915)) {
+ /* L3 parity interrupt is always unmasked. */
+ gt->gt_imr = ~GT_PARITY_ERROR(gt->i915);
+ gt_irqs |= GT_PARITY_ERROR(gt->i915);
+ }
+
+ gt_irqs |= GT_RENDER_USER_INTERRUPT;
+ if (IS_GEN(gt->i915, 5))
+ gt_irqs |= ILK_BSD_USER_INTERRUPT;
+ else
+ gt_irqs |= GT_BLT_USER_INTERRUPT | GT_BSD_USER_INTERRUPT;
+
+ GEN3_IRQ_INIT(uncore, GT, gt->gt_imr, gt_irqs);
+
+ if (INTEL_GEN(gt->i915) >= 6) {
+ /*
+ * RPS interrupts will get enabled/disabled on demand when RPS
+ * itself is enabled/disabled.
+ */
+ if (HAS_ENGINE(gt->i915, VECS0)) {
+ pm_irqs |= PM_VEBOX_USER_INTERRUPT;
+ gt->pm_ier |= PM_VEBOX_USER_INTERRUPT;
+ }
+
+ gt->pm_imr = 0xffffffff;
+ GEN3_IRQ_INIT(uncore, GEN6_PM, gt->pm_imr, pm_irqs);
+ }
+}
diff --git a/drivers/gpu/drm/i915/gt/intel_gt_irq.h b/drivers/gpu/drm/i915/gt/intel_gt_irq.h
new file mode 100644
index 000000000000..8f37593712c9
--- /dev/null
+++ b/drivers/gpu/drm/i915/gt/intel_gt_irq.h
@@ -0,0 +1,44 @@
+/*
+ * SPDX-License-Identifier: MIT
+ *
+ * Copyright © 2019 Intel Corporation
+ */
+
+#ifndef INTEL_GT_IRQ_H
+#define INTEL_GT_IRQ_H
+
+#include <linux/types.h>
+
+struct intel_gt;
+
+#define GEN8_GT_IRQS (GEN8_GT_RCS_IRQ | \
+ GEN8_GT_BCS_IRQ | \
+ GEN8_GT_VCS0_IRQ | \
+ GEN8_GT_VCS1_IRQ | \
+ GEN8_GT_VECS_IRQ | \
+ GEN8_GT_PM_IRQ | \
+ GEN8_GT_GUC_IRQ)
+
+void gen11_gt_irq_reset(struct intel_gt *gt);
+void gen11_gt_irq_postinstall(struct intel_gt *gt);
+void gen11_gt_irq_handler(struct intel_gt *gt, const u32 master_ctl);
+
+bool gen11_gt_reset_one_iir(struct intel_gt *gt,
+ const unsigned int bank,
+ const unsigned int bit);
+
+void gen5_gt_irq_handler(struct intel_gt *gt, u32 gt_iir);
+
+void gen5_gt_irq_postinstall(struct intel_gt *gt);
+void gen5_gt_irq_reset(struct intel_gt *gt);
+void gen5_gt_disable_irq(struct intel_gt *gt, u32 mask);
+void gen5_gt_enable_irq(struct intel_gt *gt, u32 mask);
+
+void gen6_gt_irq_handler(struct intel_gt *gt, u32 gt_iir);
+
+void gen8_gt_irq_ack(struct intel_gt *gt, u32 master_ctl, u32 gt_iir[4]);
+void gen8_gt_irq_reset(struct intel_gt *gt);
+void gen8_gt_irq_handler(struct intel_gt *gt, u32 master_ctl, u32 gt_iir[4]);
+void gen8_gt_irq_postinstall(struct intel_gt *gt);
+
+#endif /* INTEL_GT_IRQ_H */
diff --git a/drivers/gpu/drm/i915/gt/intel_gt_pm.c b/drivers/gpu/drm/i915/gt/intel_gt_pm.c
index 9f8f7f54191f..d1c2f034296a 100644
--- a/drivers/gpu/drm/i915/gt/intel_gt_pm.c
+++ b/drivers/gpu/drm/i915/gt/intel_gt_pm.c
@@ -4,23 +4,48 @@
* Copyright © 2019 Intel Corporation
*/
+#include <linux/suspend.h>
+
#include "i915_drv.h"
+#include "i915_globals.h"
+#include "i915_params.h"
+#include "intel_context.h"
#include "intel_engine_pm.h"
+#include "intel_gt.h"
#include "intel_gt_pm.h"
+#include "intel_gt_requests.h"
+#include "intel_llc.h"
#include "intel_pm.h"
+#include "intel_rc6.h"
+#include "intel_rps.h"
#include "intel_wakeref.h"
-static void pm_notify(struct drm_i915_private *i915, int state)
+static void user_forcewake(struct intel_gt *gt, bool suspend)
{
- blocking_notifier_call_chain(&i915->gt.pm_notifications, state, i915);
+ int count = atomic_read(&gt->user_wakeref);
+
+ /* Inside suspend/resume so single threaded, no races to worry about. */
+ if (likely(!count))
+ return;
+
+ intel_gt_pm_get(gt);
+ if (suspend) {
+ GEM_BUG_ON(count > atomic_read(&gt->wakeref.count));
+ atomic_sub(count, &gt->wakeref.count);
+ } else {
+ atomic_add(count, &gt->wakeref.count);
+ }
+ intel_gt_pm_put(gt);
}
-static int intel_gt_unpark(struct intel_wakeref *wf)
+static int __gt_unpark(struct intel_wakeref *wf)
{
- struct drm_i915_private *i915 =
- container_of(wf, typeof(*i915), gt.wakeref);
+ struct intel_gt *gt = container_of(wf, typeof(*gt), wakeref);
+ struct drm_i915_private *i915 = gt->i915;
+
+ GT_TRACE(gt, "\n");
- GEM_TRACE("\n");
+ i915_globals_unpark();
/*
* It seems that the DMC likes to transition between the DC states a lot
@@ -33,97 +58,132 @@ static int intel_gt_unpark(struct intel_wakeref *wf)
* Work around it by grabbing a GT IRQ power domain whilst there is any
* GT activity, preventing any DC state transitions.
*/
- i915->gt.awake = intel_display_power_get(i915, POWER_DOMAIN_GT_IRQ);
- GEM_BUG_ON(!i915->gt.awake);
-
- intel_enable_gt_powersave(i915);
-
- i915_update_gfx_val(i915);
- if (INTEL_GEN(i915) >= 6)
- gen6_rps_busy(i915);
+ gt->awake = intel_display_power_get(i915, POWER_DOMAIN_GT_IRQ);
+ GEM_BUG_ON(!gt->awake);
+ intel_rc6_unpark(&gt->rc6);
+ intel_rps_unpark(&gt->rps);
i915_pmu_gt_unparked(i915);
- i915_queue_hangcheck(i915);
-
- pm_notify(i915, INTEL_GT_UNPARK);
+ intel_gt_unpark_requests(gt);
return 0;
}
-void intel_gt_pm_get(struct drm_i915_private *i915)
+static int __gt_park(struct intel_wakeref *wf)
{
- intel_wakeref_get(&i915->runtime_pm, &i915->gt.wakeref, intel_gt_unpark);
-}
+ struct intel_gt *gt = container_of(wf, typeof(*gt), wakeref);
+ intel_wakeref_t wakeref = fetch_and_zero(&gt->awake);
+ struct drm_i915_private *i915 = gt->i915;
-static int intel_gt_park(struct intel_wakeref *wf)
-{
- struct drm_i915_private *i915 =
- container_of(wf, typeof(*i915), gt.wakeref);
- intel_wakeref_t wakeref = fetch_and_zero(&i915->gt.awake);
+ GT_TRACE(gt, "\n");
- GEM_TRACE("\n");
-
- pm_notify(i915, INTEL_GT_PARK);
+ intel_gt_park_requests(gt);
+ i915_vma_parked(gt);
i915_pmu_gt_parked(i915);
- if (INTEL_GEN(i915) >= 6)
- gen6_rps_idle(i915);
+ intel_rps_park(&gt->rps);
+ intel_rc6_park(&gt->rc6);
+
+ /* Everything switched off, flush any residual interrupt just in case */
+ intel_synchronize_irq(i915);
+ /* Defer dropping the display power well for 100ms, it's slow! */
GEM_BUG_ON(!wakeref);
- intel_display_power_put(i915, POWER_DOMAIN_GT_IRQ, wakeref);
+ intel_display_power_put_async(i915, POWER_DOMAIN_GT_IRQ, wakeref);
+
+ i915_globals_park();
return 0;
}
-void intel_gt_pm_put(struct drm_i915_private *i915)
+static const struct intel_wakeref_ops wf_ops = {
+ .get = __gt_unpark,
+ .put = __gt_park,
+};
+
+void intel_gt_pm_init_early(struct intel_gt *gt)
{
- intel_wakeref_put(&i915->runtime_pm, &i915->gt.wakeref, intel_gt_park);
+ intel_wakeref_init(&gt->wakeref, gt->uncore->rpm, &wf_ops);
}
-void intel_gt_pm_init(struct drm_i915_private *i915)
+void intel_gt_pm_init(struct intel_gt *gt)
{
- intel_wakeref_init(&i915->gt.wakeref);
- BLOCKING_INIT_NOTIFIER_HEAD(&i915->gt.pm_notifications);
+ /*
+ * Enabling power-management should be "self-healing". If we cannot
+ * enable a feature, simply leave it disabled with a notice to the
+ * user.
+ */
+ intel_rc6_init(&gt->rc6);
+ intel_rps_init(&gt->rps);
}
-static bool reset_engines(struct drm_i915_private *i915)
+static bool reset_engines(struct intel_gt *gt)
{
- if (INTEL_INFO(i915)->gpu_reset_clobbers_display)
+ if (INTEL_INFO(gt->i915)->gpu_reset_clobbers_display)
return false;
- return intel_gpu_reset(i915, ALL_ENGINES) == 0;
+ return __intel_gt_reset(gt, ALL_ENGINES) == 0;
}
-/**
- * intel_gt_sanitize: called after the GPU has lost power
- * @i915: the i915 device
- * @force: ignore a failed reset and sanitize engine state anyway
- *
- * Anytime we reset the GPU, either with an explicit GPU reset or through a
- * PCI power cycle, the GPU loses state and we must reset our state tracking
- * to match. Note that calling intel_gt_sanitize() if the GPU has not
- * been reset results in much confusion!
- */
-void intel_gt_sanitize(struct drm_i915_private *i915, bool force)
+static void gt_sanitize(struct intel_gt *gt, bool force)
{
struct intel_engine_cs *engine;
enum intel_engine_id id;
+ intel_wakeref_t wakeref;
- GEM_TRACE("\n");
+ GT_TRACE(gt, "force:%s", yesno(force));
- if (!reset_engines(i915) && !force)
- return;
+ /* Use a raw wakeref to avoid calling intel_display_power_get early */
+ wakeref = intel_runtime_pm_get(gt->uncore->rpm);
+ intel_uncore_forcewake_get(gt->uncore, FORCEWAKE_ALL);
- for_each_engine(engine, i915, id)
- intel_engine_reset(engine, false);
+ /*
+ * As we have just resumed the machine and woken the device up from
+ * deep PCI sleep (presumably D3_cold), assume the HW has been reset
+ * back to defaults, recovering from whatever wedged state we left it
+ * in and so worth trying to use the device once more.
+ */
+ if (intel_gt_is_wedged(gt))
+ intel_gt_unset_wedged(gt);
+
+ intel_uc_sanitize(&gt->uc);
+
+ for_each_engine(engine, gt, id)
+ if (engine->reset.prepare)
+ engine->reset.prepare(engine);
+
+ intel_uc_reset_prepare(&gt->uc);
+
+ if (reset_engines(gt) || force) {
+ for_each_engine(engine, gt, id)
+ __intel_engine_reset(engine, false);
+ }
+
+ for_each_engine(engine, gt, id)
+ if (engine->reset.finish)
+ engine->reset.finish(engine);
+
+ intel_uncore_forcewake_put(gt->uncore, FORCEWAKE_ALL);
+ intel_runtime_pm_put(gt->uncore->rpm, wakeref);
+}
+
+void intel_gt_pm_fini(struct intel_gt *gt)
+{
+ intel_rc6_fini(&gt->rc6);
}
-int intel_gt_resume(struct drm_i915_private *i915)
+int intel_gt_resume(struct intel_gt *gt)
{
struct intel_engine_cs *engine;
enum intel_engine_id id;
- int err = 0;
+ int err;
+
+ err = intel_gt_has_init_error(gt);
+ if (err)
+ return err;
+
+ GT_TRACE(gt, "\n");
/*
* After resume, we may need to poke into the pinned kernel
@@ -131,32 +191,143 @@ int intel_gt_resume(struct drm_i915_private *i915)
* Only the kernel contexts should remain pinned over suspend,
* allowing us to fixup the user contexts on their first pin.
*/
- intel_gt_pm_get(i915);
- for_each_engine(engine, i915, id) {
- struct intel_context *ce;
+ intel_gt_pm_get(gt);
+
+ intel_uncore_forcewake_get(gt->uncore, FORCEWAKE_ALL);
+ intel_rc6_sanitize(&gt->rc6);
+ gt_sanitize(gt, true);
+ if (intel_gt_is_wedged(gt)) {
+ err = -EIO;
+ goto out_fw;
+ }
- intel_engine_pm_get(engine);
+ /* Only when the HW is re-initialised, can we replay the requests */
+ err = intel_gt_init_hw(gt);
+ if (err) {
+ dev_err(gt->i915->drm.dev,
+ "Failed to initialize GPU, declaring it wedged!\n");
+ goto err_wedged;
+ }
- ce = engine->kernel_context;
- if (ce)
- ce->ops->reset(ce);
+ intel_rps_enable(&gt->rps);
+ intel_llc_enable(&gt->llc);
- ce = engine->preempt_context;
- if (ce)
- ce->ops->reset(ce);
+ for_each_engine(engine, gt, id) {
+ intel_engine_pm_get(engine);
engine->serial++; /* kernel context lost */
err = engine->resume(engine);
intel_engine_pm_put(engine);
if (err) {
- dev_err(i915->drm.dev,
+ dev_err(gt->i915->drm.dev,
"Failed to restart %s (%d)\n",
engine->name, err);
- break;
+ goto err_wedged;
}
}
- intel_gt_pm_put(i915);
+ intel_rc6_enable(&gt->rc6);
+
+ intel_uc_resume(&gt->uc);
+
+ user_forcewake(gt, false);
+
+out_fw:
+ intel_uncore_forcewake_put(gt->uncore, FORCEWAKE_ALL);
+ intel_gt_pm_put(gt);
return err;
+
+err_wedged:
+ intel_gt_set_wedged(gt);
+ goto out_fw;
+}
+
+static void wait_for_suspend(struct intel_gt *gt)
+{
+ if (!intel_gt_pm_is_awake(gt))
+ return;
+
+ if (intel_gt_wait_for_idle(gt, I915_GEM_IDLE_TIMEOUT) == -ETIME) {
+ /*
+ * Forcibly cancel outstanding work and leave
+ * the gpu quiet.
+ */
+ intel_gt_set_wedged(gt);
+ intel_gt_retire_requests(gt);
+ }
+
+ intel_gt_pm_wait_for_idle(gt);
+}
+
+void intel_gt_suspend_prepare(struct intel_gt *gt)
+{
+ user_forcewake(gt, true);
+ wait_for_suspend(gt);
+
+ intel_uc_suspend(&gt->uc);
+}
+
+static suspend_state_t pm_suspend_target(void)
+{
+#if IS_ENABLED(CONFIG_SUSPEND) && IS_ENABLED(CONFIG_PM_SLEEP)
+ return pm_suspend_target_state;
+#else
+ return PM_SUSPEND_TO_IDLE;
+#endif
+}
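+
+/*
+ * Note: without CONFIG_SUSPEND/CONFIG_PM_SLEEP a full system suspend
+ * cannot occur, so reporting PM_SUSPEND_TO_IDLE is safe: that is the
+ * case in which the device stays powered and, as the comment below
+ * explains, our allocated memory remains usable.
+ */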
+
+void intel_gt_suspend_late(struct intel_gt *gt)
+{
+ intel_wakeref_t wakeref;
+
+ /* We expect to be idle already, but also want to be independent */
+ wait_for_suspend(gt);
+
+ if (is_mock_gt(gt))
+ return;
+
+ GEM_BUG_ON(gt->awake);
+
+ /*
+ * On disabling the device, we want to turn off HW access to memory
+ * that we no longer own.
+ *
+ * However, not all suspend-states disable the device. S0 (s2idle)
+ * is effectively runtime-suspend, the device is left powered on
+ * but needs to be put into a low power state. We need to keep
+ * power management enabled, but we also retain system state and so
+ * it remains safe to keep on using our allocated memory.
+ */
+ if (pm_suspend_target() == PM_SUSPEND_TO_IDLE)
+ return;
+
+ with_intel_runtime_pm(gt->uncore->rpm, wakeref) {
+ intel_rps_disable(&gt->rps);
+ intel_rc6_disable(&gt->rc6);
+ intel_llc_disable(&gt->llc);
+ }
+
+ gt_sanitize(gt, false);
+
+ GT_TRACE(gt, "\n");
+}
+
+void intel_gt_runtime_suspend(struct intel_gt *gt)
+{
+ intel_uc_runtime_suspend(&gt->uc);
+
+ GT_TRACE(gt, "\n");
}
+
+int intel_gt_runtime_resume(struct intel_gt *gt)
+{
+ GT_TRACE(gt, "\n");
+ intel_gt_init_swizzling(gt);
+
+ return intel_uc_runtime_resume(&gt->uc);
+}
+
+#if IS_ENABLED(CONFIG_DRM_I915_SELFTEST)
+#include "selftest_gt_pm.c"
+#endif
diff --git a/drivers/gpu/drm/i915/gt/intel_gt_pm.h b/drivers/gpu/drm/i915/gt/intel_gt_pm.h
index 53f342b20181..60f0e2fbe55c 100644
--- a/drivers/gpu/drm/i915/gt/intel_gt_pm.h
+++ b/drivers/gpu/drm/i915/gt/intel_gt_pm.h
@@ -9,19 +9,58 @@
#include <linux/types.h>
-struct drm_i915_private;
+#include "intel_gt_types.h"
+#include "intel_wakeref.h"
-enum {
- INTEL_GT_UNPARK,
- INTEL_GT_PARK,
-};
+static inline bool intel_gt_pm_is_awake(const struct intel_gt *gt)
+{
+ return intel_wakeref_is_active(&gt->wakeref);
+}
-void intel_gt_pm_get(struct drm_i915_private *i915);
-void intel_gt_pm_put(struct drm_i915_private *i915);
+static inline void intel_gt_pm_get(struct intel_gt *gt)
+{
+ intel_wakeref_get(&gt->wakeref);
+}
-void intel_gt_pm_init(struct drm_i915_private *i915);
+static inline void __intel_gt_pm_get(struct intel_gt *gt)
+{
+ __intel_wakeref_get(&gt->wakeref);
+}
-void intel_gt_sanitize(struct drm_i915_private *i915, bool force);
-int intel_gt_resume(struct drm_i915_private *i915);
+static inline bool intel_gt_pm_get_if_awake(struct intel_gt *gt)
+{
+ return intel_wakeref_get_if_active(&gt->wakeref);
+}
+
+static inline void intel_gt_pm_put(struct intel_gt *gt)
+{
+ intel_wakeref_put(&gt->wakeref);
+}
+
+static inline void intel_gt_pm_put_async(struct intel_gt *gt)
+{
+ intel_wakeref_put_async(&gt->wakeref);
+}
+
+static inline int intel_gt_pm_wait_for_idle(struct intel_gt *gt)
+{
+ return intel_wakeref_wait_for_idle(&gt->wakeref);
+}
+
+void intel_gt_pm_init_early(struct intel_gt *gt);
+void intel_gt_pm_init(struct intel_gt *gt);
+void intel_gt_pm_fini(struct intel_gt *gt);
+
+void intel_gt_suspend_prepare(struct intel_gt *gt);
+void intel_gt_suspend_late(struct intel_gt *gt);
+int intel_gt_resume(struct intel_gt *gt);
+
+void intel_gt_runtime_suspend(struct intel_gt *gt);
+int intel_gt_runtime_resume(struct intel_gt *gt);
+
+static inline bool is_mock_gt(const struct intel_gt *gt)
+{
+ return I915_SELFTEST_ONLY(gt->awake == -ENODEV);
+}
#endif /* INTEL_GT_PM_H */
diff --git a/drivers/gpu/drm/i915/gt/intel_gt_pm_irq.c b/drivers/gpu/drm/i915/gt/intel_gt_pm_irq.c
new file mode 100644
index 000000000000..babe866126d7
--- /dev/null
+++ b/drivers/gpu/drm/i915/gt/intel_gt_pm_irq.c
@@ -0,0 +1,109 @@
+/*
+ * SPDX-License-Identifier: MIT
+ *
+ * Copyright © 2019 Intel Corporation
+ */
+
+#include "i915_drv.h"
+#include "i915_reg.h"
+#include "intel_gt.h"
+#include "intel_gt_irq.h"
+#include "intel_gt_pm_irq.h"
+
+static void write_pm_imr(struct intel_gt *gt)
+{
+ struct drm_i915_private *i915 = gt->i915;
+ struct intel_uncore *uncore = gt->uncore;
+ u32 mask = gt->pm_imr;
+ i915_reg_t reg;
+
+ if (INTEL_GEN(i915) >= 11) {
+ reg = GEN11_GPM_WGBOXPERF_INTR_MASK;
+ mask <<= 16; /* pm is in upper half */
+ } else if (INTEL_GEN(i915) >= 8) {
+ reg = GEN8_GT_IMR(2);
+ } else {
+ reg = GEN6_PMIMR;
+ }
+
+ intel_uncore_write(uncore, reg, mask);
+}
+
+static void gen6_gt_pm_update_irq(struct intel_gt *gt,
+ u32 interrupt_mask,
+ u32 enabled_irq_mask)
+{
+ u32 new_val;
+
+ WARN_ON(enabled_irq_mask & ~interrupt_mask);
+
+ lockdep_assert_held(&gt->irq_lock);
+
+ new_val = gt->pm_imr;
+ new_val &= ~interrupt_mask;
+ new_val |= ~enabled_irq_mask & interrupt_mask;
+
+ if (new_val != gt->pm_imr) {
+ gt->pm_imr = new_val;
+ write_pm_imr(gt);
+ }
+}
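+
+/*
+ * Note: gt->pm_imr caches the last value written, so the mmio write in
+ * write_pm_imr() is skipped whenever the mask is unchanged. On gen11
+ * the PM bits additionally live in the upper half of the shared
+ * GPM_WGBOXPERF register, hence the << 16 in write_pm_imr().
+ */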
+
+void gen6_gt_pm_unmask_irq(struct intel_gt *gt, u32 mask)
+{
+ gen6_gt_pm_update_irq(gt, mask, mask);
+}
+
+void gen6_gt_pm_mask_irq(struct intel_gt *gt, u32 mask)
+{
+ gen6_gt_pm_update_irq(gt, mask, 0);
+}
+
+void gen6_gt_pm_reset_iir(struct intel_gt *gt, u32 reset_mask)
+{
+ struct intel_uncore *uncore = gt->uncore;
+ i915_reg_t reg = INTEL_GEN(gt->i915) >= 8 ? GEN8_GT_IIR(2) : GEN6_PMIIR;
+
+ lockdep_assert_held(&gt->irq_lock);
+
+ intel_uncore_write(uncore, reg, reset_mask);
+ intel_uncore_write(uncore, reg, reset_mask);
+ intel_uncore_posting_read(uncore, reg);
+}
+
+static void write_pm_ier(struct intel_gt *gt)
+{
+ struct drm_i915_private *i915 = gt->i915;
+ struct intel_uncore *uncore = gt->uncore;
+ u32 mask = gt->pm_ier;
+ i915_reg_t reg;
+
+ if (INTEL_GEN(i915) >= 11) {
+ reg = GEN11_GPM_WGBOXPERF_INTR_ENABLE;
+ mask <<= 16; /* pm is in upper half */
+ } else if (INTEL_GEN(i915) >= 8) {
+ reg = GEN8_GT_IER(2);
+ } else {
+ reg = GEN6_PMIER;
+ }
+
+ intel_uncore_write(uncore, reg, mask);
+}
+
+void gen6_gt_pm_enable_irq(struct intel_gt *gt, u32 enable_mask)
+{
+ lockdep_assert_held(&gt->irq_lock);
+
+ gt->pm_ier |= enable_mask;
+ write_pm_ier(gt);
+ gen6_gt_pm_unmask_irq(gt, enable_mask);
+}
+
+void gen6_gt_pm_disable_irq(struct intel_gt *gt, u32 disable_mask)
+{
+ lockdep_assert_held(&gt->irq_lock);
+
+ gt->pm_ier &= ~disable_mask;
+ gen6_gt_pm_mask_irq(gt, disable_mask);
+ write_pm_ier(gt);
+}
diff --git a/drivers/gpu/drm/i915/gt/intel_gt_pm_irq.h b/drivers/gpu/drm/i915/gt/intel_gt_pm_irq.h
new file mode 100644
index 000000000000..b29816a04809
--- /dev/null
+++ b/drivers/gpu/drm/i915/gt/intel_gt_pm_irq.h
@@ -0,0 +1,22 @@
+/*
+ * SPDX-License-Identifier: MIT
+ *
+ * Copyright © 2019 Intel Corporation
+ */
+
+#ifndef INTEL_GT_PM_IRQ_H
+#define INTEL_GT_PM_IRQ_H
+
+#include <linux/types.h>
+
+struct intel_gt;
+
+void gen6_gt_pm_unmask_irq(struct intel_gt *gt, u32 mask);
+void gen6_gt_pm_mask_irq(struct intel_gt *gt, u32 mask);
+
+void gen6_gt_pm_enable_irq(struct intel_gt *gt, u32 enable_mask);
+void gen6_gt_pm_disable_irq(struct intel_gt *gt, u32 disable_mask);
+
+void gen6_gt_pm_reset_iir(struct intel_gt *gt, u32 reset_mask);
+
+#endif /* INTEL_GT_PM_IRQ_H */
diff --git a/drivers/gpu/drm/i915/gt/intel_gt_requests.c b/drivers/gpu/drm/i915/gt/intel_gt_requests.c
new file mode 100644
index 000000000000..7ef1d37970f6
--- /dev/null
+++ b/drivers/gpu/drm/i915/gt/intel_gt_requests.c
@@ -0,0 +1,225 @@
+/*
+ * SPDX-License-Identifier: MIT
+ *
+ * Copyright © 2019 Intel Corporation
+ */
+
+#include <linux/workqueue.h>
+
+#include "i915_drv.h" /* for_each_engine() */
+#include "i915_request.h"
+#include "intel_engine_heartbeat.h"
+#include "intel_gt.h"
+#include "intel_gt_pm.h"
+#include "intel_gt_requests.h"
+#include "intel_timeline.h"
+
+static bool retire_requests(struct intel_timeline *tl)
+{
+ struct i915_request *rq, *rn;
+
+ list_for_each_entry_safe(rq, rn, &tl->requests, link)
+ if (!i915_request_retire(rq))
+ return false;
+
+ /* And check nothing new was submitted */
+ return !i915_active_fence_isset(&tl->last_request);
+}
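+
+/*
+ * Note: retire_requests() returns true only if every request on the
+ * timeline was retired and no new request was submitted meanwhile,
+ * i.e. the timeline is provably idle at this instant.
+ */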
+
+static bool flush_submission(struct intel_gt *gt)
+{
+ struct intel_engine_cs *engine;
+ enum intel_engine_id id;
+ bool active = false;
+
+ if (!intel_gt_pm_is_awake(gt))
+ return false;
+
+ for_each_engine(engine, gt, id) {
+ intel_engine_flush_submission(engine);
+ active |= flush_work(&engine->retire_work);
+ active |= flush_work(&engine->wakeref.work);
+ }
+
+ return active;
+}
+
+static void engine_retire(struct work_struct *work)
+{
+ struct intel_engine_cs *engine =
+ container_of(work, typeof(*engine), retire_work);
+ struct intel_timeline *tl = xchg(&engine->retire, NULL);
+
+ do {
+ struct intel_timeline *next = xchg(&tl->retire, NULL);
+
+ /*
+ * Our goal here is to retire _idle_ timelines as soon as
+ * possible (as they are idle, we do not expect userspace
+ * to be cleaning up anytime soon).
+ *
+ * If the timeline is currently locked, either it is being
+ * retired elsewhere or about to be!
+ */
+ if (mutex_trylock(&tl->mutex)) {
+ retire_requests(tl);
+ mutex_unlock(&tl->mutex);
+ }
+ intel_timeline_put(tl);
+
+ GEM_BUG_ON(!next);
+ tl = ptr_mask_bits(next, 1);
+ } while (tl);
+}
+
+static bool add_retire(struct intel_engine_cs *engine,
+ struct intel_timeline *tl)
+{
+#define STUB ((struct intel_timeline *)1)
+ struct intel_timeline *first;
+
+ /*
+ * We open-code a llist here to include the additional tag [BIT(0)]
+ * so that we know when the timeline is already on a
+ * retirement queue: either this engine or another.
+ */
+
+ if (cmpxchg(&tl->retire, NULL, STUB)) /* already queued */
+ return false;
+
+ intel_timeline_get(tl);
+ first = READ_ONCE(engine->retire);
+ do
+ tl->retire = ptr_pack_bits(first, 1, 1);
+ while (!try_cmpxchg(&engine->retire, &first, tl));
+
+ return !first;
+}
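+
+/*
+ * Sketch of the tag encoding above: engine->retire is a singly-linked
+ * list of timelines in which BIT(0) of each ->retire link doubles as
+ * the "already queued" marker. ptr_pack_bits(first, 1, 1) stores the
+ * previous head with bit 0 set, and engine_retire() strips it again
+ * with ptr_mask_bits(next, 1). The cmpxchg() against NULL is what
+ * makes concurrent add_retire() calls idempotent per timeline.
+ */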
+
+void intel_engine_add_retire(struct intel_engine_cs *engine,
+ struct intel_timeline *tl)
+{
+ if (add_retire(engine, tl))
+ schedule_work(&engine->retire_work);
+}
+
+void intel_engine_init_retire(struct intel_engine_cs *engine)
+{
+ INIT_WORK(&engine->retire_work, engine_retire);
+}
+
+void intel_engine_fini_retire(struct intel_engine_cs *engine)
+{
+ flush_work(&engine->retire_work);
+ GEM_BUG_ON(engine->retire);
+}
+
+long intel_gt_retire_requests_timeout(struct intel_gt *gt, long timeout)
+{
+ struct intel_gt_timelines *timelines = &gt->timelines;
+ struct intel_timeline *tl, *tn;
+ unsigned long active_count = 0;
+ bool interruptible;
+ LIST_HEAD(free);
+
+ interruptible = true;
+ if (unlikely(timeout < 0))
+ timeout = -timeout, interruptible = false;
+
+ flush_submission(gt); /* kick the ksoftirqd tasklets */
+ spin_lock(&timelines->lock);
+ list_for_each_entry_safe(tl, tn, &timelines->active_list, link) {
+ if (!mutex_trylock(&tl->mutex)) {
+ active_count++; /* report busy to caller, try again? */
+ continue;
+ }
+
+ intel_timeline_get(tl);
+ GEM_BUG_ON(!atomic_read(&tl->active_count));
+ atomic_inc(&tl->active_count); /* pin the list element */
+ spin_unlock(&timelines->lock);
+
+ if (timeout > 0) {
+ struct dma_fence *fence;
+
+ fence = i915_active_fence_get(&tl->last_request);
+ if (fence) {
+ timeout = dma_fence_wait_timeout(fence,
+ interruptible,
+ timeout);
+ dma_fence_put(fence);
+ }
+ }
+
+ if (!retire_requests(tl) || flush_submission(gt))
+ active_count++;
+
+ spin_lock(&timelines->lock);
+
+ /* Resume iteration after dropping lock */
+ list_safe_reset_next(tl, tn, link);
+ if (atomic_dec_and_test(&tl->active_count))
+ list_del(&tl->link);
+
+ mutex_unlock(&tl->mutex);
+
+ /* Defer the final release to after the spinlock */
+ if (refcount_dec_and_test(&tl->kref.refcount)) {
+ GEM_BUG_ON(atomic_read(&tl->active_count));
+ list_add(&tl->link, &free);
+ }
+ }
+ spin_unlock(&timelines->lock);
+
+ list_for_each_entry_safe(tl, tn, &free, link)
+ __intel_timeline_free(&tl->kref);
+
+ return active_count ? timeout : 0;
+}
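+
+/*
+ * Usage note: a negative timeout requests an uninterruptible wait of
+ * the same magnitude (see the sign flip at the top). The return value
+ * is the remaining timeout while any timeline stayed busy, or 0 once
+ * everything retired, which is what lets intel_gt_wait_for_idle()
+ * below loop on a positive return.
+ */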
+
+int intel_gt_wait_for_idle(struct intel_gt *gt, long timeout)
+{
+ /* If the device is asleep, we have no requests outstanding */
+ if (!intel_gt_pm_is_awake(gt))
+ return 0;
+
+ while ((timeout = intel_gt_retire_requests_timeout(gt, timeout)) > 0) {
+ cond_resched();
+ if (signal_pending(current))
+ return -EINTR;
+ }
+
+ return timeout;
+}
+
+static void retire_work_handler(struct work_struct *work)
+{
+ struct intel_gt *gt =
+ container_of(work, typeof(*gt), requests.retire_work.work);
+
+ schedule_delayed_work(&gt->requests.retire_work,
+ round_jiffies_up_relative(HZ));
+ intel_gt_retire_requests(gt);
+}
+
+void intel_gt_init_requests(struct intel_gt *gt)
+{
+ INIT_DELAYED_WORK(&gt->requests.retire_work, retire_work_handler);
+}
+
+void intel_gt_park_requests(struct intel_gt *gt)
+{
+ cancel_delayed_work(&gt->requests.retire_work);
+}
+
+void intel_gt_unpark_requests(struct intel_gt *gt)
+{
+ schedule_delayed_work(&gt->requests.retire_work,
+ round_jiffies_up_relative(HZ));
+}
+
+void intel_gt_fini_requests(struct intel_gt *gt)
+{
+ /* Wait until the work is marked as finished before unloading! */
+ cancel_delayed_work_sync(&gt->requests.retire_work);
+}
diff --git a/drivers/gpu/drm/i915/gt/intel_gt_requests.h b/drivers/gpu/drm/i915/gt/intel_gt_requests.h
new file mode 100644
index 000000000000..dbac53baf1cb
--- /dev/null
+++ b/drivers/gpu/drm/i915/gt/intel_gt_requests.h
@@ -0,0 +1,32 @@
+/*
+ * SPDX-License-Identifier: MIT
+ *
+ * Copyright © 2019 Intel Corporation
+ */
+
+#ifndef INTEL_GT_REQUESTS_H
+#define INTEL_GT_REQUESTS_H
+
+struct intel_engine_cs;
+struct intel_gt;
+struct intel_timeline;
+
+long intel_gt_retire_requests_timeout(struct intel_gt *gt, long timeout);
+static inline void intel_gt_retire_requests(struct intel_gt *gt)
+{
+ intel_gt_retire_requests_timeout(gt, 0);
+}
+
+void intel_engine_init_retire(struct intel_engine_cs *engine);
+void intel_engine_add_retire(struct intel_engine_cs *engine,
+ struct intel_timeline *tl);
+void intel_engine_fini_retire(struct intel_engine_cs *engine);
+
+int intel_gt_wait_for_idle(struct intel_gt *gt, long timeout);
+
+void intel_gt_init_requests(struct intel_gt *gt);
+void intel_gt_park_requests(struct intel_gt *gt);
+void intel_gt_unpark_requests(struct intel_gt *gt);
+void intel_gt_fini_requests(struct intel_gt *gt);
+
+#endif /* INTEL_GT_REQUESTS_H */
diff --git a/drivers/gpu/drm/i915/gt/intel_gt_types.h b/drivers/gpu/drm/i915/gt/intel_gt_types.h
new file mode 100644
index 000000000000..96890dd12b5f
--- /dev/null
+++ b/drivers/gpu/drm/i915/gt/intel_gt_types.h
@@ -0,0 +1,119 @@
+/* SPDX-License-Identifier: MIT */
+/*
+ * Copyright © 2019 Intel Corporation
+ */
+
+#ifndef __INTEL_GT_TYPES__
+#define __INTEL_GT_TYPES__
+
+#include <linux/ktime.h>
+#include <linux/list.h>
+#include <linux/mutex.h>
+#include <linux/notifier.h>
+#include <linux/spinlock.h>
+#include <linux/types.h>
+
+#include "uc/intel_uc.h"
+
+#include "i915_vma.h"
+#include "intel_engine_types.h"
+#include "intel_llc_types.h"
+#include "intel_reset_types.h"
+#include "intel_rc6_types.h"
+#include "intel_rps_types.h"
+#include "intel_wakeref.h"
+
+struct drm_i915_private;
+struct i915_ggtt;
+struct intel_engine_cs;
+struct intel_uncore;
+
+struct intel_gt {
+ struct drm_i915_private *i915;
+ struct intel_uncore *uncore;
+ struct i915_ggtt *ggtt;
+
+ struct intel_uc uc;
+
+ struct intel_gt_timelines {
+ spinlock_t lock; /* protects active_list */
+ struct list_head active_list;
+
+ /* Pack multiple timelines' seqnos into the same page */
+ spinlock_t hwsp_lock;
+ struct list_head hwsp_free_list;
+ } timelines;
+
+ struct intel_gt_requests {
+ /**
+ * We leave the user IRQ off as much as possible,
+ * but this means that requests will finish and never
+ * be retired once the system goes idle. Set a timer to
+ * fire periodically while the ring is running. When it
+ * fires, go retire requests.
+ */
+ struct delayed_work retire_work;
+ } requests;
+
+ struct intel_wakeref wakeref;
+ atomic_t user_wakeref;
+
+ struct list_head closed_vma;
+ spinlock_t closed_lock; /* guards the list of closed_vma */
+
+ struct intel_reset reset;
+
+ /**
+ * Is the GPU currently considered idle, or busy executing
+ * userspace requests? Whilst idle, we allow runtime power
+ * management to power down the hardware and display clocks.
+ * In order to reduce the effect on performance, there
+ * is a slight delay before we do so.
+ */
+ intel_wakeref_t awake;
+
+ struct intel_llc llc;
+ struct intel_rc6 rc6;
+ struct intel_rps rps;
+
+ ktime_t last_init_time;
+
+ struct i915_vma *scratch;
+
+ spinlock_t irq_lock;
+ u32 gt_imr;
+ u32 pm_ier;
+ u32 pm_imr;
+
+ u32 pm_guc_events;
+
+ struct intel_engine_cs *engine[I915_NUM_ENGINES];
+ struct intel_engine_cs *engine_class[MAX_ENGINE_CLASS + 1]
+ [MAX_ENGINE_INSTANCE + 1];
+
+ /*
+ * Default address space (either GGTT or ppGTT depending on arch).
+ *
+ * Reserved for exclusive use by the kernel.
+ */
+ struct i915_address_space *vm;
+};
+
+enum intel_gt_scratch_field {
+ /* 8 bytes */
+ INTEL_GT_SCRATCH_FIELD_DEFAULT = 0,
+
+ /* 8 bytes */
+ INTEL_GT_SCRATCH_FIELD_RENDER_FLUSH = 128,
+
+ /* 8 bytes */
+ INTEL_GT_SCRATCH_FIELD_COHERENTL3_WA = 256,
+
+ /* 6 * 8 bytes */
+ INTEL_GT_SCRATCH_FIELD_PERF_CS_GPR = 2048,
+
+ /* 4 bytes */
+ INTEL_GT_SCRATCH_FIELD_PERF_PREDICATE_RESULT_1 = 2096,
+};
+
+#endif /* __INTEL_GT_TYPES__ */
diff --git a/drivers/gpu/drm/i915/gt/intel_gtt.c b/drivers/gpu/drm/i915/gt/intel_gtt.c
new file mode 100644
index 000000000000..16acdc5d6734
--- /dev/null
+++ b/drivers/gpu/drm/i915/gt/intel_gtt.c
@@ -0,0 +1,598 @@
+// SPDX-License-Identifier: MIT
+/*
+ * Copyright © 2020 Intel Corporation
+ */
+
+#include <linux/slab.h> /* fault-inject.h is not standalone! */
+
+#include <linux/fault-inject.h>
+
+#include "i915_trace.h"
+#include "intel_gt.h"
+#include "intel_gtt.h"
+
+void stash_init(struct pagestash *stash)
+{
+ pagevec_init(&stash->pvec);
+ spin_lock_init(&stash->lock);
+}
+
+static struct page *stash_pop_page(struct pagestash *stash)
+{
+ struct page *page = NULL;
+
+ spin_lock(&stash->lock);
+ if (likely(stash->pvec.nr))
+ page = stash->pvec.pages[--stash->pvec.nr];
+ spin_unlock(&stash->lock);
+
+ return page;
+}
+
+static void stash_push_pagevec(struct pagestash *stash, struct pagevec *pvec)
+{
+ unsigned int nr;
+
+ spin_lock_nested(&stash->lock, SINGLE_DEPTH_NESTING);
+
+ nr = min_t(typeof(nr), pvec->nr, pagevec_space(&stash->pvec));
+ memcpy(stash->pvec.pages + stash->pvec.nr,
+ pvec->pages + pvec->nr - nr,
+ sizeof(pvec->pages[0]) * nr);
+ stash->pvec.nr += nr;
+
+ spin_unlock(&stash->lock);
+
+ pvec->nr -= nr;
+}
+
+static struct page *vm_alloc_page(struct i915_address_space *vm, gfp_t gfp)
+{
+ struct pagevec stack;
+ struct page *page;
+
+ if (I915_SELFTEST_ONLY(should_fail(&vm->fault_attr, 1)))
+ i915_gem_shrink_all(vm->i915);
+
+ page = stash_pop_page(&vm->free_pages);
+ if (page)
+ return page;
+
+ if (!vm->pt_kmap_wc)
+ return alloc_page(gfp);
+
+ /* Look in our global stash of WC pages... */
+ page = stash_pop_page(&vm->i915->mm.wc_stash);
+ if (page)
+ return page;
+
+ /*
+	 * Otherwise batch allocate pages to amortize the cost of set_pages_array_wc().
+ *
+ * We have to be careful as page allocation may trigger the shrinker
+ * (via direct reclaim) which will fill up the WC stash underneath us.
+ * So we add our WB pages into a temporary pvec on the stack and merge
+ * them into the WC stash after all the allocations are complete.
+ */
+ pagevec_init(&stack);
+ do {
+ struct page *page;
+
+ page = alloc_page(gfp);
+ if (unlikely(!page))
+ break;
+
+ stack.pages[stack.nr++] = page;
+ } while (pagevec_space(&stack));
+
+ if (stack.nr && !set_pages_array_wc(stack.pages, stack.nr)) {
+ page = stack.pages[--stack.nr];
+
+ /* Merge spare WC pages to the global stash */
+ if (stack.nr)
+ stash_push_pagevec(&vm->i915->mm.wc_stash, &stack);
+
+ /* Push any surplus WC pages onto the local VM stash */
+ if (stack.nr)
+ stash_push_pagevec(&vm->free_pages, &stack);
+ }
+
+ /* Return unwanted leftovers */
+ if (unlikely(stack.nr)) {
+ WARN_ON_ONCE(set_pages_array_wb(stack.pages, stack.nr));
+ __pagevec_release(&stack);
+ }
+
+ return page;
+}
+
+static void vm_free_pages_release(struct i915_address_space *vm,
+ bool immediate)
+{
+ struct pagevec *pvec = &vm->free_pages.pvec;
+ struct pagevec stack;
+
+ lockdep_assert_held(&vm->free_pages.lock);
+ GEM_BUG_ON(!pagevec_count(pvec));
+
+ if (vm->pt_kmap_wc) {
+ /*
+		 * When we use WC, first fill up the global stash and then,
+		 * only if that is full, immediately free the overflow.
+ */
+ stash_push_pagevec(&vm->i915->mm.wc_stash, pvec);
+
+ /*
+ * As we have made some room in the VM's free_pages,
+ * we can wait for it to fill again. Unless we are
+ * inside i915_address_space_fini() and must
+ * immediately release the pages!
+ */
+ if (pvec->nr <= (immediate ? 0 : PAGEVEC_SIZE - 1))
+ return;
+
+ /*
+ * We have to drop the lock to allow ourselves to sleep,
+ * so take a copy of the pvec and clear the stash for
+ * others to use it as we sleep.
+ */
+ stack = *pvec;
+ pagevec_reinit(pvec);
+ spin_unlock(&vm->free_pages.lock);
+
+ pvec = &stack;
+ set_pages_array_wb(pvec->pages, pvec->nr);
+
+ spin_lock(&vm->free_pages.lock);
+ }
+
+ __pagevec_release(pvec);
+}
+
+static void vm_free_page(struct i915_address_space *vm, struct page *page)
+{
+ /*
+ * On !llc, we need to change the pages back to WB. We only do so
+ * in bulk, so we rarely need to change the page attributes here,
+ * but doing so requires a stop_machine() from deep inside arch/x86/mm.
+ * To make detection of the possible sleep more likely, use an
+ * unconditional might_sleep() for everybody.
+ */
+ might_sleep();
+ spin_lock(&vm->free_pages.lock);
+ while (!pagevec_space(&vm->free_pages.pvec))
+ vm_free_pages_release(vm, false);
+ GEM_BUG_ON(pagevec_count(&vm->free_pages.pvec) >= PAGEVEC_SIZE);
+ pagevec_add(&vm->free_pages.pvec, page);
+ spin_unlock(&vm->free_pages.lock);
+}
+
+void __i915_vm_close(struct i915_address_space *vm)
+{
+ struct i915_vma *vma, *vn;
+
+ mutex_lock(&vm->mutex);
+ list_for_each_entry_safe(vma, vn, &vm->bound_list, vm_link) {
+ struct drm_i915_gem_object *obj = vma->obj;
+
+ /* Keep the obj (and hence the vma) alive as _we_ destroy it */
+ if (!kref_get_unless_zero(&obj->base.refcount))
+ continue;
+
+ atomic_and(~I915_VMA_PIN_MASK, &vma->flags);
+ WARN_ON(__i915_vma_unbind(vma));
+ __i915_vma_put(vma);
+
+ i915_gem_object_put(obj);
+ }
+ GEM_BUG_ON(!list_empty(&vm->bound_list));
+ mutex_unlock(&vm->mutex);
+}
+
+void i915_address_space_fini(struct i915_address_space *vm)
+{
+ spin_lock(&vm->free_pages.lock);
+ if (pagevec_count(&vm->free_pages.pvec))
+ vm_free_pages_release(vm, true);
+ GEM_BUG_ON(pagevec_count(&vm->free_pages.pvec));
+ spin_unlock(&vm->free_pages.lock);
+
+ drm_mm_takedown(&vm->mm);
+
+ mutex_destroy(&vm->mutex);
+}
+
+static void __i915_vm_release(struct work_struct *work)
+{
+ struct i915_address_space *vm =
+ container_of(work, struct i915_address_space, rcu.work);
+
+ vm->cleanup(vm);
+ i915_address_space_fini(vm);
+
+ kfree(vm);
+}
+
+void i915_vm_release(struct kref *kref)
+{
+ struct i915_address_space *vm =
+ container_of(kref, struct i915_address_space, ref);
+
+ GEM_BUG_ON(i915_is_ggtt(vm));
+ trace_i915_ppgtt_release(vm);
+
+ queue_rcu_work(vm->i915->wq, &vm->rcu);
+}
+
+void i915_address_space_init(struct i915_address_space *vm, int subclass)
+{
+ kref_init(&vm->ref);
+ INIT_RCU_WORK(&vm->rcu, __i915_vm_release);
+ atomic_set(&vm->open, 1);
+
+ /*
+ * The vm->mutex must be reclaim safe (for use in the shrinker).
+ * Do a dummy acquire now under fs_reclaim so that any allocation
+ * attempt holding the lock is immediately reported by lockdep.
+ */
+ mutex_init(&vm->mutex);
+ lockdep_set_subclass(&vm->mutex, subclass);
+ i915_gem_shrinker_taints_mutex(vm->i915, &vm->mutex);
+
+ GEM_BUG_ON(!vm->total);
+ drm_mm_init(&vm->mm, 0, vm->total);
+ vm->mm.head_node.color = I915_COLOR_UNEVICTABLE;
+
+ stash_init(&vm->free_pages);
+
+ INIT_LIST_HEAD(&vm->bound_list);
+}
+
+void clear_pages(struct i915_vma *vma)
+{
+ GEM_BUG_ON(!vma->pages);
+
+ if (vma->pages != vma->obj->mm.pages) {
+ sg_free_table(vma->pages);
+ kfree(vma->pages);
+ }
+ vma->pages = NULL;
+
+ memset(&vma->page_sizes, 0, sizeof(vma->page_sizes));
+}
+
+static int __setup_page_dma(struct i915_address_space *vm,
+ struct i915_page_dma *p,
+ gfp_t gfp)
+{
+ p->page = vm_alloc_page(vm, gfp | I915_GFP_ALLOW_FAIL);
+ if (unlikely(!p->page))
+ return -ENOMEM;
+
+ p->daddr = dma_map_page_attrs(vm->dma,
+ p->page, 0, PAGE_SIZE,
+ PCI_DMA_BIDIRECTIONAL,
+ DMA_ATTR_SKIP_CPU_SYNC |
+ DMA_ATTR_NO_WARN);
+ if (unlikely(dma_mapping_error(vm->dma, p->daddr))) {
+ vm_free_page(vm, p->page);
+ return -ENOMEM;
+ }
+
+ return 0;
+}
+
+int setup_page_dma(struct i915_address_space *vm, struct i915_page_dma *p)
+{
+ return __setup_page_dma(vm, p, __GFP_HIGHMEM);
+}
+
+void cleanup_page_dma(struct i915_address_space *vm, struct i915_page_dma *p)
+{
+ dma_unmap_page(vm->dma, p->daddr, PAGE_SIZE, PCI_DMA_BIDIRECTIONAL);
+ vm_free_page(vm, p->page);
+}
+
+void
+fill_page_dma(const struct i915_page_dma *p, const u64 val, unsigned int count)
+{
+ kunmap_atomic(memset64(kmap_atomic(p->page), val, count));
+}
+
+int setup_scratch_page(struct i915_address_space *vm, gfp_t gfp)
+{
+ unsigned long size;
+
+ /*
+ * In order to utilize 64K pages for an object with a size < 2M, we will
+ * need to support a 64K scratch page, given that every 16th entry for a
+ * page-table operating in 64K mode must point to a properly aligned 64K
+ * region, including any PTEs which happen to point to scratch.
+ *
+ * This is only relevant for the 48b PPGTT where we support
+ * huge-gtt-pages, see also i915_vma_insert(). However, as we share the
+ * scratch (read-only) between all vm, we create one 64k scratch page
+ * for all.
+ */
+ size = I915_GTT_PAGE_SIZE_4K;
+ if (i915_vm_is_4lvl(vm) &&
+ HAS_PAGE_SIZES(vm->i915, I915_GTT_PAGE_SIZE_64K)) {
+ size = I915_GTT_PAGE_SIZE_64K;
+ gfp |= __GFP_NOWARN;
+ }
+ gfp |= __GFP_ZERO | __GFP_RETRY_MAYFAIL;
+
+ do {
+ unsigned int order = get_order(size);
+ struct page *page;
+ dma_addr_t addr;
+
+ page = alloc_pages(gfp, order);
+ if (unlikely(!page))
+ goto skip;
+
+ addr = dma_map_page_attrs(vm->dma,
+ page, 0, size,
+ PCI_DMA_BIDIRECTIONAL,
+ DMA_ATTR_SKIP_CPU_SYNC |
+ DMA_ATTR_NO_WARN);
+ if (unlikely(dma_mapping_error(vm->dma, addr)))
+ goto free_page;
+
+ if (unlikely(!IS_ALIGNED(addr, size)))
+ goto unmap_page;
+
+ vm->scratch[0].base.page = page;
+ vm->scratch[0].base.daddr = addr;
+ vm->scratch_order = order;
+ return 0;
+
+unmap_page:
+ dma_unmap_page(vm->dma, addr, size, PCI_DMA_BIDIRECTIONAL);
+free_page:
+ __free_pages(page, order);
+skip:
+ if (size == I915_GTT_PAGE_SIZE_4K)
+ return -ENOMEM;
+
+ size = I915_GTT_PAGE_SIZE_4K;
+ gfp &= ~__GFP_NOWARN;
+ } while (1);
+}
+
+void cleanup_scratch_page(struct i915_address_space *vm)
+{
+ struct i915_page_dma *p = px_base(&vm->scratch[0]);
+ unsigned int order = vm->scratch_order;
+
+ dma_unmap_page(vm->dma, p->daddr, BIT(order) << PAGE_SHIFT,
+ PCI_DMA_BIDIRECTIONAL);
+ __free_pages(p->page, order);
+}
+
+void free_scratch(struct i915_address_space *vm)
+{
+ int i;
+
+ if (!px_dma(&vm->scratch[0])) /* set to 0 on clones */
+ return;
+
+ for (i = 1; i <= vm->top; i++) {
+ if (!px_dma(&vm->scratch[i]))
+ break;
+ cleanup_page_dma(vm, px_base(&vm->scratch[i]));
+ }
+
+ cleanup_scratch_page(vm);
+}
+
+void gtt_write_workarounds(struct intel_gt *gt)
+{
+ struct drm_i915_private *i915 = gt->i915;
+ struct intel_uncore *uncore = gt->uncore;
+
+ /*
+	 * This function is for GTT-related workarounds. It is called on
+	 * driver load and after a GPU reset, so you can place workarounds
+	 * here even if they get overwritten by a GPU reset.
+ */
+ /* WaIncreaseDefaultTLBEntries:chv,bdw,skl,bxt,kbl,glk,cfl,cnl,icl */
+ if (IS_BROADWELL(i915))
+ intel_uncore_write(uncore,
+ GEN8_L3_LRA_1_GPGPU,
+ GEN8_L3_LRA_1_GPGPU_DEFAULT_VALUE_BDW);
+ else if (IS_CHERRYVIEW(i915))
+ intel_uncore_write(uncore,
+ GEN8_L3_LRA_1_GPGPU,
+ GEN8_L3_LRA_1_GPGPU_DEFAULT_VALUE_CHV);
+ else if (IS_GEN9_LP(i915))
+ intel_uncore_write(uncore,
+ GEN8_L3_LRA_1_GPGPU,
+ GEN9_L3_LRA_1_GPGPU_DEFAULT_VALUE_BXT);
+ else if (INTEL_GEN(i915) >= 9 && INTEL_GEN(i915) <= 11)
+ intel_uncore_write(uncore,
+ GEN8_L3_LRA_1_GPGPU,
+ GEN9_L3_LRA_1_GPGPU_DEFAULT_VALUE_SKL);
+
+ /*
+ * To support 64K PTEs we need to first enable the use of the
+ * Intermediate-Page-Size(IPS) bit of the PDE field via some magical
+ * mmio, otherwise the page-walker will simply ignore the IPS bit. This
+ * shouldn't be needed after GEN10.
+ *
+	 * 64K pages were first introduced with BDW+, although technically they
+	 * only *work* from gen9+. For pre-BDW we instead have the option of
+ * 32K pages, but we don't currently have any support for it in our
+ * driver.
+ */
+ if (HAS_PAGE_SIZES(i915, I915_GTT_PAGE_SIZE_64K) &&
+ INTEL_GEN(i915) <= 10)
+ intel_uncore_rmw(uncore,
+ GEN8_GAMW_ECO_DEV_RW_IA,
+ 0,
+ GAMW_ECO_ENABLE_64K_IPS_FIELD);
+
+ if (IS_GEN_RANGE(i915, 8, 11)) {
+ bool can_use_gtt_cache = true;
+
+ /*
+ * According to the BSpec if we use 2M/1G pages then we also
+ * need to disable the GTT cache. At least on BDW we can see
+ * visual corruption when using 2M pages, and not disabling the
+ * GTT cache.
+ */
+ if (HAS_PAGE_SIZES(i915, I915_GTT_PAGE_SIZE_2M))
+ can_use_gtt_cache = false;
+
+ /* WaGttCachingOffByDefault */
+ intel_uncore_write(uncore,
+ HSW_GTT_CACHE_EN,
+ can_use_gtt_cache ? GTT_CACHE_EN_ALL : 0);
+ WARN_ON_ONCE(can_use_gtt_cache &&
+ intel_uncore_read(uncore,
+ HSW_GTT_CACHE_EN) == 0);
+ }
+}
+
+u64 gen8_pte_encode(dma_addr_t addr,
+ enum i915_cache_level level,
+ u32 flags)
+{
+ gen8_pte_t pte = addr | _PAGE_PRESENT | _PAGE_RW;
+
+ if (unlikely(flags & PTE_READ_ONLY))
+ pte &= ~_PAGE_RW;
+
+ switch (level) {
+ case I915_CACHE_NONE:
+ pte |= PPAT_UNCACHED;
+ break;
+ case I915_CACHE_WT:
+ pte |= PPAT_DISPLAY_ELLC;
+ break;
+ default:
+ pte |= PPAT_CACHED;
+ break;
+ }
+
+ return pte;
+}
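+
+/*
+ * For example, assuming the usual x86 bit values _PAGE_PRESENT == BIT(0),
+ * _PAGE_RW == BIT(1), _PAGE_PWT == BIT(3) and _PAGE_PCD == BIT(4):
+ *
+ *	gen8_pte_encode(0x1000, I915_CACHE_NONE, 0)
+ *		== 0x1000 | 0x3 | PPAT_UNCACHED == 0x101b
+ *
+ * and passing PTE_READ_ONLY in flags clears bit 1, giving 0x1019.
+ */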
+
+static void tgl_setup_private_ppat(struct intel_uncore *uncore)
+{
+ /* TGL doesn't support LLC or AGE settings */
+ intel_uncore_write(uncore, GEN12_PAT_INDEX(0), GEN8_PPAT_WB);
+ intel_uncore_write(uncore, GEN12_PAT_INDEX(1), GEN8_PPAT_WC);
+ intel_uncore_write(uncore, GEN12_PAT_INDEX(2), GEN8_PPAT_WT);
+ intel_uncore_write(uncore, GEN12_PAT_INDEX(3), GEN8_PPAT_UC);
+ intel_uncore_write(uncore, GEN12_PAT_INDEX(4), GEN8_PPAT_WB);
+ intel_uncore_write(uncore, GEN12_PAT_INDEX(5), GEN8_PPAT_WB);
+ intel_uncore_write(uncore, GEN12_PAT_INDEX(6), GEN8_PPAT_WB);
+ intel_uncore_write(uncore, GEN12_PAT_INDEX(7), GEN8_PPAT_WB);
+}
+
+static void cnl_setup_private_ppat(struct intel_uncore *uncore)
+{
+ intel_uncore_write(uncore,
+ GEN10_PAT_INDEX(0),
+ GEN8_PPAT_WB | GEN8_PPAT_LLC);
+ intel_uncore_write(uncore,
+ GEN10_PAT_INDEX(1),
+ GEN8_PPAT_WC | GEN8_PPAT_LLCELLC);
+ intel_uncore_write(uncore,
+ GEN10_PAT_INDEX(2),
+ GEN8_PPAT_WT | GEN8_PPAT_LLCELLC);
+ intel_uncore_write(uncore,
+ GEN10_PAT_INDEX(3),
+ GEN8_PPAT_UC);
+ intel_uncore_write(uncore,
+ GEN10_PAT_INDEX(4),
+ GEN8_PPAT_WB | GEN8_PPAT_LLCELLC | GEN8_PPAT_AGE(0));
+ intel_uncore_write(uncore,
+ GEN10_PAT_INDEX(5),
+ GEN8_PPAT_WB | GEN8_PPAT_LLCELLC | GEN8_PPAT_AGE(1));
+ intel_uncore_write(uncore,
+ GEN10_PAT_INDEX(6),
+ GEN8_PPAT_WB | GEN8_PPAT_LLCELLC | GEN8_PPAT_AGE(2));
+ intel_uncore_write(uncore,
+ GEN10_PAT_INDEX(7),
+ GEN8_PPAT_WB | GEN8_PPAT_LLCELLC | GEN8_PPAT_AGE(3));
+}
+
+/*
+ * The GGTT and PPGTT need a private PPAT setup in order to handle cacheability
+ * bits. When using advanced contexts each context stores its own PAT, but
+ * writing this data shouldn't be harmful even in those cases.
+ */
+static void bdw_setup_private_ppat(struct intel_uncore *uncore)
+{
+ u64 pat;
+
+ pat = GEN8_PPAT(0, GEN8_PPAT_WB | GEN8_PPAT_LLC) | /* for normal objects, no eLLC */
+ GEN8_PPAT(1, GEN8_PPAT_WC | GEN8_PPAT_LLCELLC) | /* for something pointing to ptes? */
+ GEN8_PPAT(2, GEN8_PPAT_WT | GEN8_PPAT_LLCELLC) | /* for scanout with eLLC */
+ GEN8_PPAT(3, GEN8_PPAT_UC) | /* Uncached objects, mostly for scanout */
+ GEN8_PPAT(4, GEN8_PPAT_WB | GEN8_PPAT_LLCELLC | GEN8_PPAT_AGE(0)) |
+ GEN8_PPAT(5, GEN8_PPAT_WB | GEN8_PPAT_LLCELLC | GEN8_PPAT_AGE(1)) |
+ GEN8_PPAT(6, GEN8_PPAT_WB | GEN8_PPAT_LLCELLC | GEN8_PPAT_AGE(2)) |
+ GEN8_PPAT(7, GEN8_PPAT_WB | GEN8_PPAT_LLCELLC | GEN8_PPAT_AGE(3));
+
+ intel_uncore_write(uncore, GEN8_PRIVATE_PAT_LO, lower_32_bits(pat));
+ intel_uncore_write(uncore, GEN8_PRIVATE_PAT_HI, upper_32_bits(pat));
+}
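+
+/*
+ * GEN8_PPAT(i, x) places the 8-bit entry x into byte i of the 64-bit PAT,
+ * so the eight entries above pack into a single u64 that is then split
+ * across the LO/HI registers; e.g. entry 3 occupies bits 31:24.
+ */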
+
+static void chv_setup_private_ppat(struct intel_uncore *uncore)
+{
+ u64 pat;
+
+ /*
+ * Map WB on BDW to snooped on CHV.
+ *
+ * Only the snoop bit has meaning for CHV, the rest is
+ * ignored.
+ *
+ * The hardware will never snoop for certain types of accesses:
+ * - CPU GTT (GMADR->GGTT->no snoop->memory)
+ * - PPGTT page tables
+ * - some other special cycles
+ *
+ * As with BDW, we also need to consider the following for GT accesses:
+ * "For GGTT, there is NO pat_sel[2:0] from the entry,
+ * so RTL will always use the value corresponding to
+ * pat_sel = 000".
+ * Which means we must set the snoop bit in PAT entry 0
+ * in order to keep the global status page working.
+ */
+
+ pat = GEN8_PPAT(0, CHV_PPAT_SNOOP) |
+ GEN8_PPAT(1, 0) |
+ GEN8_PPAT(2, 0) |
+ GEN8_PPAT(3, 0) |
+ GEN8_PPAT(4, CHV_PPAT_SNOOP) |
+ GEN8_PPAT(5, CHV_PPAT_SNOOP) |
+ GEN8_PPAT(6, CHV_PPAT_SNOOP) |
+ GEN8_PPAT(7, CHV_PPAT_SNOOP);
+
+ intel_uncore_write(uncore, GEN8_PRIVATE_PAT_LO, lower_32_bits(pat));
+ intel_uncore_write(uncore, GEN8_PRIVATE_PAT_HI, upper_32_bits(pat));
+}
+
+void setup_private_pat(struct intel_uncore *uncore)
+{
+ struct drm_i915_private *i915 = uncore->i915;
+
+ GEM_BUG_ON(INTEL_GEN(i915) < 8);
+
+ if (INTEL_GEN(i915) >= 12)
+ tgl_setup_private_ppat(uncore);
+ else if (INTEL_GEN(i915) >= 10)
+ cnl_setup_private_ppat(uncore);
+ else if (IS_CHERRYVIEW(i915) || IS_GEN9_LP(i915))
+ chv_setup_private_ppat(uncore);
+ else
+ bdw_setup_private_ppat(uncore);
+}
+
+#if IS_ENABLED(CONFIG_DRM_I915_SELFTEST)
+#include "selftests/mock_gtt.c"
+#endif
diff --git a/drivers/gpu/drm/i915/gt/intel_gtt.h b/drivers/gpu/drm/i915/gt/intel_gtt.h
new file mode 100644
index 000000000000..7da7681c20b1
--- /dev/null
+++ b/drivers/gpu/drm/i915/gt/intel_gtt.h
@@ -0,0 +1,587 @@
+/* SPDX-License-Identifier: MIT */
+/*
+ * Copyright © 2020 Intel Corporation
+ *
+ * Please try to maintain the following order within this file unless it makes
+ * sense to do otherwise. From top to bottom:
+ * 1. typedefs
+ * 2. #defines, and macros
+ * 3. structure definitions
+ * 4. function prototypes
+ *
+ * Within each section, please try to order by generation in ascending order,
+ * from top to bottom (i.e. gen6 on the top, gen8 on the bottom).
+ */
+
+#ifndef __INTEL_GTT_H__
+#define __INTEL_GTT_H__
+
+#include <linux/io-mapping.h>
+#include <linux/kref.h>
+#include <linux/mm.h>
+#include <linux/pagevec.h>
+#include <linux/scatterlist.h>
+#include <linux/workqueue.h>
+
+#include <drm/drm_mm.h>
+
+#include "gt/intel_reset.h"
+#include "i915_gem_fence_reg.h"
+#include "i915_selftest.h"
+#include "i915_vma_types.h"
+
+#define I915_GFP_ALLOW_FAIL (GFP_KERNEL | __GFP_RETRY_MAYFAIL | __GFP_NOWARN)
+
+#if IS_ENABLED(CONFIG_DRM_I915_TRACE_GTT)
+#define DBG(...) trace_printk(__VA_ARGS__)
+#else
+#define DBG(...)
+#endif
+
+#define NALLOC 3 /* 1 normal, 1 for concurrent threads, 1 for preallocation */
+
+#define I915_GTT_PAGE_SIZE_4K BIT_ULL(12)
+#define I915_GTT_PAGE_SIZE_64K BIT_ULL(16)
+#define I915_GTT_PAGE_SIZE_2M BIT_ULL(21)
+
+#define I915_GTT_PAGE_SIZE I915_GTT_PAGE_SIZE_4K
+#define I915_GTT_MAX_PAGE_SIZE I915_GTT_PAGE_SIZE_2M
+
+#define I915_GTT_PAGE_MASK -I915_GTT_PAGE_SIZE
+
+#define I915_GTT_MIN_ALIGNMENT I915_GTT_PAGE_SIZE
+
+#define I915_FENCE_REG_NONE -1
+#define I915_MAX_NUM_FENCES 32
+/* 32 fences + sign bit for FENCE_REG_NONE */
+#define I915_MAX_NUM_FENCE_BITS 6
+
+typedef u32 gen6_pte_t;
+typedef u64 gen8_pte_t;
+
+#define ggtt_total_entries(ggtt) ((ggtt)->vm.total >> PAGE_SHIFT)
+
+#define I915_PTES(pte_len) ((unsigned int)(PAGE_SIZE / (pte_len)))
+#define I915_PTE_MASK(pte_len) (I915_PTES(pte_len) - 1)
+#define I915_PDES 512
+#define I915_PDE_MASK (I915_PDES - 1)
+
+/* gen6-hsw has bit 11-4 for physical addr bit 39-32 */
+#define GEN6_GTT_ADDR_ENCODE(addr) ((addr) | (((addr) >> 28) & 0xff0))
+#define GEN6_PTE_ADDR_ENCODE(addr) GEN6_GTT_ADDR_ENCODE(addr)
+#define GEN6_PDE_ADDR_ENCODE(addr) GEN6_GTT_ADDR_ENCODE(addr)
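+/*
+ * For example, physical address bit 32 (addr = 0x100000000ULL) folds down
+ * to PTE bit 4: (addr >> 28) & 0xff0 == 0x010, so the 32-bit PTE carries
+ * the high address bits in bits 11:4 alongside the page-aligned address.
+ */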
+#define GEN6_PTE_CACHE_LLC (2 << 1)
+#define GEN6_PTE_UNCACHED (1 << 1)
+#define GEN6_PTE_VALID REG_BIT(0)
+
+#define GEN6_PTES I915_PTES(sizeof(gen6_pte_t))
+#define GEN6_PD_SIZE (I915_PDES * PAGE_SIZE)
+#define GEN6_PD_ALIGN (PAGE_SIZE * 16)
+#define GEN6_PDE_SHIFT 22
+#define GEN6_PDE_VALID REG_BIT(0)
+#define NUM_PTE(pde_shift)	 (1 << ((pde_shift) - PAGE_SHIFT))
+
+#define GEN7_PTE_CACHE_L3_LLC (3 << 1)
+
+#define BYT_PTE_SNOOPED_BY_CPU_CACHES REG_BIT(2)
+#define BYT_PTE_WRITEABLE REG_BIT(1)
+
+/*
+ * Cacheability Control is a 4-bit value. The low three bits are stored in bits
+ * 3:1 of the PTE, while the fourth bit is stored in bit 11 of the PTE.
+ */
+#define HSW_CACHEABILITY_CONTROL(bits) ((((bits) & 0x7) << 1) | \
+ (((bits) & 0x8) << (11 - 3)))
+#define HSW_WB_LLC_AGE3 HSW_CACHEABILITY_CONTROL(0x2)
+#define HSW_WB_LLC_AGE0 HSW_CACHEABILITY_CONTROL(0x3)
+#define HSW_WB_ELLC_LLC_AGE3 HSW_CACHEABILITY_CONTROL(0x8)
+#define HSW_WB_ELLC_LLC_AGE0 HSW_CACHEABILITY_CONTROL(0xb)
+#define HSW_WT_ELLC_LLC_AGE3 HSW_CACHEABILITY_CONTROL(0x7)
+#define HSW_WT_ELLC_LLC_AGE0 HSW_CACHEABILITY_CONTROL(0x6)
+#define HSW_PTE_UNCACHED (0)
+#define HSW_GTT_ADDR_ENCODE(addr) ((addr) | (((addr) >> 28) & 0x7f0))
+#define HSW_PTE_ADDR_ENCODE(addr) HSW_GTT_ADDR_ENCODE(addr)
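+/*
+ * For example, HSW_WB_ELLC_LLC_AGE0 encodes bits = 0xb: the low three bits
+ * (0b011) land in PTE bits 3:1 giving 0x6, and the fourth bit (0x8) moves
+ * up by (11 - 3) to PTE bit 11 giving 0x800, for a final value of 0x806.
+ */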
+
+/*
+ * GEN8 32b style address is defined as a 3 level page table:
+ * 31:30 | 29:21 | 20:12 | 11:0
+ * PDPE | PDE | PTE | offset
+ * The difference as compared to normal x86 3 level page table is the PDPEs are
+ * programmed via register.
+ *
+ * GEN8 48b style address is defined as a 4 level page table:
+ * 47:39 | 38:30 | 29:21 | 20:12 | 11:0
+ * PML4E | PDPE | PDE | PTE | offset
+ */
+#define GEN8_3LVL_PDPES 4
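+
+/*
+ * For example, the 32b style address 0x12345678 decomposes as:
+ *
+ *	PDPE   = 0x12345678 >> 30           = 0
+ *	PDE    = (0x12345678 >> 21) & 0x1ff = 0x091
+ *	PTE    = (0x12345678 >> 12) & 0x1ff = 0x145
+ *	offset = 0x12345678 & 0xfff         = 0x678
+ */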
+
+#define PPAT_UNCACHED (_PAGE_PWT | _PAGE_PCD)
+#define PPAT_CACHED_PDE 0 /* WB LLC */
+#define PPAT_CACHED _PAGE_PAT /* WB LLCeLLC */
+#define PPAT_DISPLAY_ELLC _PAGE_PCD /* WT eLLC */
+
+#define CHV_PPAT_SNOOP REG_BIT(6)
+#define GEN8_PPAT_AGE(x) ((x)<<4)
+#define GEN8_PPAT_LLCeLLC (3<<2)
+#define GEN8_PPAT_LLCELLC (2<<2)
+#define GEN8_PPAT_LLC (1<<2)
+#define GEN8_PPAT_WB (3<<0)
+#define GEN8_PPAT_WT (2<<0)
+#define GEN8_PPAT_WC (1<<0)
+#define GEN8_PPAT_UC (0<<0)
+#define GEN8_PPAT_ELLC_OVERRIDE (0<<2)
+#define GEN8_PPAT(i, x) ((u64)(x) << ((i) * 8))
+
+#define GEN8_PDE_IPS_64K BIT(11)
+#define GEN8_PDE_PS_2M BIT(7)
+
+#define for_each_sgt_daddr(__dp, __iter, __sgt) \
+ __for_each_sgt_daddr(__dp, __iter, __sgt, I915_GTT_PAGE_SIZE)
+
+struct i915_page_dma {
+ struct page *page;
+ union {
+ dma_addr_t daddr;
+
+ /*
+ * For gen6/gen7 only. This is the offset in the GGTT
+ * where the page directory entries for PPGTT begin
+ */
+ u32 ggtt_offset;
+ };
+};
+
+struct i915_page_scratch {
+ struct i915_page_dma base;
+ u64 encode;
+};
+
+struct i915_page_table {
+ struct i915_page_dma base;
+ atomic_t used;
+};
+
+struct i915_page_directory {
+ struct i915_page_table pt;
+ spinlock_t lock;
+ void *entry[512];
+};
+
+#define __px_choose_expr(x, type, expr, other) \
+ __builtin_choose_expr( \
+ __builtin_types_compatible_p(typeof(x), type) || \
+ __builtin_types_compatible_p(typeof(x), const type), \
+ ({ type __x = (type)(x); expr; }), \
+ other)
+
+#define px_base(px) \
+ __px_choose_expr(px, struct i915_page_dma *, __x, \
+ __px_choose_expr(px, struct i915_page_scratch *, &__x->base, \
+ __px_choose_expr(px, struct i915_page_table *, &__x->base, \
+ __px_choose_expr(px, struct i915_page_directory *, &__x->pt.base, \
+ (void)0))))
+#define px_dma(px) (px_base(px)->daddr)
+
+#define px_pt(px) \
+ __px_choose_expr(px, struct i915_page_table *, __x, \
+ __px_choose_expr(px, struct i915_page_directory *, &__x->pt, \
+ (void)0))
+#define px_used(px) (&px_pt(px)->used)
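+
+/*
+ * The __builtin_choose_expr() chain dispatches on the static type of the
+ * argument, so for a struct i915_page_directory *pd:
+ *
+ *	px_base(pd) expands to &pd->pt.base
+ *	px_dma(pd)  expands to pd->pt.base.daddr
+ *	px_used(pd) expands to &pd->pt.used
+ *
+ * An unsupported type selects the (void)0 arm and fails to compile.
+ */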
+
+enum i915_cache_level;
+
+struct drm_i915_file_private;
+struct drm_i915_gem_object;
+struct i915_vma;
+struct intel_gt;
+
+struct i915_vma_ops {
+ /* Map an object into an address space with the given cache flags. */
+ int (*bind_vma)(struct i915_vma *vma,
+ enum i915_cache_level cache_level,
+ u32 flags);
+ /*
+ * Unmap an object from an address space. This usually consists of
+ * setting the valid PTE entries to a reserved scratch page.
+ */
+ void (*unbind_vma)(struct i915_vma *vma);
+
+ int (*set_pages)(struct i915_vma *vma);
+ void (*clear_pages)(struct i915_vma *vma);
+};
+
+struct pagestash {
+ spinlock_t lock;
+ struct pagevec pvec;
+};
+
+void stash_init(struct pagestash *stash);
+
+struct i915_address_space {
+ struct kref ref;
+ struct rcu_work rcu;
+
+ struct drm_mm mm;
+ struct intel_gt *gt;
+ struct drm_i915_private *i915;
+ struct device *dma;
+ /*
+ * Every address space belongs to a struct file - except for the global
+ * GTT that is owned by the driver (and so @file is set to NULL). In
+ * principle, no information should leak from one context to another
+	 * (or between files/processes etc.) unless explicitly shared by the
+	 * owner. Tracking the owner is important in order to free up per-file
+	 * objects along with the file, to aid resource tracking, and to
+ * assign blame.
+ */
+ struct drm_i915_file_private *file;
+	u64 total;		/* size addr space maps (e.g. 2GB for ggtt) */
+ u64 reserved; /* size addr space reserved */
+
+ unsigned int bind_async_flags;
+
+ /*
+ * Each active user context has its own address space (in full-ppgtt).
+ * Since the vm may be shared between multiple contexts, we count how
+ * many contexts keep us "open". Once open hits zero, we are closed
+ * and do not allow any new attachments, and proceed to shutdown our
+ * vma and page directories.
+ */
+ atomic_t open;
+
+ struct mutex mutex; /* protects vma and our lists */
+#define VM_CLASS_GGTT 0
+#define VM_CLASS_PPGTT 1
+
+ struct i915_page_scratch scratch[4];
+ unsigned int scratch_order;
+ unsigned int top;
+
+ /**
+ * List of vma currently bound.
+ */
+ struct list_head bound_list;
+
+ struct pagestash free_pages;
+
+ /* Global GTT */
+ bool is_ggtt:1;
+
+ /* Some systems require uncached updates of the page directories */
+ bool pt_kmap_wc:1;
+
+ /* Some systems support read-only mappings for GGTT and/or PPGTT */
+ bool has_read_only:1;
+
+ u64 (*pte_encode)(dma_addr_t addr,
+ enum i915_cache_level level,
+ u32 flags); /* Create a valid PTE */
+#define PTE_READ_ONLY BIT(0)
+
+ int (*allocate_va_range)(struct i915_address_space *vm,
+ u64 start, u64 length);
+ void (*clear_range)(struct i915_address_space *vm,
+ u64 start, u64 length);
+ void (*insert_page)(struct i915_address_space *vm,
+ dma_addr_t addr,
+ u64 offset,
+ enum i915_cache_level cache_level,
+ u32 flags);
+ void (*insert_entries)(struct i915_address_space *vm,
+ struct i915_vma *vma,
+ enum i915_cache_level cache_level,
+ u32 flags);
+ void (*cleanup)(struct i915_address_space *vm);
+
+ struct i915_vma_ops vma_ops;
+
+ I915_SELFTEST_DECLARE(struct fault_attr fault_attr);
+ I915_SELFTEST_DECLARE(bool scrub_64K);
+};
+
+/*
+ * The Graphics Translation Table is the way in which GEN hardware translates a
+ * Graphics Virtual Address into a Physical Address. In addition to the normal
+ * collateral associated with any va->pa translations GEN hardware also has a
+ * portion of the GTT which can be mapped by the CPU and remain both coherent
+ * and correct (in cases like swizzling). That region is referred to as GMADR in
+ * the spec.
+ */
+struct i915_ggtt {
+ struct i915_address_space vm;
+
+ struct io_mapping iomap; /* Mapping to our CPU mappable region */
+ struct resource gmadr; /* GMADR resource */
+ resource_size_t mappable_end; /* End offset that we can CPU map */
+
+ /** "Graphics Stolen Memory" holds the global PTEs */
+ void __iomem *gsm;
+ void (*invalidate)(struct i915_ggtt *ggtt);
+
+ /** PPGTT used for aliasing the PPGTT with the GTT */
+ struct i915_ppgtt *alias;
+
+ bool do_idle_maps;
+
+ int mtrr;
+
+ /** Bit 6 swizzling required for X tiling */
+ u32 bit_6_swizzle_x;
+ /** Bit 6 swizzling required for Y tiling */
+ u32 bit_6_swizzle_y;
+
+ u32 pin_bias;
+
+ unsigned int num_fences;
+ struct i915_fence_reg fence_regs[I915_MAX_NUM_FENCES];
+ struct list_head fence_list;
+
+ /**
+	 * List of all objects in gtt_space, currently mmapped by userspace.
+ * All objects within this list must also be on bound_list.
+ */
+ struct list_head userfault_list;
+
+ /* Manual runtime pm autosuspend delay for user GGTT mmaps */
+ struct intel_wakeref_auto userfault_wakeref;
+
+ struct mutex error_mutex;
+ struct drm_mm_node error_capture;
+ struct drm_mm_node uc_fw;
+};
+
+struct i915_ppgtt {
+ struct i915_address_space vm;
+
+ struct i915_page_directory *pd;
+};
+
+#define i915_is_ggtt(vm) ((vm)->is_ggtt)
+
+static inline bool
+i915_vm_is_4lvl(const struct i915_address_space *vm)
+{
+ return (vm->total - 1) >> 32;
+}
+
+static inline bool
+i915_vm_has_scratch_64K(struct i915_address_space *vm)
+{
+ return vm->scratch_order == get_order(I915_GTT_PAGE_SIZE_64K);
+}
+
+static inline bool
+i915_vm_has_cache_coloring(struct i915_address_space *vm)
+{
+ return i915_is_ggtt(vm) && vm->mm.color_adjust;
+}
+
+static inline struct i915_ggtt *
+i915_vm_to_ggtt(struct i915_address_space *vm)
+{
+ BUILD_BUG_ON(offsetof(struct i915_ggtt, vm));
+ GEM_BUG_ON(!i915_is_ggtt(vm));
+ return container_of(vm, struct i915_ggtt, vm);
+}
+
+static inline struct i915_ppgtt *
+i915_vm_to_ppgtt(struct i915_address_space *vm)
+{
+ BUILD_BUG_ON(offsetof(struct i915_ppgtt, vm));
+ GEM_BUG_ON(i915_is_ggtt(vm));
+ return container_of(vm, struct i915_ppgtt, vm);
+}
+
+static inline struct i915_address_space *
+i915_vm_get(struct i915_address_space *vm)
+{
+ kref_get(&vm->ref);
+ return vm;
+}
+
+void i915_vm_release(struct kref *kref);
+
+static inline void i915_vm_put(struct i915_address_space *vm)
+{
+ kref_put(&vm->ref, i915_vm_release);
+}
+
+static inline struct i915_address_space *
+i915_vm_open(struct i915_address_space *vm)
+{
+ GEM_BUG_ON(!atomic_read(&vm->open));
+ atomic_inc(&vm->open);
+ return i915_vm_get(vm);
+}
+
+static inline bool
+i915_vm_tryopen(struct i915_address_space *vm)
+{
+ if (atomic_add_unless(&vm->open, 1, 0))
+ return i915_vm_get(vm);
+
+ return false;
+}
+
+void __i915_vm_close(struct i915_address_space *vm);
+
+static inline void
+i915_vm_close(struct i915_address_space *vm)
+{
+ GEM_BUG_ON(!atomic_read(&vm->open));
+ if (atomic_dec_and_test(&vm->open))
+ __i915_vm_close(vm);
+
+ i915_vm_put(vm);
+}
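+
+/*
+ * The open count is distinct from the kref: a context attaching to the vm
+ * pairs i915_vm_open() with i915_vm_close(), which drops both the open
+ * count and the reference, while lookups racing against shutdown use
+ * i915_vm_tryopen(), which only succeeds while another opener still holds
+ * the count above zero.
+ */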
+
+void i915_address_space_init(struct i915_address_space *vm, int subclass);
+void i915_address_space_fini(struct i915_address_space *vm);
+
+static inline u32 i915_pte_index(u64 address, unsigned int pde_shift)
+{
+ const u32 mask = NUM_PTE(pde_shift) - 1;
+
+ return (address >> PAGE_SHIFT) & mask;
+}
+
+/*
+ * Helper to count the number of PTEs within the given length. This count
+ * does not cross a page table boundary, so the max value would be
+ * GEN6_PTES for GEN6, and GEN8_PTES for GEN8.
+ */
+static inline u32 i915_pte_count(u64 addr, u64 length, unsigned int pde_shift)
+{
+ const u64 mask = ~((1ULL << pde_shift) - 1);
+ u64 end;
+
+ GEM_BUG_ON(length == 0);
+ GEM_BUG_ON(offset_in_page(addr | length));
+
+ end = addr + length;
+
+ if ((addr & mask) != (end & mask))
+ return NUM_PTE(pde_shift) - i915_pte_index(addr, pde_shift);
+
+ return i915_pte_index(end, pde_shift) - i915_pte_index(addr, pde_shift);
+}
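+
+/*
+ * For example, with pde_shift == GEN6_PDE_SHIFT (22) a page table spans
+ * 4MiB and holds NUM_PTE(22) == 1024 entries, so for a range crossing a
+ * page-table boundary only the PTEs up to the boundary are counted:
+ *
+ *	i915_pte_count(0x3ff000, 0x2000, 22) == 1024 - 1023 == 1
+ */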
+
+static inline u32 i915_pde_index(u64 addr, u32 shift)
+{
+ return (addr >> shift) & I915_PDE_MASK;
+}
+
+static inline struct i915_page_table *
+i915_pt_entry(const struct i915_page_directory * const pd,
+ const unsigned short n)
+{
+ return pd->entry[n];
+}
+
+static inline struct i915_page_directory *
+i915_pd_entry(const struct i915_page_directory * const pdp,
+ const unsigned short n)
+{
+ return pdp->entry[n];
+}
+
+static inline dma_addr_t
+i915_page_dir_dma_addr(const struct i915_ppgtt *ppgtt, const unsigned int n)
+{
+ struct i915_page_dma *pt = ppgtt->pd->entry[n];
+
+ return px_dma(pt ?: px_base(&ppgtt->vm.scratch[ppgtt->vm.top]));
+}
+
+void ppgtt_init(struct i915_ppgtt *ppgtt, struct intel_gt *gt);
+
+int i915_ggtt_probe_hw(struct drm_i915_private *i915);
+int i915_ggtt_init_hw(struct drm_i915_private *i915);
+int i915_ggtt_enable_hw(struct drm_i915_private *i915);
+void i915_ggtt_enable_guc(struct i915_ggtt *ggtt);
+void i915_ggtt_disable_guc(struct i915_ggtt *ggtt);
+int i915_init_ggtt(struct drm_i915_private *i915);
+void i915_ggtt_driver_release(struct drm_i915_private *i915);
+
+static inline bool i915_ggtt_has_aperture(const struct i915_ggtt *ggtt)
+{
+ return ggtt->mappable_end > 0;
+}
+
+int i915_ppgtt_init_hw(struct intel_gt *gt);
+
+struct i915_ppgtt *i915_ppgtt_create(struct intel_gt *gt);
+
+void i915_gem_suspend_gtt_mappings(struct drm_i915_private *i915);
+void i915_gem_restore_gtt_mappings(struct drm_i915_private *i915);
+
+u64 gen8_pte_encode(dma_addr_t addr,
+ enum i915_cache_level level,
+ u32 flags);
+
+int setup_page_dma(struct i915_address_space *vm, struct i915_page_dma *p);
+void cleanup_page_dma(struct i915_address_space *vm, struct i915_page_dma *p);
+
+#define kmap_atomic_px(px) kmap_atomic(px_base(px)->page)
+
+void
+fill_page_dma(const struct i915_page_dma *p, const u64 val, unsigned int count);
+
+#define fill_px(px, v) fill_page_dma(px_base(px), (v), PAGE_SIZE / sizeof(u64))
+#define fill32_px(px, v) do { \
+ u64 v__ = lower_32_bits(v); \
+ fill_px((px), v__ << 32 | v__); \
+} while (0)
+
+int setup_scratch_page(struct i915_address_space *vm, gfp_t gfp);
+void cleanup_scratch_page(struct i915_address_space *vm);
+void free_scratch(struct i915_address_space *vm);
+
+struct i915_page_table *alloc_pt(struct i915_address_space *vm);
+struct i915_page_directory *alloc_pd(struct i915_address_space *vm);
+struct i915_page_directory *__alloc_pd(size_t sz);
+
+void free_pd(struct i915_address_space *vm, struct i915_page_dma *pd);
+
+#define free_px(vm, px) free_pd(vm, px_base(px))
+
+void
+__set_pd_entry(struct i915_page_directory * const pd,
+ const unsigned short idx,
+ struct i915_page_dma * const to,
+ u64 (*encode)(const dma_addr_t, const enum i915_cache_level));
+
+#define set_pd_entry(pd, idx, to) \
+ __set_pd_entry((pd), (idx), px_base(to), gen8_pde_encode)
+
+void
+clear_pd_entry(struct i915_page_directory * const pd,
+ const unsigned short idx,
+ const struct i915_page_scratch * const scratch);
+
+bool
+release_pd_entry(struct i915_page_directory * const pd,
+ const unsigned short idx,
+ struct i915_page_table * const pt,
+ const struct i915_page_scratch * const scratch);
+void gen6_ggtt_invalidate(struct i915_ggtt *ggtt);
+
+int ggtt_set_pages(struct i915_vma *vma);
+int ppgtt_set_pages(struct i915_vma *vma);
+void clear_pages(struct i915_vma *vma);
+
+void gtt_write_workarounds(struct intel_gt *gt);
+
+void setup_private_pat(struct intel_uncore *uncore);
+
+static inline struct sgt_dma {
+ struct scatterlist *sg;
+ dma_addr_t dma, max;
+} sgt_dma(struct i915_vma *vma) {
+ struct scatterlist *sg = vma->pages->sgl;
+ dma_addr_t addr = sg_dma_address(sg);
+
+ return (struct sgt_dma){ sg, addr, addr + sg->length };
+}
+
+#endif /* __INTEL_GTT_H__ */
diff --git a/drivers/gpu/drm/i915/gt/intel_hangcheck.c b/drivers/gpu/drm/i915/gt/intel_hangcheck.c
deleted file mode 100644
index 6bcfa6456c45..000000000000
--- a/drivers/gpu/drm/i915/gt/intel_hangcheck.c
+++ /dev/null
@@ -1,347 +0,0 @@
-/*
- * Copyright © 2016 Intel Corporation
- *
- * Permission is hereby granted, free of charge, to any person obtaining a
- * copy of this software and associated documentation files (the "Software"),
- * to deal in the Software without restriction, including without limitation
- * the rights to use, copy, modify, merge, publish, distribute, sublicense,
- * and/or sell copies of the Software, and to permit persons to whom the
- * Software is furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice (including the next
- * paragraph) shall be included in all copies or substantial portions of the
- * Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
- * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
- * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
- * IN THE SOFTWARE.
- *
- */
-
-#include "intel_reset.h"
-#include "i915_drv.h"
-
-struct hangcheck {
- u64 acthd;
- u32 ring;
- u32 head;
- enum intel_engine_hangcheck_action action;
- unsigned long action_timestamp;
- int deadlock;
- struct intel_instdone instdone;
- bool wedged:1;
- bool stalled:1;
-};
-
-static bool instdone_unchanged(u32 current_instdone, u32 *old_instdone)
-{
- u32 tmp = current_instdone | *old_instdone;
- bool unchanged;
-
- unchanged = tmp == *old_instdone;
- *old_instdone |= tmp;
-
- return unchanged;
-}
-
-static bool subunits_stuck(struct intel_engine_cs *engine)
-{
- struct drm_i915_private *dev_priv = engine->i915;
- struct intel_instdone instdone;
- struct intel_instdone *accu_instdone = &engine->hangcheck.instdone;
- bool stuck;
- int slice;
- int subslice;
-
- if (engine->id != RCS0)
- return true;
-
- intel_engine_get_instdone(engine, &instdone);
-
- /* There might be unstable subunit states even when
- * actual head is not moving. Filter out the unstable ones by
- * accumulating the undone -> done transitions and only
- * consider those as progress.
- */
- stuck = instdone_unchanged(instdone.instdone,
- &accu_instdone->instdone);
- stuck &= instdone_unchanged(instdone.slice_common,
- &accu_instdone->slice_common);
-
- for_each_instdone_slice_subslice(dev_priv, slice, subslice) {
- stuck &= instdone_unchanged(instdone.sampler[slice][subslice],
- &accu_instdone->sampler[slice][subslice]);
- stuck &= instdone_unchanged(instdone.row[slice][subslice],
- &accu_instdone->row[slice][subslice]);
- }
-
- return stuck;
-}
-
-static enum intel_engine_hangcheck_action
-head_stuck(struct intel_engine_cs *engine, u64 acthd)
-{
- if (acthd != engine->hangcheck.acthd) {
-
- /* Clear subunit states on head movement */
- memset(&engine->hangcheck.instdone, 0,
- sizeof(engine->hangcheck.instdone));
-
- return ENGINE_ACTIVE_HEAD;
- }
-
- if (!subunits_stuck(engine))
- return ENGINE_ACTIVE_SUBUNITS;
-
- return ENGINE_DEAD;
-}
-
-static enum intel_engine_hangcheck_action
-engine_stuck(struct intel_engine_cs *engine, u64 acthd)
-{
- struct drm_i915_private *dev_priv = engine->i915;
- enum intel_engine_hangcheck_action ha;
- u32 tmp;
-
- ha = head_stuck(engine, acthd);
- if (ha != ENGINE_DEAD)
- return ha;
-
- if (IS_GEN(dev_priv, 2))
- return ENGINE_DEAD;
-
- /* Is the chip hanging on a WAIT_FOR_EVENT?
- * If so we can simply poke the RB_WAIT bit
- * and break the hang. This should work on
- * all but the second generation chipsets.
- */
- tmp = ENGINE_READ(engine, RING_CTL);
- if (tmp & RING_WAIT) {
- i915_handle_error(dev_priv, engine->mask, 0,
- "stuck wait on %s", engine->name);
- ENGINE_WRITE(engine, RING_CTL, tmp);
- return ENGINE_WAIT_KICK;
- }
-
- return ENGINE_DEAD;
-}
-
-static void hangcheck_load_sample(struct intel_engine_cs *engine,
- struct hangcheck *hc)
-{
- hc->acthd = intel_engine_get_active_head(engine);
- hc->ring = ENGINE_READ(engine, RING_START);
- hc->head = ENGINE_READ(engine, RING_HEAD);
-}
-
-static void hangcheck_store_sample(struct intel_engine_cs *engine,
- const struct hangcheck *hc)
-{
- engine->hangcheck.acthd = hc->acthd;
- engine->hangcheck.last_ring = hc->ring;
- engine->hangcheck.last_head = hc->head;
-}
-
-static enum intel_engine_hangcheck_action
-hangcheck_get_action(struct intel_engine_cs *engine,
- const struct hangcheck *hc)
-{
- if (intel_engine_is_idle(engine))
- return ENGINE_IDLE;
-
- if (engine->hangcheck.last_ring != hc->ring)
- return ENGINE_ACTIVE_SEQNO;
-
- if (engine->hangcheck.last_head != hc->head)
- return ENGINE_ACTIVE_SEQNO;
-
- return engine_stuck(engine, hc->acthd);
-}
-
-static void hangcheck_accumulate_sample(struct intel_engine_cs *engine,
- struct hangcheck *hc)
-{
- unsigned long timeout = I915_ENGINE_DEAD_TIMEOUT;
-
- hc->action = hangcheck_get_action(engine, hc);
-
- /* We always increment the progress
- * if the engine is busy and still processing
- * the same request, so that no single request
- * can run indefinitely (such as a chain of
- * batches). The only time we do not increment
- * the hangcheck score on this ring, if this
- * engine is in a legitimate wait for another
- * engine. In that case the waiting engine is a
- * victim and we want to be sure we catch the
- * right culprit. Then every time we do kick
- * the ring, make it as a progress as the seqno
- * advancement might ensure and if not, it
- * will catch the hanging engine.
- */
-
- switch (hc->action) {
- case ENGINE_IDLE:
- case ENGINE_ACTIVE_SEQNO:
- /* Clear head and subunit states on seqno movement */
- hc->acthd = 0;
-
- memset(&engine->hangcheck.instdone, 0,
- sizeof(engine->hangcheck.instdone));
-
- /* Intentional fall through */
- case ENGINE_WAIT_KICK:
- case ENGINE_WAIT:
- engine->hangcheck.action_timestamp = jiffies;
- break;
-
- case ENGINE_ACTIVE_HEAD:
- case ENGINE_ACTIVE_SUBUNITS:
- /*
- * Seqno stuck with still active engine gets leeway,
- * in hopes that it is just a long shader.
- */
- timeout = I915_SEQNO_DEAD_TIMEOUT;
- break;
-
- case ENGINE_DEAD:
- break;
-
- default:
- MISSING_CASE(hc->action);
- }
-
- hc->stalled = time_after(jiffies,
- engine->hangcheck.action_timestamp + timeout);
- hc->wedged = time_after(jiffies,
- engine->hangcheck.action_timestamp +
- I915_ENGINE_WEDGED_TIMEOUT);
-}
-
-static void hangcheck_declare_hang(struct drm_i915_private *i915,
- intel_engine_mask_t hung,
- intel_engine_mask_t stuck)
-{
- struct intel_engine_cs *engine;
- intel_engine_mask_t tmp;
- char msg[80];
- int len;
-
- /* If some rings hung but others were still busy, only
- * blame the hanging rings in the synopsis.
- */
- if (stuck != hung)
- hung &= ~stuck;
- len = scnprintf(msg, sizeof(msg),
- "%s on ", stuck == hung ? "no progress" : "hang");
- for_each_engine_masked(engine, i915, hung, tmp)
- len += scnprintf(msg + len, sizeof(msg) - len,
- "%s, ", engine->name);
- msg[len-2] = '\0';
-
- return i915_handle_error(i915, hung, I915_ERROR_CAPTURE, "%s", msg);
-}
-
-/*
- * This is called when the chip hasn't reported back with completed
- * batchbuffers in a long time. We keep track per ring seqno progress and
- * if there are no progress, hangcheck score for that ring is increased.
- * Further, acthd is inspected to see if the ring is stuck. On stuck case
- * we kick the ring. If we see no progress on three subsequent calls
- * we assume chip is wedged and try to fix it by resetting the chip.
- */
-static void i915_hangcheck_elapsed(struct work_struct *work)
-{
- struct drm_i915_private *dev_priv =
- container_of(work, typeof(*dev_priv),
- gpu_error.hangcheck_work.work);
- intel_engine_mask_t hung = 0, stuck = 0, wedged = 0;
- struct intel_engine_cs *engine;
- enum intel_engine_id id;
- intel_wakeref_t wakeref;
-
- if (!i915_modparams.enable_hangcheck)
- return;
-
- if (!READ_ONCE(dev_priv->gt.awake))
- return;
-
- if (i915_terminally_wedged(dev_priv))
- return;
-
- wakeref = intel_runtime_pm_get_if_in_use(&dev_priv->runtime_pm);
- if (!wakeref)
- return;
-
- /* As enabling the GPU requires fairly extensive mmio access,
- * periodically arm the mmio checker to see if we are triggering
- * any invalid access.
- */
- intel_uncore_arm_unclaimed_mmio_detection(&dev_priv->uncore);
-
- for_each_engine(engine, dev_priv, id) {
- struct hangcheck hc;
-
- intel_engine_signal_breadcrumbs(engine);
-
- hangcheck_load_sample(engine, &hc);
- hangcheck_accumulate_sample(engine, &hc);
- hangcheck_store_sample(engine, &hc);
-
- if (hc.stalled) {
- hung |= engine->mask;
- if (hc.action != ENGINE_DEAD)
- stuck |= engine->mask;
- }
-
- if (hc.wedged)
- wedged |= engine->mask;
- }
-
- if (GEM_SHOW_DEBUG() && (hung | stuck)) {
- struct drm_printer p = drm_debug_printer("hangcheck");
-
- for_each_engine(engine, dev_priv, id) {
- if (intel_engine_is_idle(engine))
- continue;
-
- intel_engine_dump(engine, &p, "%s\n", engine->name);
- }
- }
-
- if (wedged) {
- dev_err(dev_priv->drm.dev,
- "GPU recovery timed out,"
- " cancelling all in-flight rendering.\n");
- GEM_TRACE_DUMP();
- i915_gem_set_wedged(dev_priv);
- }
-
- if (hung)
- hangcheck_declare_hang(dev_priv, hung, stuck);
-
- intel_runtime_pm_put(&dev_priv->runtime_pm, wakeref);
-
- /* Reset timer in case GPU hangs without another request being added */
- i915_queue_hangcheck(dev_priv);
-}
-
-void intel_engine_init_hangcheck(struct intel_engine_cs *engine)
-{
- memset(&engine->hangcheck, 0, sizeof(engine->hangcheck));
- engine->hangcheck.action_timestamp = jiffies;
-}
-
-void intel_hangcheck_init(struct drm_i915_private *i915)
-{
- INIT_DELAYED_WORK(&i915->gpu_error.hangcheck_work,
- i915_hangcheck_elapsed);
-}
-
-#if IS_ENABLED(CONFIG_DRM_I915_SELFTEST)
-#include "selftest_hangcheck.c"
-#endif
diff --git a/drivers/gpu/drm/i915/gt/intel_llc.c b/drivers/gpu/drm/i915/gt/intel_llc.c
new file mode 100644
index 000000000000..ceb785b75c25
--- /dev/null
+++ b/drivers/gpu/drm/i915/gt/intel_llc.c
@@ -0,0 +1,161 @@
+/*
+ * SPDX-License-Identifier: MIT
+ *
+ * Copyright © 2019 Intel Corporation
+ */
+
+#include <linux/cpufreq.h>
+
+#include "i915_drv.h"
+#include "intel_gt.h"
+#include "intel_llc.h"
+#include "intel_sideband.h"
+
+struct ia_constants {
+ unsigned int min_gpu_freq;
+ unsigned int max_gpu_freq;
+
+ unsigned int min_ring_freq;
+ unsigned int max_ia_freq;
+};
+
+static struct intel_gt *llc_to_gt(struct intel_llc *llc)
+{
+ return container_of(llc, struct intel_gt, llc);
+}
+
+static unsigned int cpu_max_MHz(void)
+{
+ struct cpufreq_policy *policy;
+ unsigned int max_khz;
+
+ policy = cpufreq_cpu_get(0);
+ if (policy) {
+ max_khz = policy->cpuinfo.max_freq;
+ cpufreq_cpu_put(policy);
+ } else {
+ /*
+		 * Default to the measured TSC frequency if none is found;
+		 * the PCU will ensure we don't go over.
+ */
+ max_khz = tsc_khz;
+ }
+
+ return max_khz / 1000;
+}
+
+static bool get_ia_constants(struct intel_llc *llc,
+ struct ia_constants *consts)
+{
+ struct drm_i915_private *i915 = llc_to_gt(llc)->i915;
+ struct intel_rps *rps = &llc_to_gt(llc)->rps;
+
+ if (rps->max_freq <= rps->min_freq)
+ return false;
+
+ consts->max_ia_freq = cpu_max_MHz();
+
+ consts->min_ring_freq =
+ intel_uncore_read(llc_to_gt(llc)->uncore, DCLK) & 0xf;
+ /* convert DDR frequency from units of 266.6MHz to bandwidth */
+ consts->min_ring_freq = mult_frac(consts->min_ring_freq, 8, 3);
+
+ consts->min_gpu_freq = rps->min_freq;
+ consts->max_gpu_freq = rps->max_freq;
+ if (INTEL_GEN(i915) >= 9) {
+ /* Convert GT frequency to 50 HZ units */
+ consts->min_gpu_freq /= GEN9_FREQ_SCALER;
+ consts->max_gpu_freq /= GEN9_FREQ_SCALER;
+ }
+
+ return true;
+}
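+
+/*
+ * For example, a raw DCLK field of 3 means 3 * 266.6MHz = 800MHz of DDR
+ * frequency, and mult_frac(3, 8, 3) == 8 expresses the same bandwidth in
+ * the 100MHz units used by the ring frequency table in calc_ia_freq().
+ */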
+
+static void calc_ia_freq(struct intel_llc *llc,
+ unsigned int gpu_freq,
+ const struct ia_constants *consts,
+ unsigned int *out_ia_freq,
+ unsigned int *out_ring_freq)
+{
+ struct drm_i915_private *i915 = llc_to_gt(llc)->i915;
+ const int diff = consts->max_gpu_freq - gpu_freq;
+ unsigned int ia_freq = 0, ring_freq = 0;
+
+ if (INTEL_GEN(i915) >= 9) {
+ /*
+ * ring_freq = 2 * GT. ring_freq is in 100MHz units
+ * No floor required for ring frequency on SKL.
+ */
+ ring_freq = gpu_freq;
+ } else if (INTEL_GEN(i915) >= 8) {
+ /* max(2 * GT, DDR). NB: GT is 50MHz units */
+ ring_freq = max(consts->min_ring_freq, gpu_freq);
+ } else if (IS_HASWELL(i915)) {
+ ring_freq = mult_frac(gpu_freq, 5, 4);
+ ring_freq = max(consts->min_ring_freq, ring_freq);
+ /* leave ia_freq as the default, chosen by cpufreq */
+ } else {
+ const int min_freq = 15;
+ const int scale = 180;
+
+ /*
+ * On older processors, there is no separate ring
+ * clock domain, so in order to boost the bandwidth
+ * of the ring, we need to upclock the CPU (ia_freq).
+ *
+ * For GPU frequencies less than 750MHz,
+ * just use the lowest ring freq.
+ */
+ if (gpu_freq < min_freq)
+ ia_freq = 800;
+ else
+ ia_freq = consts->max_ia_freq - diff * scale / 2;
+ ia_freq = DIV_ROUND_CLOSEST(ia_freq, 100);
+ }
+
+ *out_ia_freq = ia_freq;
+ *out_ring_freq = ring_freq;
+}
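+
+/*
+ * For example, on the legacy path with max_ia_freq == 3000 (MHz),
+ * max_gpu_freq == 30 and gpu_freq == 20 (diff == 10):
+ *
+ *	ia_freq = 3000 - 10 * 180 / 2 = 2100
+ *	DIV_ROUND_CLOSEST(2100, 100) = 21
+ *
+ * i.e. the PCU is asked to use a 2.1GHz IA reference at that GPU frequency.
+ */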
+
+static void gen6_update_ring_freq(struct intel_llc *llc)
+{
+ struct drm_i915_private *i915 = llc_to_gt(llc)->i915;
+ struct ia_constants consts;
+ unsigned int gpu_freq;
+
+ if (!get_ia_constants(llc, &consts))
+ return;
+
+ /*
+ * For each potential GPU frequency, load a ring frequency we'd like
+ * to use for memory access. We do this by specifying the IA frequency
+ * the PCU should use as a reference to determine the ring frequency.
+ */
+ for (gpu_freq = consts.max_gpu_freq;
+ gpu_freq >= consts.min_gpu_freq;
+ gpu_freq--) {
+ unsigned int ia_freq, ring_freq;
+
+ calc_ia_freq(llc, gpu_freq, &consts, &ia_freq, &ring_freq);
+ sandybridge_pcode_write(i915,
+ GEN6_PCODE_WRITE_MIN_FREQ_TABLE,
+ ia_freq << GEN6_PCODE_FREQ_IA_RATIO_SHIFT |
+ ring_freq << GEN6_PCODE_FREQ_RING_RATIO_SHIFT |
+ gpu_freq);
+ }
+}
+
+void intel_llc_enable(struct intel_llc *llc)
+{
+ if (HAS_LLC(llc_to_gt(llc)->i915))
+ gen6_update_ring_freq(llc);
+}
+
+void intel_llc_disable(struct intel_llc *llc)
+{
+ /* Currently there is no HW configuration to be done to disable. */
+}
+
+#if IS_ENABLED(CONFIG_DRM_I915_SELFTEST)
+#include "selftest_llc.c"
+#endif
diff --git a/drivers/gpu/drm/i915/gt/intel_llc.h b/drivers/gpu/drm/i915/gt/intel_llc.h
new file mode 100644
index 000000000000..ef09a890d2b7
--- /dev/null
+++ b/drivers/gpu/drm/i915/gt/intel_llc.h
@@ -0,0 +1,15 @@
+/*
+ * SPDX-License-Identifier: MIT
+ *
+ * Copyright © 2019 Intel Corporation
+ */
+
+#ifndef INTEL_LLC_H
+#define INTEL_LLC_H
+
+struct intel_llc;
+
+void intel_llc_enable(struct intel_llc *llc);
+void intel_llc_disable(struct intel_llc *llc);
+
+#endif /* INTEL_LLC_H */
diff --git a/drivers/gpu/drm/i915/gt/intel_llc_types.h b/drivers/gpu/drm/i915/gt/intel_llc_types.h
new file mode 100644
index 000000000000..ecad4687b930
--- /dev/null
+++ b/drivers/gpu/drm/i915/gt/intel_llc_types.h
@@ -0,0 +1,13 @@
+/*
+ * SPDX-License-Identifier: MIT
+ *
+ * Copyright © 2019 Intel Corporation
+ */
+
+#ifndef INTEL_LLC_TYPES_H
+#define INTEL_LLC_TYPES_H
+
+struct intel_llc {
+};
+
+#endif /* INTEL_LLC_TYPES_H */
diff --git a/drivers/gpu/drm/i915/gt/intel_lrc.c b/drivers/gpu/drm/i915/gt/intel_lrc.c
index 82b7ace62d97..a13a8c4b65ab 100644
--- a/drivers/gpu/drm/i915/gt/intel_lrc.c
+++ b/drivers/gpu/drm/i915/gt/intel_lrc.c
@@ -133,15 +133,19 @@
*/
#include <linux/interrupt.h>
-#include "gem/i915_gem_context.h"
-
#include "i915_drv.h"
-#include "i915_gem_render_state.h"
+#include "i915_perf.h"
+#include "i915_trace.h"
#include "i915_vgpu.h"
+#include "intel_context.h"
#include "intel_engine_pm.h"
+#include "intel_gt.h"
+#include "intel_gt_pm.h"
+#include "intel_gt_requests.h"
#include "intel_lrc_reg.h"
#include "intel_mocs.h"
#include "intel_reset.h"
+#include "intel_ring.h"
#include "intel_workarounds.h"
#define RING_EXECLIST_QFULL (1 << 0x2)
@@ -161,6 +165,15 @@
#define GEN8_CTX_STATUS_COMPLETED_MASK \
(GEN8_CTX_STATUS_COMPLETE | GEN8_CTX_STATUS_PREEMPTED)
+#define CTX_DESC_FORCE_RESTORE BIT_ULL(2)
+
+#define GEN12_CTX_STATUS_SWITCHED_TO_NEW_QUEUE (0x1) /* lower csb dword */
+#define GEN12_CTX_SWITCH_DETAIL(csb_dw) ((csb_dw) & 0xF) /* upper csb dword */
+#define GEN12_CSB_SW_CTX_ID_MASK GENMASK(25, 15)
+#define GEN12_IDLE_CTX_ID 0x7FF
+#define GEN12_CSB_CTX_VALID(csb_dw) \
+ (FIELD_GET(GEN12_CSB_SW_CTX_ID_MASK, csb_dw) != GEN12_IDLE_CTX_ID)
+
/* Typical size of the average request (2 pipecontrols and a MI_BB) */
#define EXECLISTS_REQUEST_SIZE 64 /* bytes */
#define WA_TAIL_DWORDS 2
@@ -214,12 +227,65 @@ static struct virtual_engine *to_virtual_engine(struct intel_engine_cs *engine)
return container_of(engine, struct virtual_engine, base);
}
-static int execlists_context_deferred_alloc(struct intel_context *ce,
- struct intel_engine_cs *engine);
+static int __execlists_context_alloc(struct intel_context *ce,
+ struct intel_engine_cs *engine);
+
static void execlists_init_reg_state(u32 *reg_state,
- struct intel_context *ce,
- struct intel_engine_cs *engine,
- struct intel_ring *ring);
+ const struct intel_context *ce,
+ const struct intel_engine_cs *engine,
+ const struct intel_ring *ring,
+ bool close);
+static void
+__execlists_update_reg_state(const struct intel_context *ce,
+ const struct intel_engine_cs *engine);
+
+static void mark_eio(struct i915_request *rq)
+{
+ if (i915_request_completed(rq))
+ return;
+
+ GEM_BUG_ON(i915_request_signaled(rq));
+
+ dma_fence_set_error(&rq->fence, -EIO);
+ i915_request_mark_complete(rq);
+}
+
+static struct i915_request *
+active_request(const struct intel_timeline * const tl, struct i915_request *rq)
+{
+ struct i915_request *active = rq;
+
+ rcu_read_lock();
+ list_for_each_entry_continue_reverse(rq, &tl->requests, link) {
+ if (i915_request_completed(rq))
+ break;
+
+ active = rq;
+ }
+ rcu_read_unlock();
+
+ return active;
+}
+
+static inline u32 intel_hws_preempt_address(struct intel_engine_cs *engine)
+{
+ return (i915_ggtt_offset(engine->status_page.vma) +
+ I915_GEM_HWS_PREEMPT_ADDR);
+}
+
+static inline void
+ring_set_paused(const struct intel_engine_cs *engine, int state)
+{
+ /*
+ * We inspect HWS_PREEMPT with a semaphore inside
+ * engine->emit_fini_breadcrumb. If the dword is true,
+ * the ring is paused as the semaphore will busywait
+ * until the dword is false.
+ */
+ engine->status_page.addr[I915_GEM_HWS_PREEMPT] = state;
+ if (state)
+ wmb();
+}
static inline struct i915_priolist *to_priolist(struct rb_node *rb)
{
@@ -236,6 +302,17 @@ static int effective_prio(const struct i915_request *rq)
int prio = rq_prio(rq);
/*
+ * If this request is special and must not be interrupted at any
+ * cost, so be it. Note we are only checking the most recent request
+ * in the context and so may be masking an earlier vip request. It
+ * is hoped that under the conditions where nopreempt is used, this
+ * will not matter (i.e. all requests to that context will be
+ * nopreempt for as long as desired).
+ */
+ if (i915_request_has_nopreempt(rq))
+ prio = I915_PRIORITY_UNPREEMPTABLE;
+
+ /*
* On unwinding the active request, we give it a priority bump
* if it has completed waiting on any semaphore. If we know that
* the request has already started, we can prevent an unwanted
@@ -245,6 +322,7 @@ static int effective_prio(const struct i915_request *rq)
prio |= I915_PRIORITY_NOSEMAPHORE;
/* Restrict mere WAIT boosts from triggering preemption */
+ BUILD_BUG_ON(__NO_PREEMPTION & ~I915_PRIORITY_MASK); /* only internal */
return prio | __NO_PREEMPTION;
}
@@ -271,10 +349,7 @@ static inline bool need_preempt(const struct intel_engine_cs *engine,
{
int last_prio;
- if (!engine->preempt_context)
- return false;
-
- if (i915_request_completed(rq))
+ if (!intel_engine_has_semaphores(engine))
return false;
/*
@@ -288,10 +363,15 @@ static inline bool need_preempt(const struct intel_engine_cs *engine,
* However, the priority hint is a mere hint that we may need to
* preempt. If that hint is stale or we may be trying to preempt
* ourselves, ignore the request.
+ *
+ * More naturally we would write
+ * prio >= max(0, last);
+ * except that we wish to prevent triggering preemption at the same
+ * priority level: the task that is running should remain running
+ * to preserve FIFO ordering of dependencies.
*/
- last_prio = effective_prio(rq);
- if (!i915_scheduler_need_preempt(engine->execlists.queue_priority_hint,
- last_prio))
+ last_prio = max(effective_prio(rq), I915_PRIORITY_NORMAL - 1);
+ if (engine->execlists.queue_priority_hint <= last_prio)
return false;
/*
@@ -338,9 +418,6 @@ __maybe_unused static inline bool
assert_priority_queue(const struct i915_request *prev,
const struct i915_request *next)
{
- const struct intel_engine_execlists *execlists =
- &prev->engine->execlists;
-
/*
* Without preemption, the prev may refer to the still active element
* which we refuse to let go.
@@ -348,7 +425,7 @@ assert_priority_queue(const struct i915_request *prev,
* Even with preemption, there are times when we think it is better not
* to preempt and leave an ostensibly lower priority request in flight.
*/
- if (port_request(execlists->port) == prev)
+ if (i915_request_is_active(prev))
return true;
return rq_prio(prev) >= rq_prio(next);
@@ -383,48 +460,496 @@ assert_priority_queue(const struct i915_request *prev,
static u64
lrc_descriptor(struct intel_context *ce, struct intel_engine_cs *engine)
{
- struct i915_gem_context *ctx = ce->gem_context;
u64 desc;
- BUILD_BUG_ON(MAX_CONTEXT_HW_ID > (BIT(GEN8_CTX_ID_WIDTH)));
- BUILD_BUG_ON(GEN11_MAX_CONTEXT_HW_ID > (BIT(GEN11_SW_CTX_ID_WIDTH)));
+ desc = INTEL_LEGACY_32B_CONTEXT;
+ if (i915_vm_is_4lvl(ce->vm))
+ desc = INTEL_LEGACY_64B_CONTEXT;
+ desc <<= GEN8_CTX_ADDRESSING_MODE_SHIFT;
- desc = ctx->desc_template; /* bits 0-11 */
- GEM_BUG_ON(desc & GENMASK_ULL(63, 12));
-
- desc |= i915_ggtt_offset(ce->state) + LRC_HEADER_PAGES * PAGE_SIZE;
- /* bits 12-31 */
- GEM_BUG_ON(desc & GENMASK_ULL(63, 32));
+ desc |= GEN8_CTX_VALID | GEN8_CTX_PRIVILEGE;
+ if (IS_GEN(engine->i915, 8))
+ desc |= GEN8_CTX_L3LLC_COHERENT;
+ desc |= i915_ggtt_offset(ce->state); /* bits 12-31 */
/*
* The following 32bits are copied into the OA reports (dword 2).
* Consider updating oa_get_render_ctx_id in i915_perf.c when changing
* anything below.
*/
if (INTEL_GEN(engine->i915) >= 11) {
- GEM_BUG_ON(ctx->hw_id >= BIT(GEN11_SW_CTX_ID_WIDTH));
- desc |= (u64)ctx->hw_id << GEN11_SW_CTX_ID_SHIFT;
- /* bits 37-47 */
-
desc |= (u64)engine->instance << GEN11_ENGINE_INSTANCE_SHIFT;
/* bits 48-53 */
- /* TODO: decide what to do with SW counter (bits 55-60) */
-
desc |= (u64)engine->class << GEN11_ENGINE_CLASS_SHIFT;
/* bits 61-63 */
- } else {
- GEM_BUG_ON(ctx->hw_id >= BIT(GEN8_CTX_ID_WIDTH));
- desc |= (u64)ctx->hw_id << GEN8_CTX_ID_SHIFT; /* bits 32-52 */
}
return desc;
}
-static void unwind_wa_tail(struct i915_request *rq)
+static inline unsigned int dword_in_page(void *addr)
+{
+ return offset_in_page(addr) / sizeof(u32);
+}
+
+static void set_offsets(u32 *regs,
+ const u8 *data,
+ const struct intel_engine_cs *engine,
+ bool clear)
+#define NOP(x) (BIT(7) | (x))
+#define LRI(count, flags) ((flags) << 6 | (count) | BUILD_BUG_ON_ZERO(count >= BIT(6)))
+#define POSTED BIT(0)
+#define REG(x) (((x) >> 2) | BUILD_BUG_ON_ZERO(x >= 0x200))
+#define REG16(x) \
+ (((x) >> 9) | BIT(7) | BUILD_BUG_ON_ZERO(x >= 0x10000)), \
+ (((x) >> 2) & 0x7f)
+#define END(x) 0, (x)
{
- rq->tail = intel_ring_wrap(rq->ring, rq->wa_tail - WA_TAIL_BYTES);
- assert_ring_tail_valid(rq->ring, rq->tail);
+ const u32 base = engine->mmio_base;
+
+ while (*data) {
+ u8 count, flags;
+
+ if (*data & BIT(7)) { /* skip */
+ count = *data++ & ~BIT(7);
+ if (clear)
+ memset32(regs, MI_NOOP, count);
+ regs += count;
+ continue;
+ }
+
+ count = *data & 0x3f;
+ flags = *data >> 6;
+ data++;
+
+ *regs = MI_LOAD_REGISTER_IMM(count);
+ if (flags & POSTED)
+ *regs |= MI_LRI_FORCE_POSTED;
+ if (INTEL_GEN(engine->i915) >= 11)
+ *regs |= MI_LRI_CS_MMIO;
+ regs++;
+
+ GEM_BUG_ON(!count);
+ do {
+ u32 offset = 0;
+ u8 v;
+
+ do {
+ v = *data++;
+ offset <<= 7;
+ offset |= v & ~BIT(7);
+ } while (v & BIT(7));
+
+ regs[0] = base + (offset << 2);
+ if (clear)
+ regs[1] = 0;
+ regs += 2;
+ } while (--count);
+ }
+
+ if (clear) {
+ u8 count = *++data;
+
+ /* Clear past the tail for HW access */
+ GEM_BUG_ON(dword_in_page(regs) > count);
+ memset32(regs, MI_NOOP, count - dword_in_page(regs));
+
+ /* Close the batch; used mainly by live_lrc_layout() */
+ *regs = MI_BATCH_BUFFER_END;
+ if (INTEL_GEN(engine->i915) >= 10)
+ *regs |= BIT(0);
+ }
+}
+
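To make the table encoding concrete: NOP(x) sets bit 7 and skips x dwords, an LRI header byte carries the register count (low six bits) and flags (top two bits), and each register offset follows as an MSB-first 7-bit varint (REG emits one byte, REG16 two). Below is a hedged, standalone decoder sketch in the spirit of set_offsets(), with the kernel specifics stubbed out:

/*
 * Illustrative decoder for the compact offset tables; a host-side
 * sketch, not the kernel implementation.
 */
#include <stdint.h>
#include <stdio.h>

#define NOP(x)		(0x80 | (x))
#define LRI(count, flags)	(((flags) << 6) | (count))
#define POSTED		0x1
#define REG(x)		((x) >> 2)
#define REG16(x)	(((x) >> 9) | 0x80), (((x) >> 2) & 0x7f)

static const uint8_t example[] = {
	NOP(1),
	LRI(2, POSTED),
	REG16(0x244),		/* two-byte varint */
	REG(0x034),		/* one-byte varint */
	0,			/* END */
};

int main(void)
{
	const uint8_t *data = example;

	while (*data) {
		if (*data & 0x80) {		/* skip/NOOP run */
			printf("skip %u dwords\n", *data++ & 0x7f);
			continue;
		}

		uint8_t count = *data & 0x3f;
		uint8_t flags = *data >> 6;

		data++;
		printf("MI_LOAD_REGISTER_IMM(%u)%s\n",
		       count, (flags & POSTED) ? " [force posted]" : "");

		while (count--) {
			uint32_t offset = 0;
			uint8_t v;

			do {			/* 7-bit varint, MSB first */
				v = *data++;
				offset = (offset << 7) | (v & 0x7f);
			} while (v & 0x80);

			printf("  mmio_base + 0x%03x\n", offset << 2);
		}
	}
	return 0;
}

Running it prints a one dword skip, then an LRI(2) header followed by offsets 0x244 and 0x034, matching what set_offsets() would write into the context image.
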
+static const u8 gen8_xcs_offsets[] = {
+ NOP(1),
+ LRI(11, 0),
+ REG16(0x244),
+ REG(0x034),
+ REG(0x030),
+ REG(0x038),
+ REG(0x03c),
+ REG(0x168),
+ REG(0x140),
+ REG(0x110),
+ REG(0x11c),
+ REG(0x114),
+ REG(0x118),
+
+ NOP(9),
+ LRI(9, 0),
+ REG16(0x3a8),
+ REG16(0x28c),
+ REG16(0x288),
+ REG16(0x284),
+ REG16(0x280),
+ REG16(0x27c),
+ REG16(0x278),
+ REG16(0x274),
+ REG16(0x270),
+
+ NOP(13),
+ LRI(2, 0),
+ REG16(0x200),
+ REG(0x028),
+
+ END(80)
+};
+
+static const u8 gen9_xcs_offsets[] = {
+ NOP(1),
+ LRI(14, POSTED),
+ REG16(0x244),
+ REG(0x034),
+ REG(0x030),
+ REG(0x038),
+ REG(0x03c),
+ REG(0x168),
+ REG(0x140),
+ REG(0x110),
+ REG(0x11c),
+ REG(0x114),
+ REG(0x118),
+ REG(0x1c0),
+ REG(0x1c4),
+ REG(0x1c8),
+
+ NOP(3),
+ LRI(9, POSTED),
+ REG16(0x3a8),
+ REG16(0x28c),
+ REG16(0x288),
+ REG16(0x284),
+ REG16(0x280),
+ REG16(0x27c),
+ REG16(0x278),
+ REG16(0x274),
+ REG16(0x270),
+
+ NOP(13),
+ LRI(1, POSTED),
+ REG16(0x200),
+
+ NOP(13),
+ LRI(44, POSTED),
+ REG(0x028),
+ REG(0x09c),
+ REG(0x0c0),
+ REG(0x178),
+ REG(0x17c),
+ REG16(0x358),
+ REG(0x170),
+ REG(0x150),
+ REG(0x154),
+ REG(0x158),
+ REG16(0x41c),
+ REG16(0x600),
+ REG16(0x604),
+ REG16(0x608),
+ REG16(0x60c),
+ REG16(0x610),
+ REG16(0x614),
+ REG16(0x618),
+ REG16(0x61c),
+ REG16(0x620),
+ REG16(0x624),
+ REG16(0x628),
+ REG16(0x62c),
+ REG16(0x630),
+ REG16(0x634),
+ REG16(0x638),
+ REG16(0x63c),
+ REG16(0x640),
+ REG16(0x644),
+ REG16(0x648),
+ REG16(0x64c),
+ REG16(0x650),
+ REG16(0x654),
+ REG16(0x658),
+ REG16(0x65c),
+ REG16(0x660),
+ REG16(0x664),
+ REG16(0x668),
+ REG16(0x66c),
+ REG16(0x670),
+ REG16(0x674),
+ REG16(0x678),
+ REG16(0x67c),
+ REG(0x068),
+
+ END(176)
+};
+
+static const u8 gen12_xcs_offsets[] = {
+ NOP(1),
+ LRI(13, POSTED),
+ REG16(0x244),
+ REG(0x034),
+ REG(0x030),
+ REG(0x038),
+ REG(0x03c),
+ REG(0x168),
+ REG(0x140),
+ REG(0x110),
+ REG(0x1c0),
+ REG(0x1c4),
+ REG(0x1c8),
+ REG(0x180),
+ REG16(0x2b4),
+
+ NOP(5),
+ LRI(9, POSTED),
+ REG16(0x3a8),
+ REG16(0x28c),
+ REG16(0x288),
+ REG16(0x284),
+ REG16(0x280),
+ REG16(0x27c),
+ REG16(0x278),
+ REG16(0x274),
+ REG16(0x270),
+
+ END(80)
+};
+
+static const u8 gen8_rcs_offsets[] = {
+ NOP(1),
+ LRI(14, POSTED),
+ REG16(0x244),
+ REG(0x034),
+ REG(0x030),
+ REG(0x038),
+ REG(0x03c),
+ REG(0x168),
+ REG(0x140),
+ REG(0x110),
+ REG(0x11c),
+ REG(0x114),
+ REG(0x118),
+ REG(0x1c0),
+ REG(0x1c4),
+ REG(0x1c8),
+
+ NOP(3),
+ LRI(9, POSTED),
+ REG16(0x3a8),
+ REG16(0x28c),
+ REG16(0x288),
+ REG16(0x284),
+ REG16(0x280),
+ REG16(0x27c),
+ REG16(0x278),
+ REG16(0x274),
+ REG16(0x270),
+
+ NOP(13),
+ LRI(1, 0),
+ REG(0x0c8),
+
+ END(80)
+};
+
+static const u8 gen9_rcs_offsets[] = {
+ NOP(1),
+ LRI(14, POSTED),
+ REG16(0x244),
+ REG(0x34),
+ REG(0x30),
+ REG(0x38),
+ REG(0x3c),
+ REG(0x168),
+ REG(0x140),
+ REG(0x110),
+ REG(0x11c),
+ REG(0x114),
+ REG(0x118),
+ REG(0x1c0),
+ REG(0x1c4),
+ REG(0x1c8),
+
+ NOP(3),
+ LRI(9, POSTED),
+ REG16(0x3a8),
+ REG16(0x28c),
+ REG16(0x288),
+ REG16(0x284),
+ REG16(0x280),
+ REG16(0x27c),
+ REG16(0x278),
+ REG16(0x274),
+ REG16(0x270),
+
+ NOP(13),
+ LRI(1, 0),
+ REG(0xc8),
+
+ NOP(13),
+ LRI(44, POSTED),
+ REG(0x28),
+ REG(0x9c),
+ REG(0xc0),
+ REG(0x178),
+ REG(0x17c),
+ REG16(0x358),
+ REG(0x170),
+ REG(0x150),
+ REG(0x154),
+ REG(0x158),
+ REG16(0x41c),
+ REG16(0x600),
+ REG16(0x604),
+ REG16(0x608),
+ REG16(0x60c),
+ REG16(0x610),
+ REG16(0x614),
+ REG16(0x618),
+ REG16(0x61c),
+ REG16(0x620),
+ REG16(0x624),
+ REG16(0x628),
+ REG16(0x62c),
+ REG16(0x630),
+ REG16(0x634),
+ REG16(0x638),
+ REG16(0x63c),
+ REG16(0x640),
+ REG16(0x644),
+ REG16(0x648),
+ REG16(0x64c),
+ REG16(0x650),
+ REG16(0x654),
+ REG16(0x658),
+ REG16(0x65c),
+ REG16(0x660),
+ REG16(0x664),
+ REG16(0x668),
+ REG16(0x66c),
+ REG16(0x670),
+ REG16(0x674),
+ REG16(0x678),
+ REG16(0x67c),
+ REG(0x68),
+
+ END(176)
+};
+
+static const u8 gen11_rcs_offsets[] = {
+ NOP(1),
+ LRI(15, POSTED),
+ REG16(0x244),
+ REG(0x034),
+ REG(0x030),
+ REG(0x038),
+ REG(0x03c),
+ REG(0x168),
+ REG(0x140),
+ REG(0x110),
+ REG(0x11c),
+ REG(0x114),
+ REG(0x118),
+ REG(0x1c0),
+ REG(0x1c4),
+ REG(0x1c8),
+ REG(0x180),
+
+ NOP(1),
+ LRI(9, POSTED),
+ REG16(0x3a8),
+ REG16(0x28c),
+ REG16(0x288),
+ REG16(0x284),
+ REG16(0x280),
+ REG16(0x27c),
+ REG16(0x278),
+ REG16(0x274),
+ REG16(0x270),
+
+ LRI(1, POSTED),
+ REG(0x1b0),
+
+ NOP(10),
+ LRI(1, 0),
+ REG(0x0c8),
+
+ END(80)
+};
+
+static const u8 gen12_rcs_offsets[] = {
+ NOP(1),
+ LRI(13, POSTED),
+ REG16(0x244),
+ REG(0x034),
+ REG(0x030),
+ REG(0x038),
+ REG(0x03c),
+ REG(0x168),
+ REG(0x140),
+ REG(0x110),
+ REG(0x1c0),
+ REG(0x1c4),
+ REG(0x1c8),
+ REG(0x180),
+ REG16(0x2b4),
+
+ NOP(5),
+ LRI(9, POSTED),
+ REG16(0x3a8),
+ REG16(0x28c),
+ REG16(0x288),
+ REG16(0x284),
+ REG16(0x280),
+ REG16(0x27c),
+ REG16(0x278),
+ REG16(0x274),
+ REG16(0x270),
+
+ LRI(3, POSTED),
+ REG(0x1b0),
+ REG16(0x5a8),
+ REG16(0x5ac),
+
+ NOP(6),
+ LRI(1, 0),
+ REG(0x0c8),
+
+ END(80)
+};
+
+#undef END
+#undef REG16
+#undef REG
+#undef LRI
+#undef NOP
+
+static const u8 *reg_offsets(const struct intel_engine_cs *engine)
+{
+ /*
+ * The gen12+ lists only have the registers we program in the basic
+ * default state. We rely on the context image using relative
+ * addressing to automatically fix up the register state between
+ * the physical engines for the virtual engine.
+ */
+ GEM_BUG_ON(INTEL_GEN(engine->i915) >= 12 &&
+ !intel_engine_has_relative_mmio(engine));
+
+ if (engine->class == RENDER_CLASS) {
+ if (INTEL_GEN(engine->i915) >= 12)
+ return gen12_rcs_offsets;
+ else if (INTEL_GEN(engine->i915) >= 11)
+ return gen11_rcs_offsets;
+ else if (INTEL_GEN(engine->i915) >= 9)
+ return gen9_rcs_offsets;
+ else
+ return gen8_rcs_offsets;
+ } else {
+ if (INTEL_GEN(engine->i915) >= 12)
+ return gen12_xcs_offsets;
+ else if (INTEL_GEN(engine->i915) >= 9)
+ return gen9_xcs_offsets;
+ else
+ return gen8_xcs_offsets;
+ }
}
static struct i915_request *
@@ -439,15 +964,10 @@ __unwind_incomplete_requests(struct intel_engine_cs *engine)
list_for_each_entry_safe_reverse(rq, rn,
&engine->active.requests,
sched.link) {
- struct intel_engine_cs *owner;
-
if (i915_request_completed(rq))
- break;
+ continue; /* XXX */
__i915_request_unsubmit(rq);
- unwind_wa_tail(rq);
-
- GEM_BUG_ON(rq->hw_context->inflight);
/*
* Push the request back into the queue for later resubmission.
@@ -456,8 +976,7 @@ __unwind_incomplete_requests(struct intel_engine_cs *engine)
* engine so that it can be moved across onto another physical
* engine as load dictates.
*/
- owner = rq->hw_context->engine;
- if (likely(owner == engine)) {
+ if (likely(rq->execution_mask == engine->mask)) {
GEM_BUG_ON(rq_prio(rq) == I915_PRIORITY_INVALID);
if (rq_prio(rq) != prio) {
prio = rq_prio(rq);
@@ -466,8 +985,26 @@ __unwind_incomplete_requests(struct intel_engine_cs *engine)
GEM_BUG_ON(RB_EMPTY_ROOT(&engine->execlists.queue.rb_root));
list_move(&rq->sched.link, pl);
+ set_bit(I915_FENCE_FLAG_PQUEUE, &rq->fence.flags);
+
active = rq;
} else {
+ struct intel_engine_cs *owner = rq->context->engine;
+
+ /*
+ * Decouple the virtual breadcrumb before moving it
+ * back to the virtual engine -- we don't want the
+ * request to complete in the background and try
+ * and cancel the breadcrumb on the virtual engine
+ * (instead of the old engine where it is linked)!
+ */
+ if (test_bit(DMA_FENCE_FLAG_ENABLE_SIGNAL_BIT,
+ &rq->fence.flags)) {
+ spin_lock_nested(&rq->lock,
+ SINGLE_DEPTH_NESTING);
+ i915_request_cancel_breadcrumb(rq);
+ spin_unlock(&rq->lock);
+ }
rq->engine = owner;
owner->submit_request(rq);
active = NULL;
@@ -500,32 +1037,223 @@ execlists_context_status_change(struct i915_request *rq, unsigned long status)
status, rq);
}
-inline void
-execlists_user_begin(struct intel_engine_execlists *execlists,
- const struct execlist_port *port)
+static void intel_engine_context_in(struct intel_engine_cs *engine)
{
- execlists_set_active_once(execlists, EXECLISTS_ACTIVE_USER);
+ unsigned long flags;
+
+ if (READ_ONCE(engine->stats.enabled) == 0)
+ return;
+
+ write_seqlock_irqsave(&engine->stats.lock, flags);
+
+ if (engine->stats.enabled > 0) {
+ if (engine->stats.active++ == 0)
+ engine->stats.start = ktime_get();
+ GEM_BUG_ON(engine->stats.active == 0);
+ }
+
+ write_sequnlock_irqrestore(&engine->stats.lock, flags);
}
-inline void
-execlists_user_end(struct intel_engine_execlists *execlists)
+static void intel_engine_context_out(struct intel_engine_cs *engine)
{
- execlists_clear_active(execlists, EXECLISTS_ACTIVE_USER);
+ unsigned long flags;
+
+ if (READ_ONCE(engine->stats.enabled) == 0)
+ return;
+
+ write_seqlock_irqsave(&engine->stats.lock, flags);
+
+ if (engine->stats.enabled > 0) {
+ ktime_t last;
+
+ if (engine->stats.active && --engine->stats.active == 0) {
+ /*
+ * Decrement the active context count and, in case the
+ * GPU is now idle, add the elapsed time to the running total.
+ */
+ last = ktime_sub(ktime_get(), engine->stats.start);
+
+ engine->stats.total = ktime_add(engine->stats.total,
+ last);
+ } else if (engine->stats.active == 0) {
+ /*
+ * After turning on engine stats, context out might be
+ * the first event, in which case we account from the
+ * time stats gathering was turned on.
+ */
+ last = ktime_sub(ktime_get(), engine->stats.enabled_at);
+
+ engine->stats.total = ktime_add(engine->stats.total,
+ last);
+ }
+ }
+
+ write_sequnlock_irqrestore(&engine->stats.lock, flags);
}
-static inline void
-execlists_context_schedule_in(struct i915_request *rq)
+static int lrc_ring_mi_mode(const struct intel_engine_cs *engine)
+{
+ if (INTEL_GEN(engine->i915) >= 12)
+ return 0x60;
+ else if (INTEL_GEN(engine->i915) >= 9)
+ return 0x54;
+ else if (engine->class == RENDER_CLASS)
+ return 0x58;
+ else
+ return -1;
+}
+
+static void
+execlists_check_context(const struct intel_context *ce,
+ const struct intel_engine_cs *engine)
{
- GEM_BUG_ON(rq->hw_context->inflight);
+ const struct intel_ring *ring = ce->ring;
+ u32 *regs = ce->lrc_reg_state;
+ bool valid = true;
+ int x;
+
+ if (regs[CTX_RING_START] != i915_ggtt_offset(ring->vma)) {
+ pr_err("%s: context submitted with incorrect RING_START [%08x], expected %08x\n",
+ engine->name,
+ regs[CTX_RING_START],
+ i915_ggtt_offset(ring->vma));
+ regs[CTX_RING_START] = i915_ggtt_offset(ring->vma);
+ valid = false;
+ }
+
+ if ((regs[CTX_RING_CTL] & ~(RING_WAIT | RING_WAIT_SEMAPHORE)) !=
+ (RING_CTL_SIZE(ring->size) | RING_VALID)) {
+ pr_err("%s: context submitted with incorrect RING_CTL [%08x], expected %08x\n",
+ engine->name,
+ regs[CTX_RING_CTL],
+ (u32)(RING_CTL_SIZE(ring->size) | RING_VALID));
+ regs[CTX_RING_CTL] = RING_CTL_SIZE(ring->size) | RING_VALID;
+ valid = false;
+ }
+
+ x = lrc_ring_mi_mode(engine);
+ if (x != -1 && regs[x + 1] & (regs[x + 1] >> 16) & STOP_RING) {
+ pr_err("%s: context submitted with STOP_RING [%08x] in RING_MI_MODE\n",
+ engine->name, regs[x + 1]);
+ regs[x + 1] &= ~STOP_RING;
+ regs[x + 1] |= STOP_RING << 16;
+ valid = false;
+ }
+
+ WARN_ONCE(!valid, "Invalid lrc state found before submission\n");
+}
+static void restore_default_state(struct intel_context *ce,
+ struct intel_engine_cs *engine)
+{
+ u32 *regs = ce->lrc_reg_state;
+
+ if (engine->pinned_default_state)
+ memcpy(regs, /* skip restoring the vanilla PPHWSP */
+ engine->pinned_default_state + LRC_STATE_PN * PAGE_SIZE,
+ engine->context_size - PAGE_SIZE);
+
+ execlists_init_reg_state(regs, ce, engine, ce->ring, false);
+}
+
+static void reset_active(struct i915_request *rq,
+ struct intel_engine_cs *engine)
+{
+ struct intel_context * const ce = rq->context;
+ u32 head;
+
+ /*
+ * The executing context has been cancelled. We want to prevent
+ * further execution along this context and propagate the error on
+ * to anything depending on its results.
+ *
+ * In __i915_request_submit(), we apply the -EIO and remove the
+ * requests' payloads for any banned requests. But first, we must
+ * rewind the context back to the start of the incomplete request so
+ * that we do not jump back into the middle of the batch.
+ *
+ * We preserve the breadcrumbs and semaphores of the incomplete
+ * requests so that inter-timeline dependencies (i.e other timelines)
+ * remain correctly ordered. And we defer to __i915_request_submit()
+ * so that all asynchronous waits are correctly handled.
+ */
+ ENGINE_TRACE(engine, "{ rq=%llx:%lld }\n",
+ rq->fence.context, rq->fence.seqno);
+
+ /* On resubmission of the active request, payload will be scrubbed */
+ if (i915_request_completed(rq))
+ head = rq->tail;
+ else
+ head = active_request(ce->timeline, rq)->head;
+ ce->ring->head = intel_ring_wrap(ce->ring, head);
+ intel_ring_update_space(ce->ring);
+
+ /* Scrub the context image to prevent replaying the previous batch */
+ restore_default_state(ce, engine);
+ __execlists_update_reg_state(ce, engine);
+
+ /* We've switched away, so this should be a no-op, but intent matters */
+ ce->lrc_desc |= CTX_DESC_FORCE_RESTORE;
+}
+
+static inline struct intel_engine_cs *
+__execlists_schedule_in(struct i915_request *rq)
+{
+ struct intel_engine_cs * const engine = rq->engine;
+ struct intel_context * const ce = rq->context;
+
+ intel_context_get(ce);
+
+ if (unlikely(intel_context_is_banned(ce)))
+ reset_active(rq, engine);
+
+ if (IS_ENABLED(CONFIG_DRM_I915_DEBUG_GEM))
+ execlists_check_context(ce, engine);
+
+ if (ce->tag) {
+ /* Use a fixed tag for OA and friends */
+ ce->lrc_desc |= (u64)ce->tag << 32;
+ } else {
+ /* We don't need a strict matching tag, just different values */
+ ce->lrc_desc &= ~GENMASK_ULL(47, 37);
+ ce->lrc_desc |=
+ (u64)(++engine->context_tag % NUM_CONTEXT_TAG) <<
+ GEN11_SW_CTX_ID_SHIFT;
+ BUILD_BUG_ON(NUM_CONTEXT_TAG > GEN12_MAX_CONTEXT_HW_ID);
+ }
+
+ __intel_gt_pm_get(engine->gt);
execlists_context_status_change(rq, INTEL_CONTEXT_SCHEDULE_IN);
- intel_engine_context_in(rq->engine);
- rq->hw_context->inflight = rq->engine;
+ intel_engine_context_in(engine);
+
+ return engine;
}
-static void kick_siblings(struct i915_request *rq)
+static inline struct i915_request *
+execlists_schedule_in(struct i915_request *rq, int idx)
{
- struct virtual_engine *ve = to_virtual_engine(rq->hw_context->engine);
+ struct intel_context * const ce = rq->context;
+ struct intel_engine_cs *old;
+
+ GEM_BUG_ON(!intel_engine_pm_is_awake(rq->engine));
+ trace_i915_request_in(rq, idx);
+
+ old = READ_ONCE(ce->inflight);
+ do {
+ if (!old) {
+ WRITE_ONCE(ce->inflight, __execlists_schedule_in(rq));
+ break;
+ }
+ } while (!try_cmpxchg(&ce->inflight, &old, ptr_inc(old)));
+
+ GEM_BUG_ON(intel_context_inflight(ce) != rq->engine);
+ return i915_request_get(rq);
+}
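
Note the lifetime trick above: ce->inflight stores the owning engine, while the low bits of that (aligned) pointer count how many ELSP slots currently reference the context; ptr_inc()/ptr_dec()/ptr_unmask_bits() manipulate those bits. A standalone sketch of the pattern, with names deliberately distinct from the i915 helpers:

#include <stdint.h>

#define TAG_BITS	2	/* target must be at least 4-byte aligned */
#define TAG_MASK	(((uintptr_t)1 << TAG_BITS) - 1)

static inline void *tagged_inc(void *p)
{
	return (void *)((uintptr_t)p + 1);	/* bump the embedded count */
}

static inline void *tagged_dec(void *p)
{
	return (void *)((uintptr_t)p - 1);
}

static inline uintptr_t tagged_count(const void *p)
{
	return (uintptr_t)p & TAG_MASK;		/* extra references held */
}

static inline void *tagged_ptr(const void *p)
{
	return (void *)((uintptr_t)p & ~TAG_MASK);	/* the engine itself */
}

The first submission stores the bare engine pointer; each extra port increments the tag, and schedule-out decrements it until the count reaches zero, at which point the final holder performs the real __execlists_schedule_out().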
+
+static void kick_siblings(struct i915_request *rq, struct intel_context *ce)
+{
+ struct virtual_engine *ve = container_of(ce, typeof(*ve), context);
struct i915_request *next = READ_ONCE(ve->request);
if (next && next->execution_mask & ~rq->execution_mask)
@@ -533,32 +1261,85 @@ static void kick_siblings(struct i915_request *rq)
}
static inline void
-execlists_context_schedule_out(struct i915_request *rq, unsigned long status)
+__execlists_schedule_out(struct i915_request *rq,
+ struct intel_engine_cs * const engine)
{
- rq->hw_context->inflight = NULL;
- intel_engine_context_out(rq->engine);
- execlists_context_status_change(rq, status);
- trace_i915_request_out(rq);
+ struct intel_context * const ce = rq->context;
/*
- * If this is part of a virtual engine, its next request may have
- * been blocked waiting for access to the active context. We have
- * to kick all the siblings again in case we need to switch (e.g.
- * the next request is not runnable on this engine). Hopefully,
- * we will already have submitted the next request before the
- * tasklet runs and do not need to rebuild each virtual tree
- * and kick everyone again.
+ * NB process_csb() is not under the engine->active.lock and hence
+ * schedule_out can race with schedule_in, meaning that we should
+ * refrain from doing non-trivial work here.
*/
- if (rq->engine != rq->hw_context->engine)
- kick_siblings(rq);
+
+ /*
+ * If we have just completed this context, the engine may now be
+ * idle and we want to re-enter powersaving.
+ */
+ if (list_is_last(&rq->link, &ce->timeline->requests) &&
+ i915_request_completed(rq))
+ intel_engine_add_retire(engine, ce->timeline);
+
+ intel_engine_context_out(engine);
+ execlists_context_status_change(rq, INTEL_CONTEXT_SCHEDULE_OUT);
+ intel_gt_pm_put_async(engine->gt);
+
+ /*
+ * If this is part of a virtual engine, its next request may
+ * have been blocked waiting for access to the active context.
+ * We have to kick all the siblings again in case we need to
+ * switch (e.g. the next request is not runnable on this
+ * engine). Hopefully, we will already have submitted the next
+ * request before the tasklet runs and do not need to rebuild
+ * each virtual tree and kick everyone again.
+ */
+ if (ce->engine != engine)
+ kick_siblings(rq, ce);
+
+ intel_context_put(ce);
+}
+
+static inline void
+execlists_schedule_out(struct i915_request *rq)
+{
+ struct intel_context * const ce = rq->context;
+ struct intel_engine_cs *cur, *old;
+
+ trace_i915_request_out(rq);
+
+ old = READ_ONCE(ce->inflight);
+ do
+ cur = ptr_unmask_bits(old, 2) ? ptr_dec(old) : NULL;
+ while (!try_cmpxchg(&ce->inflight, &old, cur));
+ if (!cur)
+ __execlists_schedule_out(rq, old);
+
+ i915_request_put(rq);
}
static u64 execlists_update_context(struct i915_request *rq)
{
- struct intel_context *ce = rq->hw_context;
+ struct intel_context *ce = rq->context;
+ u64 desc = ce->lrc_desc;
+ u32 tail;
- ce->lrc_reg_state[CTX_RING_TAIL + 1] =
- intel_ring_set_tail(rq->ring, rq->tail);
+ /*
+ * WaIdleLiteRestore:bdw,skl
+ *
+ * We should never submit the context with the same RING_TAIL twice
+ * just in case we submit an empty ring, which confuses the HW.
+ *
+ * We append a couple of NOOPs (gen8_emit_wa_tail) after the end of
+ * the normal request to be able to always advance the RING_TAIL on
+ * subsequent resubmissions (for lite restore). Should that fail us,
+ * and we try and submit the same tail again, force the context
+ * reload.
+ */
+ tail = intel_ring_set_tail(rq->ring, rq->tail);
+ if (unlikely(ce->lrc_reg_state[CTX_RING_TAIL] == tail))
+ desc |= CTX_DESC_FORCE_RESTORE;
+ ce->lrc_reg_state[CTX_RING_TAIL] = tail;
+ rq->tail = rq->wa_tail;
/*
* Make sure the context image is complete before we submit it to HW.
@@ -569,14 +1350,11 @@ static u64 execlists_update_context(struct i915_request *rq)
* may not be visible to the HW prior to the completion of the UC
* register write and that we may begin execution from the context
* before its image is complete leading to invalid PD chasing.
- *
- * Furthermore, Braswell, at least, wants a full mb to be sure that
- * the writes are coherent in memory (visible to the GPU) prior to
- * execution, and not just visible to other CPUs (as is the result of
- * wmb).
*/
- mb();
- return ce->lrc_desc;
+ wmb();
+
+ ce->lrc_desc &= ~CTX_DESC_FORCE_RESTORE;
+ return desc;
}
static inline void write_desc(struct intel_engine_execlists *execlists, u64 desc, u32 port)
@@ -590,12 +1368,110 @@ static inline void write_desc(struct intel_engine_execlists *execlists, u64 desc
}
}
+static __maybe_unused void
+trace_ports(const struct intel_engine_execlists *execlists,
+ const char *msg,
+ struct i915_request * const *ports)
+{
+ const struct intel_engine_cs *engine =
+ container_of(execlists, typeof(*engine), execlists);
+
+ if (!ports[0])
+ return;
+
+ ENGINE_TRACE(engine, "%s { %llx:%lld%s, %llx:%lld }\n", msg,
+ ports[0]->fence.context,
+ ports[0]->fence.seqno,
+ i915_request_completed(ports[0]) ? "!" :
+ i915_request_started(ports[0]) ? "*" :
+ "",
+ ports[1] ? ports[1]->fence.context : 0,
+ ports[1] ? ports[1]->fence.seqno : 0);
+}
+
+static __maybe_unused bool
+assert_pending_valid(const struct intel_engine_execlists *execlists,
+ const char *msg)
+{
+ struct i915_request * const *port, *rq;
+ struct intel_context *ce = NULL;
+
+ trace_ports(execlists, msg, execlists->pending);
+
+ if (!execlists->pending[0]) {
+ GEM_TRACE_ERR("Nothing pending for promotion!\n");
+ return false;
+ }
+
+ if (execlists->pending[execlists_num_ports(execlists)]) {
+ GEM_TRACE_ERR("Excess pending[%d] for promotion!\n",
+ execlists_num_ports(execlists));
+ return false;
+ }
+
+ for (port = execlists->pending; (rq = *port); port++) {
+ unsigned long flags;
+ bool ok = true;
+
+ GEM_BUG_ON(!kref_read(&rq->fence.refcount));
+ GEM_BUG_ON(!i915_request_is_active(rq));
+
+ if (ce == rq->context) {
+ GEM_TRACE_ERR("Dup context:%llx in pending[%zd]\n",
+ ce->timeline->fence_context,
+ port - execlists->pending);
+ return false;
+ }
+ ce = rq->context;
+
+ /* Hold tightly onto the lock to prevent concurrent retires! */
+ if (!spin_trylock_irqsave(&rq->lock, flags))
+ continue;
+
+ if (i915_request_completed(rq))
+ goto unlock;
+
+ if (i915_active_is_idle(&ce->active) &&
+ !intel_context_is_barrier(ce)) {
+ GEM_TRACE_ERR("Inactive context:%llx in pending[%zd]\n",
+ ce->timeline->fence_context,
+ port - execlists->pending);
+ ok = false;
+ goto unlock;
+ }
+
+ if (!i915_vma_is_pinned(ce->state)) {
+ GEM_TRACE_ERR("Unpinned context:%llx in pending[%zd]\n",
+ ce->timeline->fence_context,
+ port - execlists->pending);
+ ok = false;
+ goto unlock;
+ }
+
+ if (!i915_vma_is_pinned(ce->ring->vma)) {
+ GEM_TRACE_ERR("Unpinned ring:%llx in pending[%zd]\n",
+ ce->timeline->fence_context,
+ port - execlists->pending);
+ ok = false;
+ goto unlock;
+ }
+
+unlock:
+ spin_unlock_irqrestore(&rq->lock, flags);
+ if (!ok)
+ return false;
+ }
+
+ return ce;
+}
+
static void execlists_submit_ports(struct intel_engine_cs *engine)
{
struct intel_engine_execlists *execlists = &engine->execlists;
- struct execlist_port *port = execlists->port;
unsigned int n;
+ GEM_BUG_ON(!assert_pending_valid(execlists, "submit"));
+
/*
* We can skip acquiring intel_runtime_pm_get() here as it was taken
* on our behalf by the request (see i915_gem_mark_busy()) and it will
@@ -604,7 +1480,7 @@ static void execlists_submit_ports(struct intel_engine_cs *engine)
* that all ELSP are drained i.e. we have processed the CSB,
* before allowing ourselves to idle and calling intel_runtime_pm_put().
*/
- GEM_BUG_ON(!intel_wakeref_active(&engine->wakeref));
+ GEM_BUG_ON(!intel_engine_pm_is_awake(engine));
/*
* ELSQ note: the submit queue is not cleared after being submitted
@@ -613,44 +1489,22 @@ static void execlists_submit_ports(struct intel_engine_cs *engine)
* of elsq entries, keep this in mind before changing the loop below.
*/
for (n = execlists_num_ports(execlists); n--; ) {
- struct i915_request *rq;
- unsigned int count;
- u64 desc;
-
- rq = port_unpack(&port[n], &count);
- if (rq) {
- GEM_BUG_ON(count > !n);
- if (!count++)
- execlists_context_schedule_in(rq);
- port_set(&port[n], port_pack(rq, count));
- desc = execlists_update_context(rq);
- GEM_DEBUG_EXEC(port[n].context_id = upper_32_bits(desc));
-
- GEM_TRACE("%s in[%d]: ctx=%d.%d, fence %llx:%lld (current %d), prio=%d\n",
- engine->name, n,
- port[n].context_id, count,
- rq->fence.context, rq->fence.seqno,
- hwsp_seqno(rq),
- rq_prio(rq));
- } else {
- GEM_BUG_ON(!n);
- desc = 0;
- }
+ struct i915_request *rq = execlists->pending[n];
- write_desc(execlists, desc, n);
+ write_desc(execlists,
+ rq ? execlists_update_context(rq) : 0,
+ n);
}
/* we need to manually load the submit queue */
if (execlists->ctrl_reg)
writel(EL_CTRL_LOAD, execlists->ctrl_reg);
-
- execlists_clear_active(execlists, EXECLISTS_ACTIVE_HWACK);
}
static bool ctx_single_port_submission(const struct intel_context *ce)
{
return (IS_ENABLED(CONFIG_DRM_I915_GVT) &&
- i915_gem_context_force_single_submission(ce->gem_context));
+ intel_context_force_single_submission(ce));
}
static bool can_merge_ctx(const struct intel_context *prev,
@@ -668,110 +1522,35 @@ static bool can_merge_ctx(const struct intel_context *prev,
static bool can_merge_rq(const struct i915_request *prev,
const struct i915_request *next)
{
+ GEM_BUG_ON(prev == next);
GEM_BUG_ON(!assert_priority_queue(prev, next));
- if (!can_merge_ctx(prev->hw_context, next->hw_context))
- return false;
-
- return true;
-}
-
-static void port_assign(struct execlist_port *port, struct i915_request *rq)
-{
- GEM_BUG_ON(rq == port_request(port));
-
- if (port_isset(port))
- i915_request_put(port_request(port));
-
- port_set(port, port_pack(i915_request_get(rq), port_count(port)));
-}
-
-static void inject_preempt_context(struct intel_engine_cs *engine)
-{
- struct intel_engine_execlists *execlists = &engine->execlists;
- struct intel_context *ce = engine->preempt_context;
- unsigned int n;
-
- GEM_BUG_ON(execlists->preempt_complete_status !=
- upper_32_bits(ce->lrc_desc));
-
/*
- * Switch to our empty preempt context so
- * the state of the GPU is known (idle).
+ * We do not submit known completed requests. Therefore if the next
+ * request is already completed, we can pretend to merge it in
+ * with the previous context (and we will skip updating the ELSP
+ * and tracking). Thus hopefully keeping the ELSP full with active
+ * contexts, despite the best efforts of preempt-to-busy to confuse
+ * us.
*/
- GEM_TRACE("%s\n", engine->name);
- for (n = execlists_num_ports(execlists); --n; )
- write_desc(execlists, 0, n);
-
- write_desc(execlists, ce->lrc_desc, n);
-
- /* we need to manually load the submit queue */
- if (execlists->ctrl_reg)
- writel(EL_CTRL_LOAD, execlists->ctrl_reg);
-
- execlists_clear_active(execlists, EXECLISTS_ACTIVE_HWACK);
- execlists_set_active(execlists, EXECLISTS_ACTIVE_PREEMPT);
-
- (void)I915_SELFTEST_ONLY(execlists->preempt_hang.count++);
-}
+ if (i915_request_completed(next))
+ return true;
-static void complete_preempt_context(struct intel_engine_execlists *execlists)
-{
- GEM_BUG_ON(!execlists_is_active(execlists, EXECLISTS_ACTIVE_PREEMPT));
+ if (unlikely((prev->fence.flags ^ next->fence.flags) &
+ (BIT(I915_FENCE_FLAG_NOPREEMPT) |
+ BIT(I915_FENCE_FLAG_SENTINEL))))
+ return false;
- if (inject_preempt_hang(execlists))
- return;
+ if (!can_merge_ctx(prev->context, next->context))
+ return false;
- execlists_cancel_port_requests(execlists);
- __unwind_incomplete_requests(container_of(execlists,
- struct intel_engine_cs,
- execlists));
+ return true;
}
static void virtual_update_register_offsets(u32 *regs,
struct intel_engine_cs *engine)
{
- u32 base = engine->mmio_base;
-
- /* Must match execlists_init_reg_state()! */
-
- regs[CTX_CONTEXT_CONTROL] =
- i915_mmio_reg_offset(RING_CONTEXT_CONTROL(base));
- regs[CTX_RING_HEAD] = i915_mmio_reg_offset(RING_HEAD(base));
- regs[CTX_RING_TAIL] = i915_mmio_reg_offset(RING_TAIL(base));
- regs[CTX_RING_BUFFER_START] = i915_mmio_reg_offset(RING_START(base));
- regs[CTX_RING_BUFFER_CONTROL] = i915_mmio_reg_offset(RING_CTL(base));
-
- regs[CTX_BB_HEAD_U] = i915_mmio_reg_offset(RING_BBADDR_UDW(base));
- regs[CTX_BB_HEAD_L] = i915_mmio_reg_offset(RING_BBADDR(base));
- regs[CTX_BB_STATE] = i915_mmio_reg_offset(RING_BBSTATE(base));
- regs[CTX_SECOND_BB_HEAD_U] =
- i915_mmio_reg_offset(RING_SBBADDR_UDW(base));
- regs[CTX_SECOND_BB_HEAD_L] = i915_mmio_reg_offset(RING_SBBADDR(base));
- regs[CTX_SECOND_BB_STATE] = i915_mmio_reg_offset(RING_SBBSTATE(base));
-
- regs[CTX_CTX_TIMESTAMP] =
- i915_mmio_reg_offset(RING_CTX_TIMESTAMP(base));
- regs[CTX_PDP3_UDW] = i915_mmio_reg_offset(GEN8_RING_PDP_UDW(base, 3));
- regs[CTX_PDP3_LDW] = i915_mmio_reg_offset(GEN8_RING_PDP_LDW(base, 3));
- regs[CTX_PDP2_UDW] = i915_mmio_reg_offset(GEN8_RING_PDP_UDW(base, 2));
- regs[CTX_PDP2_LDW] = i915_mmio_reg_offset(GEN8_RING_PDP_LDW(base, 2));
- regs[CTX_PDP1_UDW] = i915_mmio_reg_offset(GEN8_RING_PDP_UDW(base, 1));
- regs[CTX_PDP1_LDW] = i915_mmio_reg_offset(GEN8_RING_PDP_LDW(base, 1));
- regs[CTX_PDP0_UDW] = i915_mmio_reg_offset(GEN8_RING_PDP_UDW(base, 0));
- regs[CTX_PDP0_LDW] = i915_mmio_reg_offset(GEN8_RING_PDP_LDW(base, 0));
-
- if (engine->class == RENDER_CLASS) {
- regs[CTX_RCS_INDIRECT_CTX] =
- i915_mmio_reg_offset(RING_INDIRECT_CTX(base));
- regs[CTX_RCS_INDIRECT_CTX_OFFSET] =
- i915_mmio_reg_offset(RING_INDIRECT_CTX_OFFSET(base));
- regs[CTX_BB_PER_CTX_PTR] =
- i915_mmio_reg_offset(RING_BB_PER_CTX_PTR(base));
-
- regs[CTX_R_PWR_CLK_STATE] =
- i915_mmio_reg_offset(GEN8_R_PWR_CLK_STATE);
- }
+ set_offsets(regs, reg_offsets(engine), engine, false);
}
static bool virtual_matches(const struct virtual_engine *ve,
@@ -792,7 +1571,7 @@ static bool virtual_matches(const struct virtual_engine *ve,
* we reuse the register offsets). This is a very small
* hysteresis on the greedy selection algorithm.
*/
- inflight = READ_ONCE(ve->context.inflight);
+ inflight = intel_context_inflight(&ve->context);
if (inflight && inflight != engine)
return false;
@@ -810,18 +1589,171 @@ static void virtual_xfer_breadcrumbs(struct virtual_engine *ve,
if (!list_empty(&ve->context.signal_link)) {
list_move_tail(&ve->context.signal_link,
&engine->breadcrumbs.signalers);
- intel_engine_queue_breadcrumbs(engine);
+ intel_engine_signal_breadcrumbs(engine);
}
spin_unlock(&old->breadcrumbs.irq_lock);
}
+static struct i915_request *
+last_active(const struct intel_engine_execlists *execlists)
+{
+ struct i915_request * const *last = READ_ONCE(execlists->active);
+
+ while (*last && i915_request_completed(*last))
+ last++;
+
+ return *last;
+}
+
+static void defer_request(struct i915_request *rq, struct list_head * const pl)
+{
+ LIST_HEAD(list);
+
+ /*
+ * We want to move the interrupted request to the back of
+ * the round-robin list (i.e. its priority level), but
+ * in doing so, we must also move the in-flight requests
+ * that were waiting on the interrupted request so that
+ * they are run after it again.
+ */
+ do {
+ struct i915_dependency *p;
+
+ GEM_BUG_ON(i915_request_is_active(rq));
+ list_move_tail(&rq->sched.link, pl);
+
+ list_for_each_entry(p, &rq->sched.waiters_list, wait_link) {
+ struct i915_request *w =
+ container_of(p->waiter, typeof(*w), sched);
+
+ /* Leave semaphores spinning on the other engines */
+ if (w->engine != rq->engine)
+ continue;
+
+ /* No waiter should start before its signaler */
+ GEM_BUG_ON(i915_request_started(w) &&
+ !i915_request_completed(rq));
+
+ GEM_BUG_ON(i915_request_is_active(w));
+ if (!i915_request_is_ready(w))
+ continue;
+
+ if (rq_prio(w) < rq_prio(rq))
+ continue;
+
+ GEM_BUG_ON(rq_prio(w) > rq_prio(rq));
+ list_move_tail(&w->sched.link, &list);
+ }
+
+ rq = list_first_entry_or_null(&list, typeof(*rq), sched.link);
+ } while (rq);
+}
+
+static void defer_active(struct intel_engine_cs *engine)
+{
+ struct i915_request *rq;
+
+ rq = __unwind_incomplete_requests(engine);
+ if (!rq)
+ return;
+
+ defer_request(rq, i915_sched_lookup_priolist(engine, rq_prio(rq)));
+}
+
+static bool
+need_timeslice(struct intel_engine_cs *engine, const struct i915_request *rq)
+{
+ int hint;
+
+ if (!intel_engine_has_timeslices(engine))
+ return false;
+
+ if (list_is_last(&rq->sched.link, &engine->active.requests))
+ return false;
+
+ hint = max(rq_prio(list_next_entry(rq, sched.link)),
+ engine->execlists.queue_priority_hint);
+
+ return hint >= effective_prio(rq);
+}
+
+static int
+switch_prio(struct intel_engine_cs *engine, const struct i915_request *rq)
+{
+ if (list_is_last(&rq->sched.link, &engine->active.requests))
+ return INT_MIN;
+
+ return rq_prio(list_next_entry(rq, sched.link));
+}
+
+static inline unsigned long
+timeslice(const struct intel_engine_cs *engine)
+{
+ return READ_ONCE(engine->props.timeslice_duration_ms);
+}
+
+static unsigned long
+active_timeslice(const struct intel_engine_cs *engine)
+{
+ const struct i915_request *rq = *engine->execlists.active;
+
+ if (!rq || i915_request_completed(rq))
+ return 0;
+
+ if (engine->execlists.switch_priority_hint < effective_prio(rq))
+ return 0;
+
+ return timeslice(engine);
+}
+
+static void set_timeslice(struct intel_engine_cs *engine)
+{
+ if (!intel_engine_has_timeslices(engine))
+ return;
+
+ set_timer_ms(&engine->execlists.timer, active_timeslice(engine));
+}
+
+static void record_preemption(struct intel_engine_execlists *execlists)
+{
+ (void)I915_SELFTEST_ONLY(execlists->preempt_hang.count++);
+}
+
+static unsigned long active_preempt_timeout(struct intel_engine_cs *engine)
+{
+ struct i915_request *rq;
+
+ rq = last_active(&engine->execlists);
+ if (!rq)
+ return 0;
+
+ /* Force a fast reset for terminated contexts (ignoring sysfs!) */
+ if (unlikely(intel_context_is_banned(rq->context)))
+ return 1;
+
+ return READ_ONCE(engine->props.preempt_timeout_ms);
+}
+
+static void set_preempt_timeout(struct intel_engine_cs *engine)
+{
+ if (!intel_engine_has_preempt_reset(engine))
+ return;
+
+ set_timer_ms(&engine->execlists.preempt,
+ active_preempt_timeout(engine));
+}
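
Both of the helpers above funnel into set_timer_ms(). Its assumed contract, judging from these call sites, is that a zero duration cancels the timer while any other value (re)arms it that many milliseconds out; a sketch of such a helper:

static void set_timer_ms_sketch(struct timer_list *t, unsigned long timeout_ms)
{
	if (!timeout_ms) {
		del_timer(t);		/* a zero duration disables the timer */
		return;
	}

	/* Round up so that a short, nonzero request still gets a full tick */
	mod_timer(t, jiffies + max(1ul, msecs_to_jiffies(timeout_ms)));
}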
+
+static inline void clear_ports(struct i915_request **ports, int count)
+{
+ memset_p((void **)ports, NULL, count);
+}
+
static void execlists_dequeue(struct intel_engine_cs *engine)
{
struct intel_engine_execlists * const execlists = &engine->execlists;
- struct execlist_port *port = execlists->port;
- const struct execlist_port * const last_port =
- &execlists->port[execlists->port_mask];
- struct i915_request *last = port_request(port);
+ struct i915_request **port = execlists->pending;
+ struct i915_request ** const last_port = port + execlists->port_mask;
+ struct i915_request *last;
struct rb_node *rb;
bool submit = false;
@@ -867,65 +1799,100 @@ static void execlists_dequeue(struct intel_engine_cs *engine)
break;
}
+ /*
+ * If the queue is higher priority than the last
+ * request in the currently active context, submit afresh.
+ * We will resubmit again afterwards in case we need to split
+ * the active context to interject the preemption request,
+ * i.e. we will retrigger preemption following the ack in case
+ * of trouble.
+ */
+ last = last_active(execlists);
if (last) {
- /*
- * Don't resubmit or switch until all outstanding
- * preemptions (lite-restore) are seen. Then we
- * know the next preemption status we see corresponds
- * to this ELSP update.
- */
- GEM_BUG_ON(!execlists_is_active(execlists,
- EXECLISTS_ACTIVE_USER));
- GEM_BUG_ON(!port_count(&port[0]));
+ if (need_preempt(engine, last, rb)) {
+ ENGINE_TRACE(engine,
+ "preempting last=%llx:%lld, prio=%d, hint=%d\n",
+ last->fence.context,
+ last->fence.seqno,
+ last->sched.attr.priority,
+ execlists->queue_priority_hint);
+ record_preemption(execlists);
- /*
- * If we write to ELSP a second time before the HW has had
- * a chance to respond to the previous write, we can confuse
- * the HW and hit "undefined behaviour". After writing to ELSP,
- * we must then wait until we see a context-switch event from
- * the HW to indicate that it has had a chance to respond.
- */
- if (!execlists_is_active(execlists, EXECLISTS_ACTIVE_HWACK))
- return;
+ /*
+ * Don't let the RING_HEAD advance past the breadcrumb
+ * as we unwind (and until we resubmit) so that we do
+ * not accidentally tell it to go backwards.
+ */
+ ring_set_paused(engine, 1);
- if (need_preempt(engine, last, rb)) {
- inject_preempt_context(engine);
- return;
- }
+ /*
+ * Note that we have not stopped the GPU at this point,
+ * so the incomplete requests we unwind remain in
+ * flight: by the time we do complete the preemption,
+ * some of the unwound requests may already have
+ * completed!
+ */
+ __unwind_incomplete_requests(engine);
- /*
- * In theory, we could coalesce more requests onto
- * the second port (the first port is active, with
- * no preemptions pending). However, that means we
- * then have to deal with the possible lite-restore
- * of the second port (as we submit the ELSP, there
- * may be a context-switch) but also we may complete
- * the resubmission before the context-switch. Ergo,
- * coalescing onto the second port will cause a
- * preemption event, but we cannot predict whether
- * that will affect port[0] or port[1].
- *
- * If the second port is already active, we can wait
- * until the next context-switch before contemplating
- * new requests. The GPU will be busy and we should be
- * able to resubmit the new ELSP before it idles,
- * avoiding pipeline bubbles (momentary pauses where
- * the driver is unable to keep up the supply of new
- * work). However, we have to double check that the
- * priorities of the ports haven't been switch.
- */
- if (port_count(&port[1]))
- return;
+ /*
+ * If we need to return to the preempted context, we
+ * need to skip the lite-restore and force it to
+ * reload the RING_TAIL. Otherwise, the HW has a
+ * tendency to ignore us rewinding the TAIL to the
+ * end of an earlier request.
+ */
+ last->context->lrc_desc |= CTX_DESC_FORCE_RESTORE;
+ last = NULL;
+ } else if (need_timeslice(engine, last) &&
+ timer_expired(&engine->execlists.timer)) {
+ ENGINE_TRACE(engine,
+ "expired last=%llx:%lld, prio=%d, hint=%d\n",
+ last->fence.context,
+ last->fence.seqno,
+ last->sched.attr.priority,
+ execlists->queue_priority_hint);
+
+ ring_set_paused(engine, 1);
+ defer_active(engine);
- /*
- * WaIdleLiteRestore:bdw,skl
- * Apply the wa NOOPs to prevent
- * ring:HEAD == rq:TAIL as we resubmit the
- * request. See gen8_emit_fini_breadcrumb() for
- * where we prepare the padding after the
- * end of the request.
- */
- last->tail = last->wa_tail;
+ /*
+ * Unlike for preemption, if we rewind and continue
+ * executing the same context as previously active,
+ * the order of execution will remain the same and
+ * the tail will only advance. We do not need to
+ * force a full context restore, as a lite-restore
+ * is sufficient to resample the monotonic TAIL.
+ *
+ * Similarly, if we switch to any other context we
+ * will not rewind the TAIL of the current context;
+ * the normal save/restore will preserve its state
+ * and allow us to continue executing the same
+ * request later.
+ */
+ last = NULL;
+ } else {
+ /*
+ * Otherwise if we already have a request pending
+ * for execution after the current one, we can
+ * just wait until the next CS event before
+ * queuing more. In either case we will force a
+ * lite-restore preemption event, but if we wait
+ * we hopefully coalesce several updates into a single
+ * submission.
+ */
+ if (!list_is_last(&last->sched.link,
+ &engine->active.requests)) {
+ /*
+ * Even if ELSP[1] is occupied and not worthy
+ * of timeslices, our queue might be.
+ */
+ if (!execlists->timer.expires &&
+ need_timeslice(engine, last))
+ set_timer_ms(&execlists->timer,
+ timeslice(engine));
+
+ return;
+ }
+ }
}
while (rb) { /* XXX virtual is always taking precedence */
@@ -946,7 +1913,7 @@ static void execlists_dequeue(struct intel_engine_cs *engine)
GEM_BUG_ON(rq != ve->request);
GEM_BUG_ON(rq->engine != &ve->base);
- GEM_BUG_ON(rq->hw_context != &ve->context);
+ GEM_BUG_ON(rq->context != &ve->context);
if (rq_prio(rq) >= queue_prio(execlists)) {
if (!virtual_matches(ve, rq, engine)) {
@@ -957,17 +1924,17 @@ static void execlists_dequeue(struct intel_engine_cs *engine)
if (last && !can_merge_rq(last, rq)) {
spin_unlock(&ve->base.active.lock);
- return; /* leave this rq for another engine */
+ return; /* leave this for another */
}
- GEM_TRACE("%s: virtual rq=%llx:%lld%s, new engine? %s\n",
- engine->name,
- rq->fence.context,
- rq->fence.seqno,
- i915_request_completed(rq) ? "!" :
- i915_request_started(rq) ? "*" :
- "",
- yesno(engine != ve->siblings[0]));
+ ENGINE_TRACE(engine,
+ "virtual rq=%llx:%lld%s, new engine? %s\n",
+ rq->fence.context,
+ rq->fence.seqno,
+ i915_request_completed(rq) ? "!" :
+ i915_request_started(rq) ? "*" :
+ "",
+ yesno(engine != ve->siblings[0]));
ve->request = NULL;
ve->base.execlists.queue_priority_hint = INT_MIN;
@@ -982,7 +1949,10 @@ static void execlists_dequeue(struct intel_engine_cs *engine)
unsigned int n;
GEM_BUG_ON(READ_ONCE(ve->context.inflight));
- virtual_update_register_offsets(regs, engine);
+
+ if (!intel_engine_has_relative_mmio(engine))
+ virtual_update_register_offsets(regs,
+ engine);
if (!list_empty(&ve->context.signals))
virtual_xfer_breadcrumbs(ve, engine);
@@ -1005,10 +1975,24 @@ static void execlists_dequeue(struct intel_engine_cs *engine)
GEM_BUG_ON(ve->siblings[0] != engine);
}
- __i915_request_submit(rq);
- trace_i915_request_in(rq, port_index(port, execlists));
- submit = true;
- last = rq;
+ if (__i915_request_submit(rq)) {
+ submit = true;
+ last = rq;
+ }
+ i915_request_put(rq);
+
+ /*
+ * Hmm, we have a bunch of virtual engine requests,
+ * but the first one was already completed (thanks
+ * preempt-to-busy!). Keep looking at the veng queue
+ * until we have no more relevant requests (i.e.
+ * the normal submit queue has higher priority).
+ */
+ if (!submit) {
+ spin_unlock(&ve->base.active.lock);
+ rb = rb_first_cached(&execlists->virtual);
+ continue;
+ }
}
spin_unlock(&ve->base.active.lock);
@@ -1021,6 +2005,8 @@ static void execlists_dequeue(struct intel_engine_cs *engine)
int i;
priolist_for_each_request_consume(rq, rn, p, i) {
+ bool merge = true;
+
/*
* Can we combine this request with the current port?
* It has to be the same context/ringbuffer and not
@@ -1046,7 +2032,10 @@ static void execlists_dequeue(struct intel_engine_cs *engine)
* same LRCA, i.e. we must submit 2 different
* contexts if we submit 2 ELSP.
*/
- if (last->hw_context == rq->hw_context)
+ if (last->context == rq->context)
+ goto done;
+
+ if (i915_request_has_sentinel(last))
goto done;
/*
@@ -1056,23 +2045,27 @@ static void execlists_dequeue(struct intel_engine_cs *engine)
* the same context (even though a different
* request) to the second port.
*/
- if (ctx_single_port_submission(last->hw_context) ||
- ctx_single_port_submission(rq->hw_context))
+ if (ctx_single_port_submission(last->context) ||
+ ctx_single_port_submission(rq->context))
goto done;
-
- if (submit)
- port_assign(port, last);
- port++;
-
- GEM_BUG_ON(port_isset(port));
+ merge = false;
}
- __i915_request_submit(rq);
- trace_i915_request_in(rq, port_index(port, execlists));
+ if (__i915_request_submit(rq)) {
+ if (!merge) {
+ *port = execlists_schedule_in(last, port - execlists->pending);
+ port++;
+ last = NULL;
+ }
- last = rq;
- submit = true;
+ GEM_BUG_ON(last &&
+ !can_merge_ctx(last->context,
+ rq->context));
+
+ submit = true;
+ last = rq;
+ }
}
rb_erase_cached(&p->node, &execlists->queue);
@@ -1099,52 +2092,47 @@ done:
execlists->queue_priority_hint = queue_prio(execlists);
if (submit) {
- port_assign(port, last);
- execlists_submit_ports(engine);
- }
+ *port = execlists_schedule_in(last, port - execlists->pending);
+ execlists->switch_priority_hint =
+ switch_prio(engine, *execlists->pending);
- /* We must always keep the beast fed if we have work piled up */
- GEM_BUG_ON(rb_first_cached(&execlists->queue) &&
- !port_isset(execlists->port));
+ /*
+ * Skip if we ended up with exactly the same set of requests,
+ * e.g. trying to timeslice a pair of ordered contexts
+ */
+ if (!memcmp(execlists->active, execlists->pending,
+ (port - execlists->pending + 1) * sizeof(*port))) {
+ do
+ execlists_schedule_out(fetch_and_zero(port));
+ while (port-- != execlists->pending);
- /* Re-evaluate the executing context setup after each preemptive kick */
- if (last)
- execlists_user_begin(execlists, execlists->port);
+ goto skip_submit;
+ }
+ clear_ports(port + 1, last_port - port);
- /* If the engine is now idle, so should be the flag; and vice versa. */
- GEM_BUG_ON(execlists_is_active(&engine->execlists,
- EXECLISTS_ACTIVE_USER) ==
- !port_isset(engine->execlists.port));
+ execlists_submit_ports(engine);
+ set_preempt_timeout(engine);
+ } else {
+skip_submit:
+ ring_set_paused(engine, 0);
+ }
}
-void
-execlists_cancel_port_requests(struct intel_engine_execlists * const execlists)
+static void
+cancel_port_requests(struct intel_engine_execlists * const execlists)
{
- struct execlist_port *port = execlists->port;
- unsigned int num_ports = execlists_num_ports(execlists);
+ struct i915_request * const *port;
- while (num_ports-- && port_isset(port)) {
- struct i915_request *rq = port_request(port);
+ for (port = execlists->pending; *port; port++)
+ execlists_schedule_out(*port);
+ clear_ports(execlists->pending, ARRAY_SIZE(execlists->pending));
- GEM_TRACE("%s:port%u fence %llx:%lld, (current %d)\n",
- rq->engine->name,
- (unsigned int)(port - execlists->port),
- rq->fence.context, rq->fence.seqno,
- hwsp_seqno(rq));
+ /* Mark the end of active before we overwrite *active */
+ for (port = xchg(&execlists->active, execlists->pending); *port; port++)
+ execlists_schedule_out(*port);
+ clear_ports(execlists->inflight, ARRAY_SIZE(execlists->inflight));
- GEM_BUG_ON(!execlists->active);
- execlists_context_schedule_out(rq,
- i915_request_completed(rq) ?
- INTEL_CONTEXT_SCHEDULE_OUT :
- INTEL_CONTEXT_SCHEDULE_PREEMPTED);
-
- i915_request_put(rq);
-
- memset(port, 0, sizeof(*port));
- port++;
- }
-
- execlists_clear_all_active(execlists);
+ WRITE_ONCE(execlists->active, execlists->inflight);
}
static inline void
@@ -1160,16 +2148,83 @@ reset_in_progress(const struct intel_engine_execlists *execlists)
return unlikely(!__tasklet_is_enabled(&execlists->tasklet));
}
+/*
+ * Starting with Gen12, the status has a new format:
+ *
+ * bit 0: switched to new queue
+ * bit 1: reserved
+ * bit 2: semaphore wait mode (poll or signal), only valid when
+ * switch detail is set to "wait on semaphore"
+ * bits 3-5: engine class
+ * bits 6-11: engine instance
+ * bits 12-14: reserved
+ * bits 15-25: sw context id of the lrc the GT switched to
+ * bits 26-31: sw counter of the lrc the GT switched to
+ * bits 32-35: context switch detail
+ * - 0: ctx complete
+ * - 1: wait on sync flip
+ * - 2: wait on vblank
+ * - 3: wait on scanline
+ * - 4: wait on semaphore
+ * - 5: context preempted (not on SEMAPHORE_WAIT or
+ * WAIT_FOR_EVENT)
+ * bit 36: reserved
+ * bits 37-43: wait detail (for switch detail 1 to 4)
+ * bits 44-46: reserved
+ * bits 47-57: sw context id of the lrc the GT switched away from
+ * bits 58-63: sw counter of the lrc the GT switched away from
+ */
+static inline bool
+gen12_csb_parse(const struct intel_engine_execlists *execlists, const u32 *csb)
+{
+ u32 lower_dw = csb[0];
+ u32 upper_dw = csb[1];
+ bool ctx_to_valid = GEN12_CSB_CTX_VALID(lower_dw);
+ bool ctx_away_valid = GEN12_CSB_CTX_VALID(upper_dw);
+ bool new_queue = lower_dw & GEN12_CTX_STATUS_SWITCHED_TO_NEW_QUEUE;
+
+ /*
+ * The context switch detail is not guaranteed to be 5 when a preemption
+ * occurs, so we can't just check for that. The check below works for
+ * all the cases we care about, including preemptions of WAIT
+ * instructions and lite-restore. Preempt-to-idle via the CTRL register
+ * would require some extra handling, but we don't support that.
+ */
+ if (!ctx_away_valid || new_queue) {
+ GEM_BUG_ON(!ctx_to_valid);
+ return true;
+ }
+
+ /*
+ * switch detail = 5 is covered by the case above and we do not expect a
+ * context switch on an unsuccessful wait instruction since we always
+ * use polling mode.
+ */
+ GEM_BUG_ON(GEN12_CTX_SWITCH_DETAIL(upper_dw));
+ return false;
+}
+
+static inline bool
+gen8_csb_parse(const struct intel_engine_execlists *execlists, const u32 *csb)
+{
+ return *csb & (GEN8_CTX_STATUS_IDLE_ACTIVE | GEN8_CTX_STATUS_PREEMPTED);
+}
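
As a worked example of the layout documented above, the sketch below pulls the individual fields out of one Gen12 event. The masks follow the bit positions in the comment, and an all-ones SW context id is assumed to mean "no context", matching the GEN12_CSB_CTX_VALID test; names are illustrative:

/* Sketch: decode a Gen12 CSB event per the layout above. */
struct csb_event {
	bool new_queue, to_valid, away_valid;
	unsigned int ctx_to, ctx_away, switch_detail;
};

static void csb_decode_sketch(u32 lower_dw, u32 upper_dw,
			      struct csb_event *ev)
{
	ev->new_queue	  = lower_dw & BIT(0);		/* bit 0 */
	ev->ctx_to	  = (lower_dw >> 15) & 0x7ff;	/* bits 15-25 */
	ev->ctx_away	  = (upper_dw >> 15) & 0x7ff;	/* bits 47-57 */
	ev->switch_detail = upper_dw & 0xf;		/* bits 32-35 */

	/* an all-ones sw context id denotes "no context" */
	ev->to_valid	= ev->ctx_to != 0x7ff;
	ev->away_valid	= ev->ctx_away != 0x7ff;
}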
+
static void process_csb(struct intel_engine_cs *engine)
{
struct intel_engine_execlists * const execlists = &engine->execlists;
- struct execlist_port *port = execlists->port;
const u32 * const buf = execlists->csb_status;
const u8 num_entries = execlists->csb_size;
u8 head, tail;
- lockdep_assert_held(&engine->active.lock);
- GEM_BUG_ON(USES_GUC_SUBMISSION(engine->i915));
+ /*
+ * As we modify our execlists state tracking we require exclusive
+ * access. Either we are inside the tasklet, or the tasklet is disabled
+ * and we assume that is only inside the reset paths and so serialised.
+ */
+ GEM_BUG_ON(!tasklet_is_locked(&execlists->tasklet) &&
+ !reset_in_progress(execlists));
+ GEM_BUG_ON(!intel_engine_in_execlists_submission_mode(engine));
/*
* Note that csb_write, csb_status may be either in HWSP or mmio.
@@ -1183,7 +2238,7 @@ static void process_csb(struct intel_engine_cs *engine)
*/
head = execlists->csb_head;
tail = READ_ONCE(*execlists->csb_write);
- GEM_TRACE("%s cs-irq head=%d, tail=%d\n", engine->name, head, tail);
+ ENGINE_TRACE(engine, "cs-irq head=%d, tail=%d\n", head, tail);
if (unlikely(head == tail))
return;
@@ -1198,9 +2253,7 @@ static void process_csb(struct intel_engine_cs *engine)
rmb();
do {
- struct i915_request *rq;
- unsigned int status;
- unsigned int count;
+ bool promote;
if (++head == num_entries)
head = 0;
@@ -1223,68 +2276,41 @@ static void process_csb(struct intel_engine_cs *engine)
* status notifier.
*/
- GEM_TRACE("%s csb[%d]: status=0x%08x:0x%08x, active=0x%x\n",
- engine->name, head,
- buf[2 * head + 0], buf[2 * head + 1],
- execlists->active);
-
- status = buf[2 * head];
- if (status & (GEN8_CTX_STATUS_IDLE_ACTIVE |
- GEN8_CTX_STATUS_PREEMPTED))
- execlists_set_active(execlists,
- EXECLISTS_ACTIVE_HWACK);
- if (status & GEN8_CTX_STATUS_ACTIVE_IDLE)
- execlists_clear_active(execlists,
- EXECLISTS_ACTIVE_HWACK);
-
- if (!(status & GEN8_CTX_STATUS_COMPLETED_MASK))
- continue;
-
- /* We should never get a COMPLETED | IDLE_ACTIVE! */
- GEM_BUG_ON(status & GEN8_CTX_STATUS_IDLE_ACTIVE);
-
- if (status & GEN8_CTX_STATUS_COMPLETE &&
- buf[2*head + 1] == execlists->preempt_complete_status) {
- GEM_TRACE("%s preempt-idle\n", engine->name);
- complete_preempt_context(execlists);
- continue;
- }
-
- if (status & GEN8_CTX_STATUS_PREEMPTED &&
- execlists_is_active(execlists,
- EXECLISTS_ACTIVE_PREEMPT))
- continue;
-
- GEM_BUG_ON(!execlists_is_active(execlists,
- EXECLISTS_ACTIVE_USER));
-
- rq = port_unpack(port, &count);
- GEM_TRACE("%s out[0]: ctx=%d.%d, fence %llx:%lld (current %d), prio=%d\n",
- engine->name,
- port->context_id, count,
- rq ? rq->fence.context : 0,
- rq ? rq->fence.seqno : 0,
- rq ? hwsp_seqno(rq) : 0,
- rq ? rq_prio(rq) : 0);
+ ENGINE_TRACE(engine, "csb[%d]: status=0x%08x:0x%08x\n",
+ head, buf[2 * head + 0], buf[2 * head + 1]);
- /* Check the context/desc id for this event matches */
- GEM_DEBUG_BUG_ON(buf[2 * head + 1] != port->context_id);
+ if (INTEL_GEN(engine->i915) >= 12)
+ promote = gen12_csb_parse(execlists, buf + 2 * head);
+ else
+ promote = gen8_csb_parse(execlists, buf + 2 * head);
+ if (promote) {
+ struct i915_request * const *old = execlists->active;
+
+ /* Point active to the new ELSP; prevent overwriting */
+ WRITE_ONCE(execlists->active, execlists->pending);
+
+ if (!inject_preempt_hang(execlists))
+ ring_set_paused(engine, 0);
+
+ /* cancel old inflight, prepare for switch */
+ trace_ports(execlists, "preempted", old);
+ while (*old)
+ execlists_schedule_out(*old++);
+
+ /* switch pending to inflight */
+ GEM_BUG_ON(!assert_pending_valid(execlists, "promote"));
+ WRITE_ONCE(execlists->active,
+ memcpy(execlists->inflight,
+ execlists->pending,
+ execlists_num_ports(execlists) *
+ sizeof(*execlists->pending)));
+
+ WRITE_ONCE(execlists->pending[0], NULL);
+ } else {
+ GEM_BUG_ON(!*execlists->active);
- GEM_BUG_ON(count == 0);
- if (--count == 0) {
- /*
- * On the final event corresponding to the
- * submission of this context, we expect either
- * an element-switch event or a completion
- * event (and on completion, the active-idle
- * marker). No more preemptions, lite-restore
- * or otherwise.
- */
- GEM_BUG_ON(status & GEN8_CTX_STATUS_PREEMPTED);
- GEM_BUG_ON(port_isset(&port[1]) &&
- !(status & GEN8_CTX_STATUS_ELEMENT_SWITCH));
- GEM_BUG_ON(!port_isset(&port[1]) &&
- !(status & GEN8_CTX_STATUS_ACTIVE_IDLE));
+ /* port0 completed, advanced to port1 */
+ trace_ports(execlists, "completed", execlists->active);
/*
* We rely on the hardware being strongly
@@ -1292,26 +2318,17 @@ static void process_csb(struct intel_engine_cs *engine)
* coherent (visible from the CPU) before the
* user interrupt and CSB is processed.
*/
- GEM_BUG_ON(!i915_request_completed(rq));
-
- execlists_context_schedule_out(rq,
- INTEL_CONTEXT_SCHEDULE_OUT);
- i915_request_put(rq);
+ GEM_BUG_ON(!i915_request_completed(*execlists->active) &&
+ !reset_in_progress(execlists));
+ execlists_schedule_out(*execlists->active++);
- GEM_TRACE("%s completed ctx=%d\n",
- engine->name, port->context_id);
-
- port = execlists_port_complete(execlists, port);
- if (port_isset(port))
- execlists_user_begin(execlists, port);
- else
- execlists_user_end(execlists);
- } else {
- port_set(port, port_pack(rq, count));
+ GEM_BUG_ON(execlists->active - execlists->inflight >
+ execlists_num_ports(execlists));
}
} while (head != tail);
execlists->csb_head = head;
+ set_timeslice(engine);
/*
* Gen11 has proven to fail wrt global observation point between
@@ -1330,10 +2347,356 @@ static void process_csb(struct intel_engine_cs *engine)
static void __execlists_submission_tasklet(struct intel_engine_cs *const engine)
{
lockdep_assert_held(&engine->active.lock);
-
- process_csb(engine);
- if (!execlists_is_active(&engine->execlists, EXECLISTS_ACTIVE_PREEMPT))
+ if (!engine->execlists.pending[0]) {
+ rcu_read_lock(); /* protect peeking at execlists->active */
execlists_dequeue(engine);
+ rcu_read_unlock();
+ }
+}
+
+static void __execlists_hold(struct i915_request *rq)
+{
+ LIST_HEAD(list);
+
+ do {
+ struct i915_dependency *p;
+
+ if (i915_request_is_active(rq))
+ __i915_request_unsubmit(rq);
+
+ RQ_TRACE(rq, "on hold\n");
+ clear_bit(I915_FENCE_FLAG_PQUEUE, &rq->fence.flags);
+ list_move_tail(&rq->sched.link, &rq->engine->active.hold);
+ i915_request_set_hold(rq);
+
+ list_for_each_entry(p, &rq->sched.waiters_list, wait_link) {
+ struct i915_request *w =
+ container_of(p->waiter, typeof(*w), sched);
+
+ /* Leave semaphores spinning on the other engines */
+ if (w->engine != rq->engine)
+ continue;
+
+ if (!i915_request_is_ready(w))
+ continue;
+
+ if (i915_request_completed(w))
+ continue;
+
+ if (i915_request_on_hold(w))
+ continue;
+
+ list_move_tail(&w->sched.link, &list);
+ }
+
+ rq = list_first_entry_or_null(&list, typeof(*rq), sched.link);
+ } while (rq);
+}
+
+static bool execlists_hold(struct intel_engine_cs *engine,
+ struct i915_request *rq)
+{
+ spin_lock_irq(&engine->active.lock);
+
+ if (i915_request_completed(rq)) { /* too late! */
+ rq = NULL;
+ goto unlock;
+ }
+
+ if (rq->engine != engine) { /* preempted virtual engine */
+ struct virtual_engine *ve = to_virtual_engine(rq->engine);
+
+ /*
+ * intel_context_inflight() is only protected by virtue
+ * of process_csb() being called only by the tasklet (or
+ * directly from inside reset while the tasklet is suspended).
+ * Assert that neither of those are allowed to run while we
+ * poke at the request queues.
+ */
+ GEM_BUG_ON(!reset_in_progress(&engine->execlists));
+
+ /*
+ * An unsubmitted request along a virtual engine will
+ * remain on the active (this) engine until we are able
+ * to process the context switch away (and so mark the
+ * context as no longer in flight). That cannot have happened
+ * yet, otherwise we would not be hanging!
+ */
+ spin_lock(&ve->base.active.lock);
+ GEM_BUG_ON(intel_context_inflight(rq->context) != engine);
+ GEM_BUG_ON(ve->request != rq);
+ ve->request = NULL;
+ spin_unlock(&ve->base.active.lock);
+ i915_request_put(rq);
+
+ rq->engine = engine;
+ }
+
+ /*
+ * Transfer this request onto the hold queue to prevent it
+ * being resubmitted to HW (and potentially completed) before we have
+ * released it. Since we may have already submitted following
+ * requests, we need to remove those as well.
+ */
+ GEM_BUG_ON(i915_request_on_hold(rq));
+ GEM_BUG_ON(rq->engine != engine);
+ __execlists_hold(rq);
+
+unlock:
+ spin_unlock_irq(&engine->active.lock);
+ return rq;
+}
+
+static bool hold_request(const struct i915_request *rq)
+{
+ struct i915_dependency *p;
+
+ /*
+ * If one of our ancestors is on hold, we must also be on hold,
+ * otherwise we will bypass it and execute before it.
+ */
+ list_for_each_entry(p, &rq->sched.signalers_list, signal_link) {
+ const struct i915_request *s =
+ container_of(p->signaler, typeof(*s), sched);
+
+ if (s->engine != rq->engine)
+ continue;
+
+ if (i915_request_on_hold(s))
+ return true;
+ }
+
+ return false;
+}
+
+static void __execlists_unhold(struct i915_request *rq)
+{
+ LIST_HEAD(list);
+
+ do {
+ struct i915_dependency *p;
+
+ GEM_BUG_ON(!i915_request_on_hold(rq));
+ GEM_BUG_ON(!i915_sw_fence_signaled(&rq->submit));
+
+ i915_request_clear_hold(rq);
+ list_move_tail(&rq->sched.link,
+ i915_sched_lookup_priolist(rq->engine,
+ rq_prio(rq)));
+ set_bit(I915_FENCE_FLAG_PQUEUE, &rq->fence.flags);
+ RQ_TRACE(rq, "hold release\n");
+
+ /* Also release any children on this engine that are ready */
+ list_for_each_entry(p, &rq->sched.waiters_list, wait_link) {
+ struct i915_request *w =
+ container_of(p->waiter, typeof(*w), sched);
+
+ if (w->engine != rq->engine)
+ continue;
+
+			if (!i915_request_on_hold(w))
+				continue;
+
+			/* Check that no other parents are also on hold */
+			if (hold_request(w))
+				continue;
+
+ list_move_tail(&w->sched.link, &list);
+ }
+
+ rq = list_first_entry_or_null(&list, typeof(*rq), sched.link);
+ } while (rq);
+}
+
+static void execlists_unhold(struct intel_engine_cs *engine,
+ struct i915_request *rq)
+{
+ spin_lock_irq(&engine->active.lock);
+
+ /*
+ * Move this request back to the priority queue, and all of its
+ * children and grandchildren that were suspended along with it.
+ */
+ __execlists_unhold(rq);
+
+ if (rq_prio(rq) > engine->execlists.queue_priority_hint) {
+ engine->execlists.queue_priority_hint = rq_prio(rq);
+ tasklet_hi_schedule(&engine->execlists.tasklet);
+ }
+
+ spin_unlock_irq(&engine->active.lock);
+}
+
+struct execlists_capture {
+ struct work_struct work;
+ struct i915_request *rq;
+ struct i915_gpu_coredump *error;
+};
+
+static void execlists_capture_work(struct work_struct *work)
+{
+ struct execlists_capture *cap = container_of(work, typeof(*cap), work);
+ const gfp_t gfp = GFP_KERNEL | __GFP_RETRY_MAYFAIL | __GFP_NOWARN;
+ struct intel_engine_cs *engine = cap->rq->engine;
+ struct intel_gt_coredump *gt = cap->error->gt;
+ struct intel_engine_capture_vma *vma;
+
+ /* Compress all the objects attached to the request, slow! */
+ vma = intel_engine_coredump_add_request(gt->engine, cap->rq, gfp);
+ if (vma) {
+ struct i915_vma_compress *compress =
+ i915_vma_capture_prepare(gt);
+
+ intel_engine_coredump_add_vma(gt->engine, vma, compress);
+ i915_vma_capture_finish(gt, compress);
+ }
+
+ gt->simulated = gt->engine->simulated;
+ cap->error->simulated = gt->simulated;
+
+ /* Publish the error state, and announce it to the world */
+ i915_error_state_store(cap->error);
+ i915_gpu_coredump_put(cap->error);
+
+ /* Return this request and all that depend upon it for signaling */
+ execlists_unhold(engine, cap->rq);
+ i915_request_put(cap->rq);
+
+ kfree(cap);
+}
+
+static struct execlists_capture *capture_regs(struct intel_engine_cs *engine)
+{
+ const gfp_t gfp = GFP_ATOMIC | __GFP_NOWARN;
+ struct execlists_capture *cap;
+
+ cap = kmalloc(sizeof(*cap), gfp);
+ if (!cap)
+ return NULL;
+
+ cap->error = i915_gpu_coredump_alloc(engine->i915, gfp);
+ if (!cap->error)
+ goto err_cap;
+
+ cap->error->gt = intel_gt_coredump_alloc(engine->gt, gfp);
+ if (!cap->error->gt)
+ goto err_gpu;
+
+ cap->error->gt->engine = intel_engine_coredump_alloc(engine, gfp);
+ if (!cap->error->gt->engine)
+ goto err_gt;
+
+ return cap;
+
+err_gt:
+ kfree(cap->error->gt);
+err_gpu:
+ kfree(cap->error);
+err_cap:
+ kfree(cap);
+ return NULL;
+}
+
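capture_regs() builds a three-level object with staged goto unwinding, so a
failure at any level releases exactly what was already allocated, in
reverse order. The shape of that pattern, reduced to a hypothetical
two-level userspace analogue:

/*
 * Staged error unwinding with goto labels, as in capture_regs().
 * Hypothetical userspace analogue, not i915 code.
 */
#include <stdlib.h>

struct inner { int x; };
struct outer { struct inner *in; };

static struct outer *make_outer(void)
{
	struct outer *o;

	o = malloc(sizeof(*o));
	if (!o)
		return NULL;

	o->in = malloc(sizeof(*o->in));
	if (!o->in)
		goto err_outer;	/* free only what already succeeded */

	return o;

err_outer:
	free(o);
	return NULL;
}
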
+static bool execlists_capture(struct intel_engine_cs *engine)
+{
+ struct execlists_capture *cap;
+
+ if (!IS_ENABLED(CONFIG_DRM_I915_CAPTURE_ERROR))
+ return true;
+
+ /*
+ * We need to _quickly_ capture the engine state before we reset.
+ * We are inside an atomic section (softirq) here and we are delaying
+ * the forced preemption event.
+ */
+ cap = capture_regs(engine);
+ if (!cap)
+ return true;
+
+ cap->rq = execlists_active(&engine->execlists);
+ GEM_BUG_ON(!cap->rq);
+
+ rcu_read_lock();
+ cap->rq = active_request(cap->rq->context->timeline, cap->rq);
+ cap->rq = i915_request_get_rcu(cap->rq);
+ rcu_read_unlock();
+ if (!cap->rq)
+ goto err_free;
+
+ /*
+ * Remove the request from the execlists queue, and take ownership
+ * of the request. We pass it to our worker who will _slowly_ compress
+ * all the pages the _user_ requested for debugging their batch, after
+ * which we return it to the queue for signaling.
+ *
+	 * By removing them from the execlists queue, we also prevent the
+	 * requests from being processed by __unwind_incomplete_requests()
+ * during the intel_engine_reset(), and so they will *not* be replayed
+ * afterwards.
+ *
+	 * Note that because we have not yet reset the engine at this point,
+	 * it is possible that the request we have identified as being
+	 * guilty did in fact complete, and we will then hit an arbitration
+	 * point allowing the outstanding preemption to succeed. The likelihood
+ * of that is very low (as capturing of the engine registers should be
+ * fast enough to run inside an irq-off atomic section!), so we will
+ * simply hold that request accountable for being non-preemptible
+ * long enough to force the reset.
+ */
+ if (!execlists_hold(engine, cap->rq))
+ goto err_rq;
+
+ INIT_WORK(&cap->work, execlists_capture_work);
+ schedule_work(&cap->work);
+ return true;
+
+err_rq:
+ i915_request_put(cap->rq);
+err_free:
+ i915_gpu_coredump_put(cap->error);
+ kfree(cap);
+ return false;
+}
+
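execlists_capture() splits the work between the atomic tasklet (a fast
register snapshot) and a kworker (the slow page compression), handing the
state over via schedule_work(). A minimal kernel-style sketch of that
split, with hypothetical names rather than the driver's own:

/*
 * Sketch: defer slow processing from atomic context to a workqueue.
 * Hypothetical example, not i915 code.
 */
#include <linux/workqueue.h>
#include <linux/slab.h>

struct capture {
	struct work_struct work;
	void *snapshot;		/* grabbed quickly in atomic context */
};

static void capture_worker(struct work_struct *work)
{
	struct capture *cap = container_of(work, struct capture, work);

	/* process context: slow work and sleeping are safe here */
	kfree(cap->snapshot);
	kfree(cap);
}

static bool capture_in_atomic(void *snapshot)
{
	struct capture *cap;

	cap = kmalloc(sizeof(*cap), GFP_ATOMIC);	/* no sleeping */
	if (!cap)
		return false;

	cap->snapshot = snapshot;
	INIT_WORK(&cap->work, capture_worker);
	schedule_work(&cap->work);	/* runs later in a kworker */
	return true;
}
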
+static noinline void preempt_reset(struct intel_engine_cs *engine)
+{
+ const unsigned int bit = I915_RESET_ENGINE + engine->id;
+ unsigned long *lock = &engine->gt->reset.flags;
+
+ if (i915_modparams.reset < 3)
+ return;
+
+ if (test_and_set_bit(bit, lock))
+ return;
+
+ /* Mark this tasklet as disabled to avoid waiting for it to complete */
+ tasklet_disable_nosync(&engine->execlists.tasklet);
+
+ ENGINE_TRACE(engine, "preempt timeout %lu+%ums\n",
+ READ_ONCE(engine->props.preempt_timeout_ms),
+ jiffies_to_msecs(jiffies - engine->execlists.preempt.expires));
+
+ ring_set_paused(engine, 1); /* Freeze the current request in place */
+ if (execlists_capture(engine))
+ intel_engine_reset(engine, "preemption time out");
+ else
+ ring_set_paused(engine, 0);
+
+ tasklet_enable(&engine->execlists.tasklet);
+ clear_and_wake_up_bit(bit, lock);
+}
+
+static bool preempt_timeout(const struct intel_engine_cs *const engine)
+{
+ const struct timer_list *t = &engine->execlists.preempt;
+
+ if (!CONFIG_DRM_I915_PREEMPT_TIMEOUT)
+ return false;
+
+ if (!timer_expired(t))
+ return false;
+
+ return READ_ONCE(engine->execlists.pending[0]);
}
/*
@@ -1343,24 +2706,48 @@ static void __execlists_submission_tasklet(struct intel_engine_cs *const engine)
static void execlists_submission_tasklet(unsigned long data)
{
struct intel_engine_cs * const engine = (struct intel_engine_cs *)data;
- unsigned long flags;
+ bool timeout = preempt_timeout(engine);
+
+ process_csb(engine);
+ if (!READ_ONCE(engine->execlists.pending[0]) || timeout) {
+ unsigned long flags;
- GEM_TRACE("%s awake?=%d, active=%x\n",
- engine->name,
- !!intel_wakeref_active(&engine->wakeref),
- engine->execlists.active);
+ spin_lock_irqsave(&engine->active.lock, flags);
+ __execlists_submission_tasklet(engine);
+ spin_unlock_irqrestore(&engine->active.lock, flags);
- spin_lock_irqsave(&engine->active.lock, flags);
- __execlists_submission_tasklet(engine);
- spin_unlock_irqrestore(&engine->active.lock, flags);
+ /* Recheck after serialising with direct-submission */
+ if (timeout && preempt_timeout(engine))
+ preempt_reset(engine);
+ }
+}
+
+static void __execlists_kick(struct intel_engine_execlists *execlists)
+{
+ /* Kick the tasklet for some interrupt coalescing and reset handling */
+ tasklet_hi_schedule(&execlists->tasklet);
+}
+
+#define execlists_kick(t, member) \
+ __execlists_kick(container_of(t, struct intel_engine_execlists, member))
+
+static void execlists_timeslice(struct timer_list *timer)
+{
+ execlists_kick(timer, timer);
+}
+
+static void execlists_preempt(struct timer_list *timer)
+{
+ execlists_kick(timer, preempt);
}
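execlists_kick() recovers the execlists state from whichever timer_list
fired by naming the embedding member to container_of(). The same trick on
a hypothetical struct with two timers:

/*
 * container_of() dispatch for two timers embedded in one struct, as in
 * execlists_kick(). Hypothetical kernel-style sketch, not i915 code.
 */
#include <linux/timer.h>

struct dual {
	struct timer_list timer;
	struct timer_list preempt;
};

static void kick(struct dual *d)
{
	/* common handling for either expiry */
}

#define dual_kick(t, member) \
	kick(container_of(t, struct dual, member))

static void timeslice_fn(struct timer_list *t)
{
	dual_kick(t, timer);
}

static void preempt_fn(struct timer_list *t)
{
	dual_kick(t, preempt);
}
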
static void queue_request(struct intel_engine_cs *engine,
- struct i915_sched_node *node,
- int prio)
+ struct i915_request *rq)
{
- GEM_BUG_ON(!list_empty(&node->link));
- list_add_tail(&node->link, i915_sched_lookup_priolist(engine, prio));
+ GEM_BUG_ON(!list_empty(&rq->sched.link));
+ list_add_tail(&rq->sched.link,
+ i915_sched_lookup_priolist(engine, rq_prio(rq)));
+ set_bit(I915_FENCE_FLAG_PQUEUE, &rq->fence.flags);
}
static void __submit_queue_imm(struct intel_engine_cs *engine)
@@ -1376,12 +2763,23 @@ static void __submit_queue_imm(struct intel_engine_cs *engine)
tasklet_hi_schedule(&execlists->tasklet);
}
-static void submit_queue(struct intel_engine_cs *engine, int prio)
+static void submit_queue(struct intel_engine_cs *engine,
+ const struct i915_request *rq)
{
- if (prio > engine->execlists.queue_priority_hint) {
- engine->execlists.queue_priority_hint = prio;
- __submit_queue_imm(engine);
- }
+ struct intel_engine_execlists *execlists = &engine->execlists;
+
+ if (rq_prio(rq) <= execlists->queue_priority_hint)
+ return;
+
+ execlists->queue_priority_hint = rq_prio(rq);
+ __submit_queue_imm(engine);
+}
+
+static bool ancestor_on_hold(const struct intel_engine_cs *engine,
+ const struct i915_request *rq)
+{
+ GEM_BUG_ON(i915_request_on_hold(rq));
+ return !list_empty(&engine->active.hold) && hold_request(rq);
}
static void execlists_submit_request(struct i915_request *request)
@@ -1392,12 +2790,17 @@ static void execlists_submit_request(struct i915_request *request)
/* Will be called from irq-context when using foreign fences. */
spin_lock_irqsave(&engine->active.lock, flags);
- queue_request(engine, &request->sched, rq_prio(request));
+ if (unlikely(ancestor_on_hold(engine, request))) {
+ list_add_tail(&request->sched.link, &engine->active.hold);
+ i915_request_set_hold(request);
+ } else {
+ queue_request(engine, request);
- GEM_BUG_ON(RB_EMPTY_ROOT(&engine->execlists.queue.rb_root));
- GEM_BUG_ON(list_empty(&request->sched.link));
+ GEM_BUG_ON(RB_EMPTY_ROOT(&engine->execlists.queue.rb_root));
+ GEM_BUG_ON(list_empty(&request->sched.link));
- submit_queue(engine, rq_prio(request));
+ submit_queue(engine, request);
+ }
spin_unlock_irqrestore(&engine->active.lock, flags);
}
@@ -1405,9 +2808,7 @@ static void execlists_submit_request(struct i915_request *request)
static void __execlists_context_fini(struct intel_context *ce)
{
intel_ring_put(ce->ring);
-
- GEM_BUG_ON(i915_gem_object_is_active(ce->state->obj));
- i915_gem_object_put(ce->state->obj);
+ i915_vma_put(ce->state);
}
static void execlists_context_destroy(struct kref *kref)
@@ -1420,18 +2821,46 @@ static void execlists_context_destroy(struct kref *kref)
if (ce->state)
__execlists_context_fini(ce);
+ intel_context_fini(ce);
intel_context_free(ce);
}
+static void
+set_redzone(void *vaddr, const struct intel_engine_cs *engine)
+{
+ if (!IS_ENABLED(CONFIG_DRM_I915_DEBUG_GEM))
+ return;
+
+ vaddr += engine->context_size;
+
+ memset(vaddr, CONTEXT_REDZONE, I915_GTT_PAGE_SIZE);
+}
+
+static void
+check_redzone(const void *vaddr, const struct intel_engine_cs *engine)
+{
+ if (!IS_ENABLED(CONFIG_DRM_I915_DEBUG_GEM))
+ return;
+
+ vaddr += engine->context_size;
+
+ if (memchr_inv(vaddr, CONTEXT_REDZONE, I915_GTT_PAGE_SIZE))
+ dev_err_once(engine->i915->drm.dev,
+ "%s context redzone overwritten!\n",
+ engine->name);
+}
+
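set_redzone()/check_redzone() poison the page after the context image and
verify it on unpin to catch overruns of the state. A self-contained
userspace analogue of the poison-and-check pattern (hypothetical buffer
layout, not the real context image):

/* Redzone poison/check, userspace analogue of the pair above. */
#include <stdio.h>
#include <string.h>

#define PAYLOAD	64
#define REDZONE	16
#define POISON	0x5a

static unsigned char buf[PAYLOAD + REDZONE];

static void set_redzone(void)
{
	memset(buf + PAYLOAD, POISON, REDZONE);
}

static int check_redzone(void)
{
	int i;

	for (i = 0; i < REDZONE; i++) {
		if (buf[PAYLOAD + i] != POISON) {
			fprintf(stderr, "redzone overwritten at +%d!\n", i);
			return -1;
		}
	}

	return 0;
}

int main(void)
{
	set_redzone();
	buf[PAYLOAD + 3] = 0;	/* simulate an overrun */
	return check_redzone() ? 1 : 0;
}
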
static void execlists_context_unpin(struct intel_context *ce)
{
- i915_gem_context_unpin_hw_id(ce->gem_context);
+ check_redzone((void *)ce->lrc_reg_state - LRC_STATE_PN * PAGE_SIZE,
+ ce->engine);
+
i915_gem_object_unpin_map(ce->state->obj);
}
static void
-__execlists_update_reg_state(struct intel_context *ce,
- struct intel_engine_cs *engine)
+__execlists_update_reg_state(const struct intel_context *ce,
+ const struct intel_engine_cs *engine)
{
struct intel_ring *ring = ce->ring;
u32 *regs = ce->lrc_reg_state;
@@ -1439,14 +2868,17 @@ __execlists_update_reg_state(struct intel_context *ce,
GEM_BUG_ON(!intel_ring_offset_valid(ring, ring->head));
GEM_BUG_ON(!intel_ring_offset_valid(ring, ring->tail));
- regs[CTX_RING_BUFFER_START + 1] = i915_ggtt_offset(ring->vma);
- regs[CTX_RING_HEAD + 1] = ring->head;
- regs[CTX_RING_TAIL + 1] = ring->tail;
+ regs[CTX_RING_START] = i915_ggtt_offset(ring->vma);
+ regs[CTX_RING_HEAD] = ring->head;
+ regs[CTX_RING_TAIL] = ring->tail;
/* RPCS */
- if (engine->class == RENDER_CLASS)
- regs[CTX_R_PWR_CLK_STATE + 1] =
+ if (engine->class == RENDER_CLASS) {
+ regs[CTX_R_PWR_CLK_STATE] =
intel_sseu_make_rpcs(engine->i915, &ce->sseu);
+
+ i915_oa_init_reg_state(ce, engine);
+ }
}
static int
@@ -1454,46 +2886,21 @@ __execlists_context_pin(struct intel_context *ce,
struct intel_engine_cs *engine)
{
void *vaddr;
- int ret;
-
- GEM_BUG_ON(!ce->gem_context->vm);
- ret = execlists_context_deferred_alloc(ce, engine);
- if (ret)
- goto err;
GEM_BUG_ON(!ce->state);
-
- ret = intel_context_active_acquire(ce,
- engine->i915->ggtt.pin_bias |
- PIN_OFFSET_BIAS |
- PIN_HIGH);
- if (ret)
- goto err;
+ GEM_BUG_ON(!i915_vma_is_pinned(ce->state));
vaddr = i915_gem_object_pin_map(ce->state->obj,
i915_coherent_map_type(engine->i915) |
I915_MAP_OVERRIDE);
- if (IS_ERR(vaddr)) {
- ret = PTR_ERR(vaddr);
- goto unpin_active;
- }
+ if (IS_ERR(vaddr))
+ return PTR_ERR(vaddr);
- ret = i915_gem_context_pin_hw_id(ce->gem_context);
- if (ret)
- goto unpin_map;
-
- ce->lrc_desc = lrc_descriptor(ce, engine);
+ ce->lrc_desc = lrc_descriptor(ce, engine) | CTX_DESC_FORCE_RESTORE;
ce->lrc_reg_state = vaddr + LRC_STATE_PN * PAGE_SIZE;
__execlists_update_reg_state(ce, engine);
return 0;
-
-unpin_map:
- i915_gem_object_unpin_map(ce->state->obj);
-unpin_active:
- intel_context_active_release(ce);
-err:
- return ret;
}
static int execlists_context_pin(struct intel_context *ce)
@@ -1501,8 +2908,16 @@ static int execlists_context_pin(struct intel_context *ce)
return __execlists_context_pin(ce, ce->engine);
}
+static int execlists_context_alloc(struct intel_context *ce)
+{
+ return __execlists_context_alloc(ce, ce->engine);
+}
+
static void execlists_context_reset(struct intel_context *ce)
{
+ CE_TRACE(ce, "reset\n");
+ GEM_BUG_ON(!intel_context_is_pinned(ce));
+
/*
* Because we emit WA_TAIL_DWORDS there may be a disparity
* between our bookkeeping in ce->ring->head and ce->ring->tail and
@@ -1519,11 +2934,19 @@ static void execlists_context_reset(struct intel_context *ce)
* So to avoid that we reset the context images upon resume. For
* simplicity, we just zero everything out.
*/
- intel_ring_reset(ce->ring, 0);
+ intel_ring_reset(ce->ring, ce->ring->emit);
+
+ /* Scrub away the garbage */
+ execlists_init_reg_state(ce->lrc_reg_state,
+ ce, ce->engine, ce->ring, true);
__execlists_update_reg_state(ce, ce->engine);
+
+ ce->lrc_desc |= CTX_DESC_FORCE_RESTORE;
}
static const struct intel_context_ops execlists_context_ops = {
+ .alloc = execlists_context_alloc,
+
.pin = execlists_context_pin,
.unpin = execlists_context_unpin,
@@ -1538,7 +2961,7 @@ static int gen8_emit_init_breadcrumb(struct i915_request *rq)
{
u32 *cs;
- GEM_BUG_ON(!rq->timeline->has_initial_breadcrumb);
+ GEM_BUG_ON(!i915_request_timeline(rq)->has_initial_breadcrumb);
cs = intel_ring_begin(rq, 6);
if (IS_ERR(cs))
@@ -1554,7 +2977,7 @@ static int gen8_emit_init_breadcrumb(struct i915_request *rq)
*cs++ = MI_NOOP;
*cs++ = MI_STORE_DWORD_IMM_GEN4 | MI_USE_GGTT;
- *cs++ = rq->timeline->hwsp_offset;
+ *cs++ = i915_request_timeline(rq)->hwsp_offset;
*cs++ = 0;
*cs++ = rq->fence.seqno - 1;
@@ -1566,66 +2989,11 @@ static int gen8_emit_init_breadcrumb(struct i915_request *rq)
return 0;
}
-static int emit_pdps(struct i915_request *rq)
-{
- const struct intel_engine_cs * const engine = rq->engine;
- struct i915_ppgtt * const ppgtt =
- i915_vm_to_ppgtt(rq->gem_context->vm);
- int err, i;
- u32 *cs;
-
- GEM_BUG_ON(intel_vgpu_active(rq->i915));
-
- /*
- * Beware ye of the dragons, this sequence is magic!
- *
- * Small changes to this sequence can cause anything from
- * GPU hangs to forcewake errors and machine lockups!
- */
-
- /* Flush any residual operations from the context load */
- err = engine->emit_flush(rq, EMIT_FLUSH);
- if (err)
- return err;
-
- /* Magic required to prevent forcewake errors! */
- err = engine->emit_flush(rq, EMIT_INVALIDATE);
- if (err)
- return err;
-
- cs = intel_ring_begin(rq, 4 * GEN8_3LVL_PDPES + 2);
- if (IS_ERR(cs))
- return PTR_ERR(cs);
-
- /* Ensure the LRI have landed before we invalidate & continue */
- *cs++ = MI_LOAD_REGISTER_IMM(2 * GEN8_3LVL_PDPES) | MI_LRI_FORCE_POSTED;
- for (i = GEN8_3LVL_PDPES; i--; ) {
- const dma_addr_t pd_daddr = i915_page_dir_dma_addr(ppgtt, i);
- u32 base = engine->mmio_base;
-
- *cs++ = i915_mmio_reg_offset(GEN8_RING_PDP_UDW(base, i));
- *cs++ = upper_32_bits(pd_daddr);
- *cs++ = i915_mmio_reg_offset(GEN8_RING_PDP_LDW(base, i));
- *cs++ = lower_32_bits(pd_daddr);
- }
- *cs++ = MI_NOOP;
-
- intel_ring_advance(rq, cs);
-
- /* Be doubly sure the LRI have landed before proceeding */
- err = engine->emit_flush(rq, EMIT_FLUSH);
- if (err)
- return err;
-
- /* Re-invalidate the TLB for luck */
- return engine->emit_flush(rq, EMIT_INVALIDATE);
-}
-
static int execlists_request_alloc(struct i915_request *request)
{
int ret;
- GEM_BUG_ON(!intel_context_is_pinned(request->hw_context));
+ GEM_BUG_ON(!intel_context_is_pinned(request->context));
/*
* Flush enough space to reduce the likelihood of waiting after
@@ -1643,10 +3011,7 @@ static int execlists_request_alloc(struct i915_request *request)
*/
/* Unconditionally invalidate GPU caches and TLBs. */
- if (i915_vm_is_4lvl(request->gem_context->vm))
- ret = request->engine->emit_flush(request, EMIT_INVALIDATE);
- else
- ret = emit_pdps(request);
+ ret = request->engine->emit_flush(request, EMIT_INVALIDATE);
if (ret)
return ret;
@@ -1676,7 +3041,8 @@ gen8_emit_flush_coherentl3_wa(struct intel_engine_cs *engine, u32 *batch)
/* NB no one else is allowed to scribble over scratch + 256! */
*batch++ = MI_STORE_REGISTER_MEM_GEN8 | MI_SRM_LRM_GLOBAL_GTT;
*batch++ = i915_mmio_reg_offset(GEN8_L3SQCREG4);
- *batch++ = i915_scratch_offset(engine->i915) + 256;
+ *batch++ = intel_gt_scratch_offset(engine->gt,
+ INTEL_GT_SCRATCH_FIELD_COHERENTL3_WA);
*batch++ = 0;
*batch++ = MI_LOAD_REGISTER_IMM(1);
@@ -1690,7 +3056,8 @@ gen8_emit_flush_coherentl3_wa(struct intel_engine_cs *engine, u32 *batch)
*batch++ = MI_LOAD_REGISTER_MEM_GEN8 | MI_SRM_LRM_GLOBAL_GTT;
*batch++ = i915_mmio_reg_offset(GEN8_L3SQCREG4);
- *batch++ = i915_scratch_offset(engine->i915) + 256;
+ *batch++ = intel_gt_scratch_offset(engine->gt,
+ INTEL_GT_SCRATCH_FIELD_COHERENTL3_WA);
*batch++ = 0;
return batch;
@@ -1724,11 +3091,10 @@ static u32 *gen8_init_indirectctx_bb(struct intel_engine_cs *engine, u32 *batch)
/* Actual scratch location is at 128 bytes offset */
batch = gen8_emit_pipe_control(batch,
PIPE_CONTROL_FLUSH_L3 |
- PIPE_CONTROL_GLOBAL_GTT_IVB |
+ PIPE_CONTROL_STORE_DATA_INDEX |
PIPE_CONTROL_CS_STALL |
PIPE_CONTROL_QW_WRITE,
- i915_scratch_offset(engine->i915) +
- 2 * CACHELINE_BYTES);
+ LRC_PPHWSP_SCRATCH_ADDR);
*batch++ = MI_ARB_ON_OFF | MI_ARB_ENABLE;
@@ -1794,6 +3160,14 @@ static u32 *gen9_init_indirectctx_bb(struct intel_engine_cs *engine, u32 *batch)
/* WaFlushCoherentL3CacheLinesAtContextSwitch:skl,bxt,glk */
batch = gen8_emit_flush_coherentl3_wa(engine, batch);
+ /* WaClearSlmSpaceAtContextSwitch:skl,bxt,kbl,glk,cfl */
+ batch = gen8_emit_pipe_control(batch,
+ PIPE_CONTROL_FLUSH_L3 |
+ PIPE_CONTROL_STORE_DATA_INDEX |
+ PIPE_CONTROL_CS_STALL |
+ PIPE_CONTROL_QW_WRITE,
+ LRC_PPHWSP_SCRATCH_ADDR);
+
batch = emit_lri(batch, lri, ARRAY_SIZE(lri));
/* WaMediaPoolStateCmdInWABB:bxt,glk */
@@ -1874,7 +3248,7 @@ static int lrc_setup_wa_ctx(struct intel_engine_cs *engine)
if (IS_ERR(obj))
return PTR_ERR(obj);
- vma = i915_vma_instance(obj, &engine->i915->ggtt.vm, NULL);
+ vma = i915_vma_instance(obj, &engine->gt->ggtt->vm, NULL);
if (IS_ERR(vma)) {
err = PTR_ERR(vma);
goto err;
@@ -1914,6 +3288,7 @@ static int intel_init_workaround_bb(struct intel_engine_cs *engine)
return 0;
switch (INTEL_GEN(engine->i915)) {
+ case 12:
case 11:
return 0;
case 10:
@@ -1970,30 +3345,33 @@ static int intel_init_workaround_bb(struct intel_engine_cs *engine)
static void enable_execlists(struct intel_engine_cs *engine)
{
+ u32 mode;
+
+ assert_forcewakes_active(engine->uncore, FORCEWAKE_ALL);
+
intel_engine_set_hwsp_writemask(engine, ~0u); /* HWSTAM */
if (INTEL_GEN(engine->i915) >= 11)
- ENGINE_WRITE(engine,
- RING_MODE_GEN7,
- _MASKED_BIT_ENABLE(GEN11_GFX_DISABLE_LEGACY_MODE));
+ mode = _MASKED_BIT_ENABLE(GEN11_GFX_DISABLE_LEGACY_MODE);
else
- ENGINE_WRITE(engine,
- RING_MODE_GEN7,
- _MASKED_BIT_ENABLE(GFX_RUN_LIST_ENABLE));
+ mode = _MASKED_BIT_ENABLE(GFX_RUN_LIST_ENABLE);
+ ENGINE_WRITE_FW(engine, RING_MODE_GEN7, mode);
- ENGINE_WRITE(engine, RING_MI_MODE, _MASKED_BIT_DISABLE(STOP_RING));
+ ENGINE_WRITE_FW(engine, RING_MI_MODE, _MASKED_BIT_DISABLE(STOP_RING));
- ENGINE_WRITE(engine,
- RING_HWS_PGA,
- i915_ggtt_offset(engine->status_page.vma));
+ ENGINE_WRITE_FW(engine,
+ RING_HWS_PGA,
+ i915_ggtt_offset(engine->status_page.vma));
ENGINE_POSTING_READ(engine, RING_HWS_PGA);
+
+ engine->context_tag = 0;
}
static bool unexpected_starting_state(struct intel_engine_cs *engine)
{
bool unexpected = false;
- if (ENGINE_READ(engine, RING_MI_MODE) & STOP_RING) {
+ if (ENGINE_READ_FW(engine, RING_MI_MODE) & STOP_RING) {
DRM_DEBUG_DRIVER("STOP_RING still set in RING_MI_MODE\n");
unexpected = true;
}
@@ -2026,8 +3404,8 @@ static void execlists_reset_prepare(struct intel_engine_cs *engine)
struct intel_engine_execlists * const execlists = &engine->execlists;
unsigned long flags;
- GEM_TRACE("%s: depth<-%d\n", engine->name,
- atomic_read(&execlists->tasklet.count));
+ ENGINE_TRACE(engine, "depth<-%d\n",
+ atomic_read(&execlists->tasklet.count));
/*
* Prevent request submission to the hardware until we have
@@ -2041,34 +3419,32 @@ static void execlists_reset_prepare(struct intel_engine_cs *engine)
__tasklet_disable_sync_once(&execlists->tasklet);
GEM_BUG_ON(!reset_in_progress(execlists));
- intel_engine_stop_cs(engine);
-
/* And flush any current direct submission. */
spin_lock_irqsave(&engine->active.lock, flags);
spin_unlock_irqrestore(&engine->active.lock, flags);
-}
-
-static bool lrc_regs_ok(const struct i915_request *rq)
-{
- const struct intel_ring *ring = rq->ring;
- const u32 *regs = rq->hw_context->lrc_reg_state;
-
- /* Quick spot check for the common signs of context corruption */
-
- if (regs[CTX_RING_BUFFER_CONTROL + 1] !=
- (RING_CTL_SIZE(ring->size) | RING_VALID))
- return false;
-
- if (regs[CTX_RING_BUFFER_START + 1] != i915_ggtt_offset(ring->vma))
- return false;
- return true;
+ /*
+	 * We stop the engines, otherwise we might get a failed reset and a
+	 * dead gpu (on elk). Also, even a gpu as modern as kbl can suffer
+	 * from a system hang if a batchbuffer is progressing when
+	 * the reset is issued, regardless of the READY_TO_RESET ack.
+	 * Thus assume it is best to stop the engines on all gens
+	 * where we have a gpu reset.
+ *
+ * WaKBLVECSSemaphoreWaitPoll:kbl (on ALL_ENGINES)
+ *
+ * FIXME: Wa for more modern gens needs to be validated
+ */
+ intel_engine_stop_cs(engine);
}
-static void reset_csb_pointers(struct intel_engine_execlists *execlists)
+static void reset_csb_pointers(struct intel_engine_cs *engine)
{
+ struct intel_engine_execlists * const execlists = &engine->execlists;
const unsigned int reset_value = execlists->csb_size - 1;
+ ring_set_paused(engine, 0);
+
/*
* After a reset, the HW starts writing into CSB entry [0]. We
* therefore have to set our HEAD pointer back one entry so that
@@ -2082,27 +3458,35 @@ static void reset_csb_pointers(struct intel_engine_execlists *execlists)
WRITE_ONCE(*execlists->csb_write, reset_value);
wmb(); /* Make sure this is visible to HW (paranoia?) */
+ /*
+ * Sometimes Icelake forgets to reset its pointers on a GPU reset.
+ * Bludgeon them with a mmio update to be sure.
+ */
+ ENGINE_WRITE(engine, RING_CONTEXT_STATUS_PTR,
+ reset_value << 8 | reset_value);
+ ENGINE_POSTING_READ(engine, RING_CONTEXT_STATUS_PTR);
+
invalidate_csb_entries(&execlists->csb_status[0],
&execlists->csb_status[reset_value]);
}
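After a reset the HW resumes writing at CSB entry [0], so the driver parks
its consumer head one slot behind, at csb_size - 1. A toy model of that
producer/consumer ring (hypothetical entry format, not the real CSB):

/* Toy CSB ring: head parked at size - 1 so slot [0] is read first. */
#include <stdio.h>

#define CSB_SIZE 6

static unsigned int csb[CSB_SIZE];
static unsigned int head;	/* last entry consumed */
static unsigned int wptr;	/* last entry written by "HW" */

static void reset_pointers(void)
{
	head = CSB_SIZE - 1;	/* one behind entry [0] */
	wptr = CSB_SIZE - 1;
}

static void consume(void)
{
	while (head != wptr) {
		head = (head + 1) % CSB_SIZE;
		printf("event %u from slot %u\n", csb[head], head);
	}
}

int main(void)
{
	reset_pointers();
	csb[0] = 42;	/* "HW" writes its first post-reset event... */
	wptr = 0;	/* ...and advances its write pointer to [0] */
	consume();	/* the consumer sees slot 0 first, as intended */
	return 0;
}
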
-static struct i915_request *active_request(struct i915_request *rq)
+static void __reset_stop_ring(u32 *regs, const struct intel_engine_cs *engine)
{
- const struct list_head * const list = &rq->engine->active.requests;
- const struct intel_context * const context = rq->hw_context;
- struct i915_request *active = NULL;
-
- list_for_each_entry_from_reverse(rq, list, sched.link) {
- if (i915_request_completed(rq))
- break;
+ int x;
- if (rq->hw_context != context)
- break;
-
- active = rq;
+ x = lrc_ring_mi_mode(engine);
+ if (x != -1) {
+ regs[x + 1] &= ~STOP_RING;
+ regs[x + 1] |= STOP_RING << 16;
}
+}
- return active;
+static void __execlists_reset_reg_state(const struct intel_context *ce,
+ const struct intel_engine_cs *engine)
+{
+ u32 *regs = ce->lrc_reg_state;
+
+ __reset_stop_ring(regs, engine);
}
static void __execlists_reset(struct intel_engine_cs *engine, bool stalled)
@@ -2110,38 +3494,42 @@ static void __execlists_reset(struct intel_engine_cs *engine, bool stalled)
struct intel_engine_execlists * const execlists = &engine->execlists;
struct intel_context *ce;
struct i915_request *rq;
- u32 *regs;
+
+ mb(); /* paranoia: read the CSB pointers from after the reset */
+ clflush(execlists->csb_write);
+ mb();
process_csb(engine); /* drain preemption events */
/* Following the reset, we need to reload the CSB read/write pointers */
- reset_csb_pointers(&engine->execlists);
+ reset_csb_pointers(engine);
/*
* Save the currently executing context, even if we completed
* its request, it was still running at the time of the
* reset and will have been clobbered.
*/
- if (!port_isset(execlists->port))
- goto out_clear;
+ rq = execlists_active(execlists);
+ if (!rq)
+ goto unwind;
- rq = port_request(execlists->port);
- ce = rq->hw_context;
+ /* We still have requests in-flight; the engine should be active */
+ GEM_BUG_ON(!intel_engine_pm_is_awake(engine));
- /*
- * Catch up with any missed context-switch interrupts.
- *
- * Ideally we would just read the remaining CSB entries now that we
- * know the gpu is idle. However, the CSB registers are sometimes^W
- * often trashed across a GPU reset! Instead we have to rely on
- * guessing the missed context-switch events by looking at what
- * requests were completed.
- */
- execlists_cancel_port_requests(execlists);
+ ce = rq->context;
+ GEM_BUG_ON(!i915_vma_is_pinned(ce->state));
- rq = active_request(rq);
- if (!rq)
+ if (i915_request_completed(rq)) {
+ /* Idle context; tidy up the ring so we can restart afresh */
+ ce->ring->head = intel_ring_wrap(ce->ring, rq->tail);
goto out_replay;
+ }
+
+ /* Context has requests still in-flight; it should not be idle! */
+ GEM_BUG_ON(i915_active_is_idle(&ce->active));
+ rq = active_request(ce->timeline, rq);
+ ce->ring->head = intel_ring_wrap(ce->ring, rq->head);
+ GEM_BUG_ON(ce->ring->head == ce->ring->tail);
/*
* If this request hasn't started yet, e.g. it is waiting on a
@@ -2155,7 +3543,7 @@ static void __execlists_reset(struct intel_engine_cs *engine, bool stalled)
* Otherwise, if we have not started yet, the request should replay
* perfectly and we do not need to flag the result as being erroneous.
*/
- if (!i915_request_started(rq) && lrc_regs_ok(rq))
+ if (!i915_request_started(rq))
goto out_replay;
/*
@@ -2169,8 +3557,8 @@ static void __execlists_reset(struct intel_engine_cs *engine, bool stalled)
* and have to at least restore the RING register in the context
* image back to the expected values to skip over the guilty request.
*/
- i915_reset_request(rq, stalled);
- if (!stalled && lrc_regs_ok(rq))
+ __i915_request_reset(rq, stalled);
+ if (!stalled)
goto out_replay;
/*
@@ -2181,33 +3569,28 @@ static void __execlists_reset(struct intel_engine_cs *engine, bool stalled)
* future request will be after userspace has had the opportunity
* to recreate its own state.
*/
- regs = ce->lrc_reg_state;
- if (engine->pinned_default_state) {
- memcpy(regs, /* skip restoring the vanilla PPHWSP */
- engine->pinned_default_state + LRC_STATE_PN * PAGE_SIZE,
- engine->context_size - PAGE_SIZE);
- }
- execlists_init_reg_state(regs, ce, engine, ce->ring);
+ GEM_BUG_ON(!intel_context_is_pinned(ce));
+ restore_default_state(ce, engine);
out_replay:
- /* Rerun the request; its payload has been neutered (if guilty). */
- ce->ring->head =
- rq ? intel_ring_wrap(ce->ring, rq->head) : ce->ring->tail;
+ ENGINE_TRACE(engine, "replay {head:%04x, tail:%04x}\n",
+ ce->ring->head, ce->ring->tail);
intel_ring_update_space(ce->ring);
+ __execlists_reset_reg_state(ce, engine);
__execlists_update_reg_state(ce, engine);
+ ce->lrc_desc |= CTX_DESC_FORCE_RESTORE; /* paranoid: GPU was reset! */
+unwind:
/* Push back any incomplete requests for replay after the reset. */
+ cancel_port_requests(execlists);
__unwind_incomplete_requests(engine);
-
-out_clear:
- execlists_clear_all_active(execlists);
}
-static void execlists_reset(struct intel_engine_cs *engine, bool stalled)
+static void execlists_reset_rewind(struct intel_engine_cs *engine, bool stalled)
{
unsigned long flags;
- GEM_TRACE("%s\n", engine->name);
+ ENGINE_TRACE(engine, "\n");
spin_lock_irqsave(&engine->active.lock, flags);
@@ -2221,14 +3604,14 @@ static void nop_submission_tasklet(unsigned long data)
/* The driver is wedged; don't process any more events. */
}
-static void execlists_cancel_requests(struct intel_engine_cs *engine)
+static void execlists_reset_cancel(struct intel_engine_cs *engine)
{
struct intel_engine_execlists * const execlists = &engine->execlists;
struct i915_request *rq, *rn;
struct rb_node *rb;
unsigned long flags;
- GEM_TRACE("%s\n", engine->name);
+ ENGINE_TRACE(engine, "\n");
/*
* Before we call engine->cancel_requests(), we should have exclusive
@@ -2249,12 +3632,8 @@ static void execlists_cancel_requests(struct intel_engine_cs *engine)
__execlists_reset(engine, true);
/* Mark all executing requests as skipped. */
- list_for_each_entry(rq, &engine->active.requests, sched.link) {
- if (!i915_request_signaled(rq))
- dma_fence_set_error(&rq->fence, -EIO);
-
- i915_request_mark_complete(rq);
- }
+ list_for_each_entry(rq, &engine->active.requests, sched.link)
+ mark_eio(rq);
/* Flush the queued requests to the timeline list (for retiring). */
while ((rb = rb_first_cached(&execlists->queue))) {
@@ -2262,16 +3641,18 @@ static void execlists_cancel_requests(struct intel_engine_cs *engine)
int i;
priolist_for_each_request_consume(rq, rn, p, i) {
- list_del_init(&rq->sched.link);
+ mark_eio(rq);
__i915_request_submit(rq);
- dma_fence_set_error(&rq->fence, -EIO);
- i915_request_mark_complete(rq);
}
rb_erase_cached(&p->node, &execlists->queue);
i915_priolist_free(p);
}
+ /* On-hold requests will be flushed to timeline upon their release */
+ list_for_each_entry(rq, &engine->active.hold, sched.link)
+ mark_eio(rq);
+
/* Cancel all attached virtual engines */
while ((rb = rb_first_cached(&execlists->virtual))) {
struct virtual_engine *ve =
@@ -2281,13 +3662,15 @@ static void execlists_cancel_requests(struct intel_engine_cs *engine)
RB_CLEAR_NODE(rb);
spin_lock(&ve->base.active.lock);
- if (ve->request) {
- ve->request->engine = engine;
- __i915_request_submit(ve->request);
- dma_fence_set_error(&ve->request->fence, -EIO);
- i915_request_mark_complete(ve->request);
+ rq = fetch_and_zero(&ve->request);
+ if (rq) {
+ mark_eio(rq);
+
+ rq->engine = engine;
+ __i915_request_submit(rq);
+ i915_request_put(rq);
+
ve->base.execlists.queue_priority_hint = INT_MIN;
- ve->request = NULL;
}
spin_unlock(&ve->base.active.lock);
}
@@ -2296,7 +3679,6 @@ static void execlists_cancel_requests(struct intel_engine_cs *engine)
execlists->queue_priority_hint = INT_MIN;
execlists->queue = RB_ROOT_CACHED;
- GEM_BUG_ON(port_isset(execlists->port));
GEM_BUG_ON(__tasklet_is_enabled(&execlists->tasklet));
execlists->tasklet.func = nop_submission_tasklet;
@@ -2320,13 +3702,13 @@ static void execlists_reset_finish(struct intel_engine_cs *engine)
if (__tasklet_enable(&execlists->tasklet))
/* And kick in case we missed a new request submission. */
tasklet_hi_schedule(&execlists->tasklet);
- GEM_TRACE("%s: depth->%d\n", engine->name,
- atomic_read(&execlists->tasklet.count));
+ ENGINE_TRACE(engine, "depth->%d\n",
+ atomic_read(&execlists->tasklet.count));
}
-static int gen8_emit_bb_start(struct i915_request *rq,
- u64 offset, u32 len,
- const unsigned int flags)
+static int gen8_emit_bb_start_noarb(struct i915_request *rq,
+ u64 offset, u32 len,
+ const unsigned int flags)
{
u32 *cs;
@@ -2360,7 +3742,7 @@ static int gen8_emit_bb_start(struct i915_request *rq,
return 0;
}
-static int gen9_emit_bb_start(struct i915_request *rq,
+static int gen8_emit_bb_start(struct i915_request *rq,
u64 offset, u32 len,
const unsigned int flags)
{
@@ -2421,7 +3803,7 @@ static int gen8_emit_flush(struct i915_request *request, u32 mode)
}
*cs++ = cmd;
- *cs++ = I915_GEM_HWS_SCRATCH_ADDR | MI_FLUSH_DW_USE_GTT;
+ *cs++ = LRC_PPHWSP_SCRATCH_ADDR;
*cs++ = 0; /* upper addr */
*cs++ = 0; /* value */
intel_ring_advance(request, cs);
@@ -2432,9 +3814,6 @@ static int gen8_emit_flush(struct i915_request *request, u32 mode)
static int gen8_emit_flush_render(struct i915_request *request,
u32 mode)
{
- struct intel_engine_cs *engine = request->engine;
- u32 scratch_addr =
- i915_scratch_offset(engine->i915) + 2 * CACHELINE_BYTES;
bool vf_flush_wa = false, dc_flush_wa = false;
u32 *cs, flags = 0;
int len;
@@ -2456,7 +3835,7 @@ static int gen8_emit_flush_render(struct i915_request *request,
flags |= PIPE_CONTROL_CONST_CACHE_INVALIDATE;
flags |= PIPE_CONTROL_STATE_CACHE_INVALIDATE;
flags |= PIPE_CONTROL_QW_WRITE;
- flags |= PIPE_CONTROL_GLOBAL_GTT_IVB;
+ flags |= PIPE_CONTROL_STORE_DATA_INDEX;
/*
* On GEN9: before VF_CACHE_INVALIDATE we need to emit a NULL
@@ -2489,7 +3868,7 @@ static int gen8_emit_flush_render(struct i915_request *request,
cs = gen8_emit_pipe_control(cs, PIPE_CONTROL_DC_FLUSH_ENABLE,
0);
- cs = gen8_emit_pipe_control(cs, flags, scratch_addr);
+ cs = gen8_emit_pipe_control(cs, flags, LRC_PPHWSP_SCRATCH_ADDR);
if (dc_flush_wa)
cs = gen8_emit_pipe_control(cs, PIPE_CONTROL_CS_STALL, 0);
@@ -2499,6 +3878,150 @@ static int gen8_emit_flush_render(struct i915_request *request,
return 0;
}
+static int gen11_emit_flush_render(struct i915_request *request,
+ u32 mode)
+{
+ if (mode & EMIT_FLUSH) {
+ u32 *cs;
+ u32 flags = 0;
+
+ flags |= PIPE_CONTROL_CS_STALL;
+
+ flags |= PIPE_CONTROL_TILE_CACHE_FLUSH;
+ flags |= PIPE_CONTROL_RENDER_TARGET_CACHE_FLUSH;
+ flags |= PIPE_CONTROL_DEPTH_CACHE_FLUSH;
+ flags |= PIPE_CONTROL_DC_FLUSH_ENABLE;
+ flags |= PIPE_CONTROL_FLUSH_ENABLE;
+ flags |= PIPE_CONTROL_QW_WRITE;
+ flags |= PIPE_CONTROL_STORE_DATA_INDEX;
+
+ cs = intel_ring_begin(request, 6);
+ if (IS_ERR(cs))
+ return PTR_ERR(cs);
+
+ cs = gen8_emit_pipe_control(cs, flags, LRC_PPHWSP_SCRATCH_ADDR);
+ intel_ring_advance(request, cs);
+ }
+
+ if (mode & EMIT_INVALIDATE) {
+ u32 *cs;
+ u32 flags = 0;
+
+ flags |= PIPE_CONTROL_CS_STALL;
+
+ flags |= PIPE_CONTROL_COMMAND_CACHE_INVALIDATE;
+ flags |= PIPE_CONTROL_TLB_INVALIDATE;
+ flags |= PIPE_CONTROL_INSTRUCTION_CACHE_INVALIDATE;
+ flags |= PIPE_CONTROL_TEXTURE_CACHE_INVALIDATE;
+ flags |= PIPE_CONTROL_VF_CACHE_INVALIDATE;
+ flags |= PIPE_CONTROL_CONST_CACHE_INVALIDATE;
+ flags |= PIPE_CONTROL_STATE_CACHE_INVALIDATE;
+ flags |= PIPE_CONTROL_QW_WRITE;
+ flags |= PIPE_CONTROL_STORE_DATA_INDEX;
+
+ cs = intel_ring_begin(request, 6);
+ if (IS_ERR(cs))
+ return PTR_ERR(cs);
+
+ cs = gen8_emit_pipe_control(cs, flags, LRC_PPHWSP_SCRATCH_ADDR);
+ intel_ring_advance(request, cs);
+ }
+
+ return 0;
+}
+
+static u32 preparser_disable(bool state)
+{
+ return MI_ARB_CHECK | 1 << 8 | state;
+}
+
+static int gen12_emit_flush_render(struct i915_request *request,
+ u32 mode)
+{
+ if (mode & EMIT_FLUSH) {
+ u32 flags = 0;
+ u32 *cs;
+
+ flags |= PIPE_CONTROL_TILE_CACHE_FLUSH;
+ flags |= PIPE_CONTROL_RENDER_TARGET_CACHE_FLUSH;
+ flags |= PIPE_CONTROL_DEPTH_CACHE_FLUSH;
+ /* Wa_1409600907:tgl */
+ flags |= PIPE_CONTROL_DEPTH_STALL;
+ flags |= PIPE_CONTROL_DC_FLUSH_ENABLE;
+ flags |= PIPE_CONTROL_FLUSH_ENABLE;
+ flags |= PIPE_CONTROL_HDC_PIPELINE_FLUSH;
+
+ flags |= PIPE_CONTROL_STORE_DATA_INDEX;
+ flags |= PIPE_CONTROL_QW_WRITE;
+
+ flags |= PIPE_CONTROL_CS_STALL;
+
+ cs = intel_ring_begin(request, 6);
+ if (IS_ERR(cs))
+ return PTR_ERR(cs);
+
+ cs = gen8_emit_pipe_control(cs, flags, LRC_PPHWSP_SCRATCH_ADDR);
+ intel_ring_advance(request, cs);
+ }
+
+ if (mode & EMIT_INVALIDATE) {
+ u32 flags = 0;
+ u32 *cs;
+
+ flags |= PIPE_CONTROL_COMMAND_CACHE_INVALIDATE;
+ flags |= PIPE_CONTROL_TLB_INVALIDATE;
+ flags |= PIPE_CONTROL_INSTRUCTION_CACHE_INVALIDATE;
+ flags |= PIPE_CONTROL_TEXTURE_CACHE_INVALIDATE;
+ flags |= PIPE_CONTROL_VF_CACHE_INVALIDATE;
+ flags |= PIPE_CONTROL_CONST_CACHE_INVALIDATE;
+ flags |= PIPE_CONTROL_STATE_CACHE_INVALIDATE;
+ flags |= PIPE_CONTROL_L3_RO_CACHE_INVALIDATE;
+
+ flags |= PIPE_CONTROL_STORE_DATA_INDEX;
+ flags |= PIPE_CONTROL_QW_WRITE;
+
+ flags |= PIPE_CONTROL_CS_STALL;
+
+ cs = intel_ring_begin(request, 8);
+ if (IS_ERR(cs))
+ return PTR_ERR(cs);
+
+ /*
+ * Prevent the pre-parser from skipping past the TLB
+ * invalidate and loading a stale page for the batch
+ * buffer / request payload.
+ */
+ *cs++ = preparser_disable(true);
+
+ cs = gen8_emit_pipe_control(cs, flags, LRC_PPHWSP_SCRATCH_ADDR);
+
+ *cs++ = preparser_disable(false);
+ intel_ring_advance(request, cs);
+
+ /*
+ * Wa_1604544889:tgl
+ */
+ if (IS_TGL_REVID(request->i915, TGL_REVID_A0, TGL_REVID_A0)) {
+ flags = 0;
+ flags |= PIPE_CONTROL_CS_STALL;
+ flags |= PIPE_CONTROL_HDC_PIPELINE_FLUSH;
+
+ flags |= PIPE_CONTROL_STORE_DATA_INDEX;
+ flags |= PIPE_CONTROL_QW_WRITE;
+
+ cs = intel_ring_begin(request, 6);
+ if (IS_ERR(cs))
+ return PTR_ERR(cs);
+
+ cs = gen8_emit_pipe_control(cs, flags,
+ LRC_PPHWSP_SCRATCH_ADDR);
+ intel_ring_advance(request, cs);
+ }
+ }
+
+ return 0;
+}
+
/*
* Reserve space for 2 NOOPs at the end of each request to be
* used as a workaround for not being allowed to do lite
@@ -2514,15 +4037,28 @@ static u32 *gen8_emit_wa_tail(struct i915_request *request, u32 *cs)
return cs;
}
-static u32 *gen8_emit_fini_breadcrumb(struct i915_request *request, u32 *cs)
+static u32 *emit_preempt_busywait(struct i915_request *request, u32 *cs)
{
- cs = gen8_emit_ggtt_write(cs,
- request->fence.seqno,
- request->timeline->hwsp_offset,
- 0);
+ *cs++ = MI_SEMAPHORE_WAIT |
+ MI_SEMAPHORE_GLOBAL_GTT |
+ MI_SEMAPHORE_POLL |
+ MI_SEMAPHORE_SAD_EQ_SDD;
+ *cs++ = 0;
+ *cs++ = intel_hws_preempt_address(request->engine);
+ *cs++ = 0;
+ return cs;
+}
+
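The MI_SEMAPHORE_WAIT | MI_SEMAPHORE_POLL | MI_SEMAPHORE_SAD_EQ_SDD above
makes the CS spin until the dword at intel_hws_preempt_address() equals
the inline semaphore data dword (0): ring_set_paused(engine, 1) holds the
slot at a non-matching value to freeze the ring, and writing 0 releases
it. A CPU-side model of that busywait (illustrative only, not i915 code):

#include <stdint.h>

/* Spin until *addr equals data, as the CS does for SAD_EQ_SDD. */
static void semaphore_wait_eq(volatile uint32_t *addr, uint32_t data)
{
	while (*addr != data)
		;	/* the driver releases by restoring 'data' */
}
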
+static __always_inline u32 *
+gen8_emit_fini_breadcrumb_footer(struct i915_request *request,
+ u32 *cs)
+{
*cs++ = MI_USER_INTERRUPT;
+
*cs++ = MI_ARB_ON_OFF | MI_ARB_ENABLE;
+ if (intel_engine_has_semaphores(request->engine))
+ cs = emit_preempt_busywait(request, cs);
request->tail = intel_ring_offset(request, cs);
assert_ring_tail_valid(request->ring, request->tail);
@@ -2530,22 +4066,92 @@ static u32 *gen8_emit_fini_breadcrumb(struct i915_request *request, u32 *cs)
return gen8_emit_wa_tail(request, cs);
}
+static u32 *gen8_emit_fini_breadcrumb(struct i915_request *request, u32 *cs)
+{
+ cs = gen8_emit_ggtt_write(cs,
+ request->fence.seqno,
+ i915_request_active_timeline(request)->hwsp_offset,
+ 0);
+
+ return gen8_emit_fini_breadcrumb_footer(request, cs);
+}
+
static u32 *gen8_emit_fini_breadcrumb_rcs(struct i915_request *request, u32 *cs)
{
+ cs = gen8_emit_pipe_control(cs,
+ PIPE_CONTROL_RENDER_TARGET_CACHE_FLUSH |
+ PIPE_CONTROL_DEPTH_CACHE_FLUSH |
+ PIPE_CONTROL_DC_FLUSH_ENABLE,
+ 0);
+
/* XXX flush+write+CS_STALL all in one upsets gem_concurrent_blt:kbl */
cs = gen8_emit_ggtt_write_rcs(cs,
request->fence.seqno,
- request->timeline->hwsp_offset,
+ i915_request_active_timeline(request)->hwsp_offset,
+ PIPE_CONTROL_FLUSH_ENABLE |
+ PIPE_CONTROL_CS_STALL);
+
+ return gen8_emit_fini_breadcrumb_footer(request, cs);
+}
+
+static u32 *
+gen11_emit_fini_breadcrumb_rcs(struct i915_request *request, u32 *cs)
+{
+ cs = gen8_emit_ggtt_write_rcs(cs,
+ request->fence.seqno,
+ i915_request_active_timeline(request)->hwsp_offset,
+ PIPE_CONTROL_CS_STALL |
+ PIPE_CONTROL_TILE_CACHE_FLUSH |
PIPE_CONTROL_RENDER_TARGET_CACHE_FLUSH |
PIPE_CONTROL_DEPTH_CACHE_FLUSH |
- PIPE_CONTROL_DC_FLUSH_ENABLE);
- cs = gen8_emit_pipe_control(cs,
- PIPE_CONTROL_FLUSH_ENABLE |
- PIPE_CONTROL_CS_STALL,
- 0);
+ PIPE_CONTROL_DC_FLUSH_ENABLE |
+ PIPE_CONTROL_FLUSH_ENABLE);
+
+ return gen8_emit_fini_breadcrumb_footer(request, cs);
+}
+
+/*
+ * Note that the CS instruction pre-parser will not stall on the breadcrumb
+ * flush and will continue pre-fetching the instructions after it before the
+ * memory sync is completed. On pre-gen12 HW, the pre-parser will stop at
+ * BB_START/END instructions, so, even though we might pre-fetch the preamble
+ * of the next request before the memory has been flushed, we're guaranteed that
+ * we won't access the batch itself too early.
+ * However, on gen12+ the parser can pre-fetch across the BB_START/END commands,
+ * so, if the current request is modifying an instruction in the next request on
+ * the same intel_context, we might pre-fetch and then execute the pre-update
+ * instruction. To avoid this, the users of self-modifying code should either
+ * disable the parser around the code emitting the memory writes, via a new flag
+ * added to MI_ARB_CHECK, or emit the writes from a different intel_context. For
+ * the in-kernel use-cases we've opted to use a separate context, see
+ * reloc_gpu() as an example.
+ * All the above applies only to the instructions themselves. Non-inline data
+ * used by the instructions is not pre-fetched.
+ */
+static u32 *gen12_emit_preempt_busywait(struct i915_request *request, u32 *cs)
+{
+ *cs++ = MI_SEMAPHORE_WAIT_TOKEN |
+ MI_SEMAPHORE_GLOBAL_GTT |
+ MI_SEMAPHORE_POLL |
+ MI_SEMAPHORE_SAD_EQ_SDD;
+ *cs++ = 0;
+ *cs++ = intel_hws_preempt_address(request->engine);
+ *cs++ = 0;
+ *cs++ = 0;
+ *cs++ = MI_NOOP;
+
+ return cs;
+}
+
+static __always_inline u32 *
+gen12_emit_fini_breadcrumb_footer(struct i915_request *request, u32 *cs)
+{
*cs++ = MI_USER_INTERRUPT;
+
*cs++ = MI_ARB_ON_OFF | MI_ARB_ENABLE;
+ if (intel_engine_has_semaphores(request->engine))
+ cs = gen12_emit_preempt_busywait(request, cs);
request->tail = intel_ring_offset(request, cs);
assert_ring_tail_valid(request->ring, request->tail);
@@ -2553,57 +4159,85 @@ static u32 *gen8_emit_fini_breadcrumb_rcs(struct i915_request *request, u32 *cs)
return gen8_emit_wa_tail(request, cs);
}
-static int gen8_init_rcs_context(struct i915_request *rq)
+static u32 *gen12_emit_fini_breadcrumb(struct i915_request *request, u32 *cs)
{
- int ret;
+ cs = gen8_emit_ggtt_write(cs,
+ request->fence.seqno,
+ i915_request_active_timeline(request)->hwsp_offset,
+ 0);
- ret = intel_engine_emit_ctx_wa(rq);
- if (ret)
- return ret;
+ return gen12_emit_fini_breadcrumb_footer(request, cs);
+}
- ret = intel_rcs_context_init_mocs(rq);
- /*
- * Failing to program the MOCS is non-fatal.The system will not
- * run at peak performance. So generate an error and carry on.
- */
- if (ret)
- DRM_ERROR("MOCS failed to program: expect performance issues.\n");
+static u32 *
+gen12_emit_fini_breadcrumb_rcs(struct i915_request *request, u32 *cs)
+{
+ cs = gen8_emit_ggtt_write_rcs(cs,
+ request->fence.seqno,
+ i915_request_active_timeline(request)->hwsp_offset,
+ PIPE_CONTROL_CS_STALL |
+ PIPE_CONTROL_TILE_CACHE_FLUSH |
+ PIPE_CONTROL_RENDER_TARGET_CACHE_FLUSH |
+ PIPE_CONTROL_DEPTH_CACHE_FLUSH |
+ /* Wa_1409600907:tgl */
+ PIPE_CONTROL_DEPTH_STALL |
+ PIPE_CONTROL_DC_FLUSH_ENABLE |
+ PIPE_CONTROL_FLUSH_ENABLE |
+ PIPE_CONTROL_HDC_PIPELINE_FLUSH);
- return i915_gem_render_state_emit(rq);
+ return gen12_emit_fini_breadcrumb_footer(request, cs);
}
static void execlists_park(struct intel_engine_cs *engine)
{
- intel_engine_park(engine);
+ cancel_timer(&engine->execlists.timer);
+ cancel_timer(&engine->execlists.preempt);
}
void intel_execlists_set_default_submission(struct intel_engine_cs *engine)
{
engine->submit_request = execlists_submit_request;
- engine->cancel_requests = execlists_cancel_requests;
engine->schedule = i915_schedule;
engine->execlists.tasklet.func = execlists_submission_tasklet;
engine->reset.prepare = execlists_reset_prepare;
- engine->reset.reset = execlists_reset;
+ engine->reset.rewind = execlists_reset_rewind;
+ engine->reset.cancel = execlists_reset_cancel;
engine->reset.finish = execlists_reset_finish;
engine->park = execlists_park;
engine->unpark = NULL;
engine->flags |= I915_ENGINE_SUPPORTS_STATS;
- if (!intel_vgpu_active(engine->i915))
+ if (!intel_vgpu_active(engine->i915)) {
engine->flags |= I915_ENGINE_HAS_SEMAPHORES;
- if (engine->preempt_context &&
- HAS_LOGICAL_RING_PREEMPTION(engine->i915))
- engine->flags |= I915_ENGINE_HAS_PREEMPTION;
+ if (HAS_LOGICAL_RING_PREEMPTION(engine->i915))
+ engine->flags |= I915_ENGINE_HAS_PREEMPTION;
+ }
+
+ if (INTEL_GEN(engine->i915) >= 12)
+ engine->flags |= I915_ENGINE_HAS_RELATIVE_MMIO;
+
+ if (intel_engine_has_preemption(engine))
+ engine->emit_bb_start = gen8_emit_bb_start;
+ else
+ engine->emit_bb_start = gen8_emit_bb_start_noarb;
+}
+
+static void execlists_shutdown(struct intel_engine_cs *engine)
+{
+ /* Synchronise with residual timers and any softirq they raise */
+ del_timer_sync(&engine->execlists.timer);
+ del_timer_sync(&engine->execlists.preempt);
+ tasklet_kill(&engine->execlists.tasklet);
}
-static void execlists_destroy(struct intel_engine_cs *engine)
+static void execlists_release(struct intel_engine_cs *engine)
{
+ execlists_shutdown(engine);
+
intel_engine_cleanup_common(engine);
lrc_destroy_wa_ctx(engine);
- kfree(engine);
}
static void
@@ -2611,19 +4245,16 @@ logical_ring_default_vfuncs(struct intel_engine_cs *engine)
{
/* Default vfuncs which can be overriden by each engine. */
- engine->destroy = execlists_destroy;
engine->resume = execlists_resume;
- engine->reset.prepare = execlists_reset_prepare;
- engine->reset.reset = execlists_reset;
- engine->reset.finish = execlists_reset_finish;
-
engine->cops = &execlists_context_ops;
engine->request_alloc = execlists_request_alloc;
engine->emit_flush = gen8_emit_flush;
engine->emit_init_breadcrumb = gen8_emit_init_breadcrumb;
engine->emit_fini_breadcrumb = gen8_emit_fini_breadcrumb;
+ if (INTEL_GEN(engine->i915) >= 12)
+ engine->emit_fini_breadcrumb = gen12_emit_fini_breadcrumb;
engine->set_default_submission = intel_execlists_set_default_submission;
@@ -2638,10 +4269,6 @@ logical_ring_default_vfuncs(struct intel_engine_cs *engine)
* until a more refined solution exists.
*/
}
- if (IS_GEN(engine->i915, 8))
- engine->emit_bb_start = gen8_emit_bb_start;
- else
- engine->emit_bb_start = gen9_emit_bb_start;
}
static inline void
@@ -2665,40 +4292,41 @@ logical_ring_default_irqs(struct intel_engine_cs *engine)
engine->irq_keep_mask = GT_CONTEXT_SWITCH_INTERRUPT << shift;
}
-int intel_execlists_submission_setup(struct intel_engine_cs *engine)
+static void rcs_submission_override(struct intel_engine_cs *engine)
{
- /* Intentionally left blank. */
- engine->buffer = NULL;
-
- tasklet_init(&engine->execlists.tasklet,
- execlists_submission_tasklet, (unsigned long)engine);
-
- logical_ring_default_vfuncs(engine);
- logical_ring_default_irqs(engine);
-
- if (engine->class == RENDER_CLASS) {
- engine->init_context = gen8_init_rcs_context;
+ switch (INTEL_GEN(engine->i915)) {
+ case 12:
+ engine->emit_flush = gen12_emit_flush_render;
+ engine->emit_fini_breadcrumb = gen12_emit_fini_breadcrumb_rcs;
+ break;
+ case 11:
+ engine->emit_flush = gen11_emit_flush_render;
+ engine->emit_fini_breadcrumb = gen11_emit_fini_breadcrumb_rcs;
+ break;
+ default:
engine->emit_flush = gen8_emit_flush_render;
engine->emit_fini_breadcrumb = gen8_emit_fini_breadcrumb_rcs;
+ break;
}
-
- return 0;
}
-int intel_execlists_submission_init(struct intel_engine_cs *engine)
+int intel_execlists_submission_setup(struct intel_engine_cs *engine)
{
struct intel_engine_execlists * const execlists = &engine->execlists;
struct drm_i915_private *i915 = engine->i915;
struct intel_uncore *uncore = engine->uncore;
u32 base = engine->mmio_base;
- int ret;
- ret = intel_engine_init_common(engine);
- if (ret)
- return ret;
+ tasklet_init(&engine->execlists.tasklet,
+ execlists_submission_tasklet, (unsigned long)engine);
+ timer_setup(&engine->execlists.timer, execlists_timeslice, 0);
+ timer_setup(&engine->execlists.preempt, execlists_preempt, 0);
- intel_engine_init_workarounds(engine);
- intel_engine_init_whitelist(engine);
+ logical_ring_default_vfuncs(engine);
+ logical_ring_default_irqs(engine);
+
+ if (engine->class == RENDER_CLASS)
+ rcs_submission_override(engine);
if (intel_init_workaround_bb(engine))
/*
@@ -2718,11 +4346,6 @@ int intel_execlists_submission_init(struct intel_engine_cs *engine)
i915_mmio_reg_offset(RING_ELSP(base));
}
- execlists->preempt_complete_status = ~0u;
- if (engine->preempt_context)
- execlists->preempt_complete_status =
- upper_32_bits(engine->preempt_context->lrc_desc);
-
execlists->csb_status =
&engine->status_page.addr[I915_HWS_CSB_BUF0_INDEX];
@@ -2734,12 +4357,15 @@ int intel_execlists_submission_init(struct intel_engine_cs *engine)
else
execlists->csb_size = GEN11_CSB_ENTRIES;
- reset_csb_pointers(execlists);
+ reset_csb_pointers(engine);
+
+ /* Finally, take ownership and responsibility for cleanup! */
+ engine->release = execlists_release;
return 0;
}
-static u32 intel_lr_indirect_ctx_offset(struct intel_engine_cs *engine)
+static u32 intel_lr_indirect_ctx_offset(const struct intel_engine_cs *engine)
{
u32 indirect_ctx_offset;
@@ -2747,6 +4373,10 @@ static u32 intel_lr_indirect_ctx_offset(struct intel_engine_cs *engine)
default:
MISSING_CASE(INTEL_GEN(engine->i915));
/* fall through */
+ case 12:
+ indirect_ctx_offset =
+ GEN12_CTX_RCS_INDIRECT_CTX_OFFSET_DEFAULT;
+ break;
case 11:
indirect_ctx_offset =
GEN11_CTX_RCS_INDIRECT_CTX_OFFSET_DEFAULT;
@@ -2768,86 +4398,53 @@ static u32 intel_lr_indirect_ctx_offset(struct intel_engine_cs *engine)
return indirect_ctx_offset;
}
-static void execlists_init_reg_state(u32 *regs,
- struct intel_context *ce,
- struct intel_engine_cs *engine,
- struct intel_ring *ring)
-{
- struct i915_ppgtt *ppgtt = i915_vm_to_ppgtt(ce->gem_context->vm);
- bool rcs = engine->class == RENDER_CLASS;
- u32 base = engine->mmio_base;
- /*
- * A context is actually a big batch buffer with several
- * MI_LOAD_REGISTER_IMM commands followed by (reg, value) pairs. The
- * values we are setting here are only for the first context restore:
- * on a subsequent save, the GPU will recreate this batchbuffer with new
- * values (including all the missing MI_LOAD_REGISTER_IMM commands that
- * we are not initializing here).
- *
- * Must keep consistent with virtual_update_register_offsets().
- */
- regs[CTX_LRI_HEADER_0] = MI_LOAD_REGISTER_IMM(rcs ? 14 : 11) |
- MI_LRI_FORCE_POSTED;
+static void init_common_reg_state(u32 * const regs,
+ const struct intel_engine_cs *engine,
+ const struct intel_ring *ring,
+ bool inhibit)
+{
+ u32 ctl;
+
+ ctl = _MASKED_BIT_ENABLE(CTX_CTRL_INHIBIT_SYN_CTX_SWITCH);
+ ctl |= _MASKED_BIT_DISABLE(CTX_CTRL_ENGINE_CTX_RESTORE_INHIBIT);
+ if (inhibit)
+ ctl |= CTX_CTRL_ENGINE_CTX_RESTORE_INHIBIT;
+ if (INTEL_GEN(engine->i915) < 11)
+ ctl |= _MASKED_BIT_DISABLE(CTX_CTRL_ENGINE_CTX_SAVE_INHIBIT |
+ CTX_CTRL_RS_CTX_ENABLE);
+ regs[CTX_CONTEXT_CONTROL] = ctl;
+
+ regs[CTX_RING_CTL] = RING_CTL_SIZE(ring->size) | RING_VALID;
+}
- CTX_REG(regs, CTX_CONTEXT_CONTROL, RING_CONTEXT_CONTROL(base),
- _MASKED_BIT_DISABLE(CTX_CTRL_ENGINE_CTX_RESTORE_INHIBIT) |
- _MASKED_BIT_ENABLE(CTX_CTRL_INHIBIT_SYN_CTX_SWITCH));
- if (INTEL_GEN(engine->i915) < 11) {
- regs[CTX_CONTEXT_CONTROL + 1] |=
- _MASKED_BIT_DISABLE(CTX_CTRL_ENGINE_CTX_SAVE_INHIBIT |
- CTX_CTRL_RS_CTX_ENABLE);
- }
- CTX_REG(regs, CTX_RING_HEAD, RING_HEAD(base), 0);
- CTX_REG(regs, CTX_RING_TAIL, RING_TAIL(base), 0);
- CTX_REG(regs, CTX_RING_BUFFER_START, RING_START(base), 0);
- CTX_REG(regs, CTX_RING_BUFFER_CONTROL, RING_CTL(base),
- RING_CTL_SIZE(ring->size) | RING_VALID);
- CTX_REG(regs, CTX_BB_HEAD_U, RING_BBADDR_UDW(base), 0);
- CTX_REG(regs, CTX_BB_HEAD_L, RING_BBADDR(base), 0);
- CTX_REG(regs, CTX_BB_STATE, RING_BBSTATE(base), RING_BB_PPGTT);
- CTX_REG(regs, CTX_SECOND_BB_HEAD_U, RING_SBBADDR_UDW(base), 0);
- CTX_REG(regs, CTX_SECOND_BB_HEAD_L, RING_SBBADDR(base), 0);
- CTX_REG(regs, CTX_SECOND_BB_STATE, RING_SBBSTATE(base), 0);
- if (rcs) {
- struct i915_ctx_workarounds *wa_ctx = &engine->wa_ctx;
-
- CTX_REG(regs, CTX_RCS_INDIRECT_CTX, RING_INDIRECT_CTX(base), 0);
- CTX_REG(regs, CTX_RCS_INDIRECT_CTX_OFFSET,
- RING_INDIRECT_CTX_OFFSET(base), 0);
- if (wa_ctx->indirect_ctx.size) {
- u32 ggtt_offset = i915_ggtt_offset(wa_ctx->vma);
-
- regs[CTX_RCS_INDIRECT_CTX + 1] =
- (ggtt_offset + wa_ctx->indirect_ctx.offset) |
- (wa_ctx->indirect_ctx.size / CACHELINE_BYTES);
-
- regs[CTX_RCS_INDIRECT_CTX_OFFSET + 1] =
- intel_lr_indirect_ctx_offset(engine) << 6;
- }
+static void init_wa_bb_reg_state(u32 * const regs,
+ const struct intel_engine_cs *engine,
+ u32 pos_bb_per_ctx)
+{
+ const struct i915_ctx_workarounds * const wa_ctx = &engine->wa_ctx;
- CTX_REG(regs, CTX_BB_PER_CTX_PTR, RING_BB_PER_CTX_PTR(base), 0);
- if (wa_ctx->per_ctx.size) {
- u32 ggtt_offset = i915_ggtt_offset(wa_ctx->vma);
+ if (wa_ctx->per_ctx.size) {
+ const u32 ggtt_offset = i915_ggtt_offset(wa_ctx->vma);
- regs[CTX_BB_PER_CTX_PTR + 1] =
- (ggtt_offset + wa_ctx->per_ctx.offset) | 0x01;
- }
+ regs[pos_bb_per_ctx] =
+ (ggtt_offset + wa_ctx->per_ctx.offset) | 0x01;
}
- regs[CTX_LRI_HEADER_1] = MI_LOAD_REGISTER_IMM(9) | MI_LRI_FORCE_POSTED;
+ if (wa_ctx->indirect_ctx.size) {
+ const u32 ggtt_offset = i915_ggtt_offset(wa_ctx->vma);
- CTX_REG(regs, CTX_CTX_TIMESTAMP, RING_CTX_TIMESTAMP(base), 0);
- /* PDP values well be assigned later if needed */
- CTX_REG(regs, CTX_PDP3_UDW, GEN8_RING_PDP_UDW(base, 3), 0);
- CTX_REG(regs, CTX_PDP3_LDW, GEN8_RING_PDP_LDW(base, 3), 0);
- CTX_REG(regs, CTX_PDP2_UDW, GEN8_RING_PDP_UDW(base, 2), 0);
- CTX_REG(regs, CTX_PDP2_LDW, GEN8_RING_PDP_LDW(base, 2), 0);
- CTX_REG(regs, CTX_PDP1_UDW, GEN8_RING_PDP_UDW(base, 1), 0);
- CTX_REG(regs, CTX_PDP1_LDW, GEN8_RING_PDP_LDW(base, 1), 0);
- CTX_REG(regs, CTX_PDP0_UDW, GEN8_RING_PDP_UDW(base, 0), 0);
- CTX_REG(regs, CTX_PDP0_LDW, GEN8_RING_PDP_LDW(base, 0), 0);
+ regs[pos_bb_per_ctx + 2] =
+ (ggtt_offset + wa_ctx->indirect_ctx.offset) |
+ (wa_ctx->indirect_ctx.size / CACHELINE_BYTES);
+ regs[pos_bb_per_ctx + 4] =
+ intel_lr_indirect_ctx_offset(engine) << 6;
+ }
+}
+
+static void init_ppgtt_reg_state(u32 *regs, const struct i915_ppgtt *ppgtt)
+{
if (i915_vm_is_4lvl(&ppgtt->vm)) {
/* 64b PPGTT (48bit canonical)
* PDP0_DESCRIPTOR contains the base address to PML4 and
@@ -2860,17 +4457,43 @@ static void execlists_init_reg_state(u32 *regs,
ASSIGN_CTX_PDP(ppgtt, regs, 1);
ASSIGN_CTX_PDP(ppgtt, regs, 0);
}
+}
- if (rcs) {
- regs[CTX_LRI_HEADER_2] = MI_LOAD_REGISTER_IMM(1);
- CTX_REG(regs, CTX_R_PWR_CLK_STATE, GEN8_R_PWR_CLK_STATE, 0);
+static struct i915_ppgtt *vm_alias(struct i915_address_space *vm)
+{
+ if (i915_is_ggtt(vm))
+ return i915_vm_to_ggtt(vm)->alias;
+ else
+ return i915_vm_to_ppgtt(vm);
+}
- i915_oa_init_reg_state(engine, ce, regs);
- }
+static void execlists_init_reg_state(u32 *regs,
+ const struct intel_context *ce,
+ const struct intel_engine_cs *engine,
+ const struct intel_ring *ring,
+ bool inhibit)
+{
+ /*
+ * A context is actually a big batch buffer with several
+ * MI_LOAD_REGISTER_IMM commands followed by (reg, value) pairs. The
+ * values we are setting here are only for the first context restore:
+ * on a subsequent save, the GPU will recreate this batchbuffer with new
+ * values (including all the missing MI_LOAD_REGISTER_IMM commands that
+ * we are not initializing here).
+ *
+ * Must keep consistent with virtual_update_register_offsets().
+ */
+ set_offsets(regs, reg_offsets(engine), engine, inhibit);
- regs[CTX_END] = MI_BATCH_BUFFER_END;
- if (INTEL_GEN(engine->i915) >= 10)
- regs[CTX_END] |= BIT(0);
+ init_common_reg_state(regs, engine, ring, inhibit);
+ init_ppgtt_reg_state(regs, vm_alias(ce->vm));
+
+ init_wa_bb_reg_state(regs, engine,
+ INTEL_GEN(engine->i915) >= 12 ?
+ GEN12_CTX_BB_PER_CTX_PTR :
+ CTX_BB_PER_CTX_PTR);
+
+ __reset_stop_ring(regs, engine);
}
static int
@@ -2879,8 +4502,8 @@ populate_lr_context(struct intel_context *ce,
struct intel_engine_cs *engine,
struct intel_ring *ring)
{
+ bool inhibit = true;
void *vaddr;
- u32 *regs;
int ret;
vaddr = i915_gem_object_pin_map(ctx_obj, I915_MAP_WB);
@@ -2890,13 +4513,9 @@ populate_lr_context(struct intel_context *ce,
return ret;
}
+ set_redzone(vaddr, engine);
+
if (engine->default_state) {
- /*
- * We only want to copy over the template context state;
- * skipping over the headers reserved for GuC communication,
- * leaving those as zero.
- */
- const unsigned long start = LRC_HEADER_PAGES * PAGE_SIZE;
void *defaults;
defaults = i915_gem_object_pin_map(engine->default_state,
@@ -2906,81 +4525,62 @@ populate_lr_context(struct intel_context *ce,
goto err_unpin_ctx;
}
- memcpy(vaddr + start, defaults + start, engine->context_size);
+ memcpy(vaddr, defaults, engine->context_size);
i915_gem_object_unpin_map(engine->default_state);
+ __set_bit(CONTEXT_VALID_BIT, &ce->flags);
+ inhibit = false;
}
/* The second page of the context object contains some fields which must
* be set up prior to the first execution. */
- regs = vaddr + LRC_STATE_PN * PAGE_SIZE;
- execlists_init_reg_state(regs, ce, engine, ring);
- if (!engine->default_state)
- regs[CTX_CONTEXT_CONTROL + 1] |=
- _MASKED_BIT_ENABLE(CTX_CTRL_ENGINE_CTX_RESTORE_INHIBIT);
- if (ce->gem_context == engine->i915->preempt_context &&
- INTEL_GEN(engine->i915) < 11)
- regs[CTX_CONTEXT_CONTROL + 1] |=
- _MASKED_BIT_ENABLE(CTX_CTRL_ENGINE_CTX_RESTORE_INHIBIT |
- CTX_CTRL_ENGINE_CTX_SAVE_INHIBIT);
+ execlists_init_reg_state(vaddr + LRC_STATE_PN * PAGE_SIZE,
+ ce, engine, ring, inhibit);
ret = 0;
err_unpin_ctx:
- __i915_gem_object_flush_map(ctx_obj,
- LRC_HEADER_PAGES * PAGE_SIZE,
- engine->context_size);
+ __i915_gem_object_flush_map(ctx_obj, 0, engine->context_size);
i915_gem_object_unpin_map(ctx_obj);
return ret;
}
-static struct i915_timeline *get_timeline(struct i915_gem_context *ctx)
-{
- if (ctx->timeline)
- return i915_timeline_get(ctx->timeline);
- else
- return i915_timeline_create(ctx->i915, NULL);
-}
-
-static int execlists_context_deferred_alloc(struct intel_context *ce,
- struct intel_engine_cs *engine)
+static int __execlists_context_alloc(struct intel_context *ce,
+ struct intel_engine_cs *engine)
{
struct drm_i915_gem_object *ctx_obj;
+ struct intel_ring *ring;
struct i915_vma *vma;
u32 context_size;
- struct intel_ring *ring;
- struct i915_timeline *timeline;
int ret;
- if (ce->state)
- return 0;
-
+ GEM_BUG_ON(ce->state);
context_size = round_up(engine->context_size, I915_GTT_PAGE_SIZE);
- /*
- * Before the actual start of the context image, we insert a few pages
- * for our own use and for sharing with the GuC.
- */
- context_size += LRC_HEADER_PAGES * PAGE_SIZE;
+ if (IS_ENABLED(CONFIG_DRM_I915_DEBUG_GEM))
+ context_size += I915_GTT_PAGE_SIZE; /* for redzone */
ctx_obj = i915_gem_object_create_shmem(engine->i915, context_size);
if (IS_ERR(ctx_obj))
return PTR_ERR(ctx_obj);
- vma = i915_vma_instance(ctx_obj, &engine->i915->ggtt.vm, NULL);
+ vma = i915_vma_instance(ctx_obj, &engine->gt->ggtt->vm, NULL);
if (IS_ERR(vma)) {
ret = PTR_ERR(vma);
goto error_deref_obj;
}
- timeline = get_timeline(ce->gem_context);
- if (IS_ERR(timeline)) {
- ret = PTR_ERR(timeline);
- goto error_deref_obj;
+ if (!ce->timeline) {
+ struct intel_timeline *tl;
+
+ tl = intel_timeline_create(engine->gt, NULL);
+ if (IS_ERR(tl)) {
+ ret = PTR_ERR(tl);
+ goto error_deref_obj;
+ }
+
+ ce->timeline = tl;
}
- ring = intel_engine_create_ring(engine,
- timeline,
- ce->gem_context->ring_size);
- i915_timeline_put(timeline);
+ ring = intel_engine_create_ring(engine, (unsigned long)ce->ring);
if (IS_ERR(ring)) {
ret = PTR_ERR(ring);
goto error_deref_obj;
@@ -3022,22 +4622,24 @@ static void virtual_context_destroy(struct kref *kref)
for (n = 0; n < ve->num_siblings; n++) {
struct intel_engine_cs *sibling = ve->siblings[n];
struct rb_node *node = &ve->nodes[sibling->id].rb;
+ unsigned long flags;
if (RB_EMPTY_NODE(node))
continue;
- spin_lock_irq(&sibling->active.lock);
+ spin_lock_irqsave(&sibling->active.lock, flags);
/* Detachment is lazily performed in the execlists tasklet */
if (!RB_EMPTY_NODE(node))
rb_erase_cached(node, &sibling->execlists.virtual);
- spin_unlock_irq(&sibling->active.lock);
+ spin_unlock_irqrestore(&sibling->active.lock, flags);
}
GEM_BUG_ON(__tasklet_is_scheduled(&ve->base.execlists.tasklet));
if (ve->context.state)
__execlists_context_fini(&ve->context);
+ intel_context_fini(&ve->context);
kfree(ve->bonds);
kfree(ve);
@@ -3065,8 +4667,16 @@ static void virtual_engine_initial_hint(struct virtual_engine *ve)
return;
swap(ve->siblings[swp], ve->siblings[0]);
- virtual_update_register_offsets(ve->context.lrc_reg_state,
- ve->siblings[0]);
+ if (!intel_engine_has_relative_mmio(ve->siblings[0]))
+ virtual_update_register_offsets(ve->context.lrc_reg_state,
+ ve->siblings[0]);
+}
+
+static int virtual_context_alloc(struct intel_context *ce)
+{
+ struct virtual_engine *ve = container_of(ce, typeof(*ve), context);
+
+ return __execlists_context_alloc(ce, ve->siblings[0]);
}
static int virtual_context_pin(struct intel_context *ce)
@@ -3090,6 +4700,8 @@ static void virtual_context_enter(struct intel_context *ce)
for (n = 0; n < ve->num_siblings; n++)
intel_engine_pm_get(ve->siblings[n]);
+
+ intel_timeline_enter(ce->timeline);
}
static void virtual_context_exit(struct intel_context *ce)
@@ -3097,11 +4709,15 @@ static void virtual_context_exit(struct intel_context *ce)
struct virtual_engine *ve = container_of(ce, typeof(*ve), context);
unsigned int n;
+ intel_timeline_exit(ce->timeline);
+
for (n = 0; n < ve->num_siblings; n++)
intel_engine_pm_put(ve->siblings[n]);
}
static const struct intel_context_ops virtual_context_ops = {
+ .alloc = virtual_context_alloc,
+
.pin = virtual_context_pin,
.unpin = execlists_context_unpin,
@@ -3128,10 +4744,9 @@ static intel_engine_mask_t virtual_submission_mask(struct virtual_engine *ve)
mask = ve->siblings[0]->mask;
}
- GEM_TRACE("%s: rq=%llx:%lld, mask=%x, prio=%d\n",
- ve->base.name,
- rq->fence.context, rq->fence.seqno,
- mask, ve->base.execlists.queue_priority_hint);
+ ENGINE_TRACE(&ve->base, "rq=%llx:%lld, mask=%x, prio=%d\n",
+ rq->fence.context, rq->fence.seqno,
+ mask, ve->base.execlists.queue_priority_hint);
return mask;
}
@@ -3219,23 +4834,40 @@ submit_engine:
static void virtual_submit_request(struct i915_request *rq)
{
struct virtual_engine *ve = to_virtual_engine(rq->engine);
+ struct i915_request *old;
+ unsigned long flags;
- GEM_TRACE("%s: rq=%llx:%lld\n",
- ve->base.name,
- rq->fence.context,
- rq->fence.seqno);
+ ENGINE_TRACE(&ve->base, "rq=%llx:%lld\n",
+ rq->fence.context,
+ rq->fence.seqno);
GEM_BUG_ON(ve->base.submit_request != virtual_submit_request);
- GEM_BUG_ON(ve->request);
- GEM_BUG_ON(!list_empty(virtual_queue(ve)));
+ spin_lock_irqsave(&ve->base.active.lock, flags);
- ve->base.execlists.queue_priority_hint = rq_prio(rq);
- WRITE_ONCE(ve->request, rq);
+ old = ve->request;
+ if (old) { /* background completion event from preempt-to-busy */
+ GEM_BUG_ON(!i915_request_completed(old));
+ __i915_request_submit(old);
+ i915_request_put(old);
+ }
- list_move_tail(&rq->sched.link, virtual_queue(ve));
+ if (i915_request_completed(rq)) {
+ __i915_request_submit(rq);
- tasklet_schedule(&ve->base.execlists.tasklet);
+ ve->base.execlists.queue_priority_hint = INT_MIN;
+ ve->request = NULL;
+ } else {
+ ve->base.execlists.queue_priority_hint = rq_prio(rq);
+ ve->request = i915_request_get(rq);
+
+ GEM_BUG_ON(!list_empty(virtual_queue(ve)));
+ list_move_tail(&rq->sched.link, virtual_queue(ve));
+
+ tasklet_schedule(&ve->base.execlists.tasklet);
+ }
+
+ spin_unlock_irqrestore(&ve->base.active.lock, flags);
}
static struct ve_bond *
@@ -3256,23 +4888,26 @@ static void
virtual_bond_execute(struct i915_request *rq, struct dma_fence *signal)
{
struct virtual_engine *ve = to_virtual_engine(rq->engine);
+ intel_engine_mask_t allowed, exec;
struct ve_bond *bond;
+ allowed = ~to_request(signal)->engine->mask;
+
bond = virtual_find_bond(ve, to_request(signal)->engine);
- if (bond) {
- intel_engine_mask_t old, new, cmp;
+ if (bond)
+ allowed &= bond->sibling_mask;
- cmp = READ_ONCE(rq->execution_mask);
- do {
- old = cmp;
- new = cmp & bond->sibling_mask;
- } while ((cmp = cmpxchg(&rq->execution_mask, old, new)) != old);
- }
+ /* Restrict the bonded request to run on only the available engines */
+ exec = READ_ONCE(rq->execution_mask);
+ while (!try_cmpxchg(&rq->execution_mask, &exec, exec & allowed))
+ ;
+
+ /* Prevent the master from being re-run on the bonded engines */
+ to_request(signal)->execution_mask &= ~allowed;
}
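
The try_cmpxchg() loop above is the usual lock-free read-modify-write idiom: on failure the current value is reloaded and the narrowed mask is retried until it lands. A standalone C11 equivalent (hypothetical helper name):

#include <stdatomic.h>
#include <stdint.h>

static void restrict_execution_mask(_Atomic uint32_t *mask, uint32_t allowed)
{
	uint32_t cur = atomic_load(mask);

	/* On failure the exchange reloads 'cur', so simply retry. */
	while (!atomic_compare_exchange_weak(mask, &cur, cur & allowed))
		;
}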
struct intel_context *
-intel_execlists_create_virtual(struct i915_gem_context *ctx,
- struct intel_engine_cs **siblings,
+intel_execlists_create_virtual(struct intel_engine_cs **siblings,
unsigned int count)
{
struct virtual_engine *ve;
@@ -3283,18 +4918,21 @@ intel_execlists_create_virtual(struct i915_gem_context *ctx,
return ERR_PTR(-EINVAL);
if (count == 1)
- return intel_context_create(ctx, siblings[0]);
+ return intel_context_create(siblings[0]);
ve = kzalloc(struct_size(ve, siblings, count), GFP_KERNEL);
if (!ve)
return ERR_PTR(-ENOMEM);
- ve->base.i915 = ctx->i915;
+ ve->base.i915 = siblings[0]->i915;
+ ve->base.gt = siblings[0]->gt;
+ ve->base.uncore = siblings[0]->uncore;
ve->base.id = -1;
+
ve->base.class = OTHER_CLASS;
ve->base.uabi_class = I915_ENGINE_CLASS_INVALID;
ve->base.instance = I915_ENGINE_CLASS_INVALID_VIRTUAL;
- ve->base.flags = I915_ENGINE_IS_VIRTUAL;
+ ve->base.uabi_instance = I915_ENGINE_CLASS_INVALID_VIRTUAL;
/*
* The decision on whether to submit a request using semaphores
@@ -3314,7 +4952,7 @@ intel_execlists_create_virtual(struct i915_gem_context *ctx,
snprintf(ve->base.name, sizeof(ve->base.name), "virtual");
intel_engine_init_active(&ve->base, ENGINE_VIRTUAL);
-
+ intel_engine_init_breadcrumbs(&ve->base);
intel_engine_init_execlists(&ve->base);
ve->base.cops = &virtual_context_ops;
@@ -3330,7 +4968,7 @@ intel_execlists_create_virtual(struct i915_gem_context *ctx,
virtual_submission_tasklet,
(unsigned long)ve);
- intel_context_init(&ve->context, ctx, &ve->base);
+ intel_context_init(&ve->context, &ve->base);
for (n = 0; n < count; n++) {
struct intel_engine_cs *sibling = siblings[n];
@@ -3391,8 +5029,12 @@ intel_execlists_create_virtual(struct i915_gem_context *ctx,
ve->base.emit_fini_breadcrumb = sibling->emit_fini_breadcrumb;
ve->base.emit_fini_breadcrumb_dw =
sibling->emit_fini_breadcrumb_dw;
+
+ ve->base.flags = sibling->flags;
}
+ ve->base.flags |= I915_ENGINE_IS_VIRTUAL;
+
return &ve->context;
err_put:
@@ -3401,14 +5043,12 @@ err_put:
}
struct intel_context *
-intel_execlists_clone_virtual(struct i915_gem_context *ctx,
- struct intel_engine_cs *src)
+intel_execlists_clone_virtual(struct intel_engine_cs *src)
{
struct virtual_engine *se = to_virtual_engine(src);
struct intel_context *dst;
- dst = intel_execlists_create_virtual(ctx,
- se->siblings,
+ dst = intel_execlists_create_virtual(se->siblings,
se->num_siblings);
if (IS_ERR(dst))
return dst;
@@ -3466,6 +5106,18 @@ int intel_virtual_engine_attach_bond(struct intel_engine_cs *engine,
return 0;
}
+struct intel_engine_cs *
+intel_virtual_engine_get_sibling(struct intel_engine_cs *engine,
+ unsigned int sibling)
+{
+ struct virtual_engine *ve = to_virtual_engine(engine);
+
+ if (sibling >= ve->num_siblings)
+ return NULL;
+
+ return ve->siblings[sibling];
+}
+
void intel_execlists_show_requests(struct intel_engine_cs *engine,
struct drm_printer *m,
void (*show_request)(struct drm_printer *m,
@@ -3554,6 +5206,8 @@ void intel_lr_context_reset(struct intel_engine_cs *engine,
u32 head,
bool scrub)
{
+ GEM_BUG_ON(!intel_context_is_pinned(ce));
+
/*
* We want a simple context + ring to execute the breadcrumb update.
* We cannot rely on the context being intact across the GPU hang,
@@ -3562,16 +5216,8 @@ void intel_lr_context_reset(struct intel_engine_cs *engine,
* future request will be after userspace has had the opportunity
* to recreate its own state.
*/
- if (scrub) {
- u32 *regs = ce->lrc_reg_state;
-
- if (engine->pinned_default_state) {
- memcpy(regs, /* skip restoring the vanilla PPHWSP */
- engine->pinned_default_state + LRC_STATE_PN * PAGE_SIZE,
- engine->context_size - PAGE_SIZE);
- }
- execlists_init_reg_state(regs, ce, engine, ce->ring);
- }
+ if (scrub)
+ restore_default_state(ce, engine);
/* Rerun the request; its payload has been neutered (if guilty). */
ce->ring->head = head;
@@ -3580,6 +5226,13 @@ void intel_lr_context_reset(struct intel_engine_cs *engine,
__execlists_update_reg_state(ce, engine);
}
+bool
+intel_engine_in_execlists_submission_mode(const struct intel_engine_cs *engine)
+{
+ return engine->set_default_submission ==
+ intel_execlists_set_default_submission;
+}
+
#if IS_ENABLED(CONFIG_DRM_I915_SELFTEST)
#include "selftest_lrc.c"
#endif
diff --git a/drivers/gpu/drm/i915/gt/intel_lrc.h b/drivers/gpu/drm/i915/gt/intel_lrc.h
index c2bba82bcc16..dfbc214e14f5 100644
--- a/drivers/gpu/drm/i915/gt/intel_lrc.h
+++ b/drivers/gpu/drm/i915/gt/intel_lrc.h
@@ -43,6 +43,7 @@ struct intel_engine_cs;
#define CTX_CTRL_ENGINE_CTX_RESTORE_INHIBIT (1 << 0)
#define CTX_CTRL_RS_CTX_ENABLE (1 << 1)
#define CTX_CTRL_ENGINE_CTX_SAVE_INHIBIT (1 << 2)
+#define GEN12_CTX_CTRL_OAR_CONTEXT_ENABLE (1 << 8)
#define RING_CONTEXT_STATUS_PTR(base) _MMIO((base) + 0x3a0)
#define RING_EXECLIST_SQ_CONTENTS(base) _MMIO((base) + 0x510)
#define RING_EXECLIST_CONTROL(base) _MMIO((base) + 0x550)
@@ -66,6 +67,12 @@ struct intel_engine_cs;
#define GEN11_CSB_READ_PTR_MASK (GEN11_CSB_PTR_MASK << 8)
#define GEN11_CSB_WRITE_PTR_MASK (GEN11_CSB_PTR_MASK << 0)
+#define MAX_CONTEXT_HW_ID (1 << 21) /* exclusive */
+#define MAX_GUC_CONTEXT_HW_ID (1 << 20) /* exclusive */
+#define GEN11_MAX_CONTEXT_HW_ID (1 << 11) /* exclusive */
+/* In Gen12 ID 0x7FF is reserved to indicate idle */
+#define GEN12_MAX_CONTEXT_HW_ID (GEN11_MAX_CONTEXT_HW_ID - 1)
+
enum {
INTEL_CONTEXT_SCHEDULE_IN = 0,
INTEL_CONTEXT_SCHEDULE_OUT,
@@ -76,33 +83,17 @@ enum {
void intel_logical_ring_cleanup(struct intel_engine_cs *engine);
int intel_execlists_submission_setup(struct intel_engine_cs *engine);
-int intel_execlists_submission_init(struct intel_engine_cs *engine);
/* Logical Ring Contexts */
-
-/*
- * We allocate a header at the start of the context image for our own
- * use, therefore the actual location of the logical state is offset
- * from the start of the VMA. The layout is
- *
- * | [guc] | [hwsp] [logical state] |
- * |<- our header ->|<- context image ->|
- *
- */
-/* The first page is used for sharing data with the GuC */
-#define LRC_GUCSHR_PN (0)
-#define LRC_GUCSHR_SZ (1)
/* At the start of the context image is its per-process HWS page */
-#define LRC_PPHWSP_PN (LRC_GUCSHR_PN + LRC_GUCSHR_SZ)
+#define LRC_PPHWSP_PN (0)
#define LRC_PPHWSP_SZ (1)
-/* Finally we have the logical state for the context */
+/* After the PPHWSP we have the logical state for the context */
#define LRC_STATE_PN (LRC_PPHWSP_PN + LRC_PPHWSP_SZ)
-/*
- * Currently we include the PPHWSP in __intel_engine_context_size() so
- * the size of the header is synonymous with the start of the PPHWSP.
- */
-#define LRC_HEADER_PAGES LRC_PPHWSP_PN
+/* Space within PPHWSP reserved to be used as scratch */
+#define LRC_PPHWSP_SCRATCH 0x34
+#define LRC_PPHWSP_SCRATCH_ADDR (LRC_PPHWSP_SCRATCH * sizeof(u32))
void intel_execlists_set_default_submission(struct intel_engine_cs *engine);
@@ -119,16 +110,21 @@ void intel_execlists_show_requests(struct intel_engine_cs *engine,
unsigned int max);
struct intel_context *
-intel_execlists_create_virtual(struct i915_gem_context *ctx,
- struct intel_engine_cs **siblings,
+intel_execlists_create_virtual(struct intel_engine_cs **siblings,
unsigned int count);
struct intel_context *
-intel_execlists_clone_virtual(struct i915_gem_context *ctx,
- struct intel_engine_cs *src);
+intel_execlists_clone_virtual(struct intel_engine_cs *src);
int intel_virtual_engine_attach_bond(struct intel_engine_cs *engine,
const struct intel_engine_cs *master,
const struct intel_engine_cs *sibling);
+struct intel_engine_cs *
+intel_virtual_engine_get_sibling(struct intel_engine_cs *engine,
+ unsigned int sibling);
+
+bool
+intel_engine_in_execlists_submission_mode(const struct intel_engine_cs *engine);
+
#endif /* _INTEL_LRC_H_ */
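
A quick self-contained check of the slimmed-down layout defined above, assuming the usual 4096-byte page: the register state now starts one page into the context image, and the scratch slot sits 0x34 dwords into the PPHWSP.

#include <stdio.h>

#define PAGE_SIZE		4096
#define LRC_PPHWSP_PN		0
#define LRC_PPHWSP_SZ		1
#define LRC_STATE_PN		(LRC_PPHWSP_PN + LRC_PPHWSP_SZ)
#define LRC_PPHWSP_SCRATCH	0x34
#define LRC_PPHWSP_SCRATCH_ADDR	(LRC_PPHWSP_SCRATCH * sizeof(unsigned int))

int main(void)
{
	printf("reg state offset: %d bytes\n", LRC_STATE_PN * PAGE_SIZE); /* 4096 */
	printf("scratch offset:   %zu bytes\n", LRC_PPHWSP_SCRATCH_ADDR); /* 208 */
	return 0;
}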
diff --git a/drivers/gpu/drm/i915/gt/intel_lrc_reg.h b/drivers/gpu/drm/i915/gt/intel_lrc_reg.h
index 6bf34738b4e5..08a3be65f700 100644
--- a/drivers/gpu/drm/i915/gt/intel_lrc_reg.h
+++ b/drivers/gpu/drm/i915/gt/intel_lrc_reg.h
@@ -9,60 +9,47 @@
#include <linux/types.h>
-/* GEN8+ Reg State Context */
-#define CTX_LRI_HEADER_0 0x01
-#define CTX_CONTEXT_CONTROL 0x02
-#define CTX_RING_HEAD 0x04
-#define CTX_RING_TAIL 0x06
-#define CTX_RING_BUFFER_START 0x08
-#define CTX_RING_BUFFER_CONTROL 0x0a
-#define CTX_BB_HEAD_U 0x0c
-#define CTX_BB_HEAD_L 0x0e
-#define CTX_BB_STATE 0x10
-#define CTX_SECOND_BB_HEAD_U 0x12
-#define CTX_SECOND_BB_HEAD_L 0x14
-#define CTX_SECOND_BB_STATE 0x16
-#define CTX_BB_PER_CTX_PTR 0x18
-#define CTX_RCS_INDIRECT_CTX 0x1a
-#define CTX_RCS_INDIRECT_CTX_OFFSET 0x1c
-#define CTX_LRI_HEADER_1 0x21
-#define CTX_CTX_TIMESTAMP 0x22
-#define CTX_PDP3_UDW 0x24
-#define CTX_PDP3_LDW 0x26
-#define CTX_PDP2_UDW 0x28
-#define CTX_PDP2_LDW 0x2a
-#define CTX_PDP1_UDW 0x2c
-#define CTX_PDP1_LDW 0x2e
-#define CTX_PDP0_UDW 0x30
-#define CTX_PDP0_LDW 0x32
-#define CTX_LRI_HEADER_2 0x41
-#define CTX_R_PWR_CLK_STATE 0x42
-#define CTX_END 0x44
-
-#define CTX_REG(reg_state, pos, reg, val) do { \
- u32 *reg_state__ = (reg_state); \
- const u32 pos__ = (pos); \
- (reg_state__)[(pos__) + 0] = i915_mmio_reg_offset(reg); \
- (reg_state__)[(pos__) + 1] = (val); \
-} while (0)
+/* GEN8 to GEN11 Reg State Context */
+#define CTX_CONTEXT_CONTROL (0x02 + 1)
+#define CTX_RING_HEAD (0x04 + 1)
+#define CTX_RING_TAIL (0x06 + 1)
+#define CTX_RING_START (0x08 + 1)
+#define CTX_RING_CTL (0x0a + 1)
+#define CTX_BB_STATE (0x10 + 1)
+#define CTX_BB_PER_CTX_PTR (0x18 + 1)
+#define CTX_PDP3_UDW (0x24 + 1)
+#define CTX_PDP3_LDW (0x26 + 1)
+#define CTX_PDP2_UDW (0x28 + 1)
+#define CTX_PDP2_LDW (0x2a + 1)
+#define CTX_PDP1_UDW (0x2c + 1)
+#define CTX_PDP1_LDW (0x2e + 1)
+#define CTX_PDP0_UDW (0x30 + 1)
+#define CTX_PDP0_LDW (0x32 + 1)
+#define CTX_R_PWR_CLK_STATE (0x42 + 1)
+
+#define GEN9_CTX_RING_MI_MODE 0x54
+
+/* GEN12+ Reg State Context */
+#define GEN12_CTX_BB_PER_CTX_PTR (0x12 + 1)
#define ASSIGN_CTX_PDP(ppgtt, reg_state, n) do { \
u32 *reg_state__ = (reg_state); \
const u64 addr__ = i915_page_dir_dma_addr((ppgtt), (n)); \
- (reg_state__)[CTX_PDP ## n ## _UDW + 1] = upper_32_bits(addr__); \
- (reg_state__)[CTX_PDP ## n ## _LDW + 1] = lower_32_bits(addr__); \
+ (reg_state__)[CTX_PDP ## n ## _UDW] = upper_32_bits(addr__); \
+ (reg_state__)[CTX_PDP ## n ## _LDW] = lower_32_bits(addr__); \
} while (0)
#define ASSIGN_CTX_PML4(ppgtt, reg_state) do { \
u32 *reg_state__ = (reg_state); \
const u64 addr__ = px_dma(ppgtt->pd); \
- (reg_state__)[CTX_PDP0_UDW + 1] = upper_32_bits(addr__); \
- (reg_state__)[CTX_PDP0_LDW + 1] = lower_32_bits(addr__); \
+ (reg_state__)[CTX_PDP0_UDW] = upper_32_bits(addr__); \
+ (reg_state__)[CTX_PDP0_LDW] = lower_32_bits(addr__); \
} while (0)
#define GEN8_CTX_RCS_INDIRECT_CTX_OFFSET_DEFAULT 0x17
#define GEN9_CTX_RCS_INDIRECT_CTX_OFFSET_DEFAULT 0x26
#define GEN10_CTX_RCS_INDIRECT_CTX_OFFSET_DEFAULT 0x19
#define GEN11_CTX_RCS_INDIRECT_CTX_OFFSET_DEFAULT 0x1A
+#define GEN12_CTX_RCS_INDIRECT_CTX_OFFSET_DEFAULT 0xD
#endif /* _INTEL_LRC_REG_H_ */
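
The (0x.. + 1) pattern above bakes the old "pos + 1" adjustment into the definitions themselves: each index now names the value dword directly rather than the preceding MI_LRI register-offset dword. A tiny illustration (hypothetical helper):

#include <stdint.h>

#define CTX_RING_TAIL (0x06 + 1)

/*
 * Before: CTX_REG() wrote the register offset at pos and the value at
 * pos + 1. Now the value slot is addressed directly.
 */
static inline void set_ring_tail(uint32_t *regs, uint32_t tail)
{
	regs[CTX_RING_TAIL] = tail;
}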
diff --git a/drivers/gpu/drm/i915/gt/intel_mocs.c b/drivers/gpu/drm/i915/gt/intel_mocs.c
index 1f9db50b1869..eeef90b55c64 100644
--- a/drivers/gpu/drm/i915/gt/intel_mocs.c
+++ b/drivers/gpu/drm/i915/gt/intel_mocs.c
@@ -23,8 +23,10 @@
#include "i915_drv.h"
#include "intel_engine.h"
+#include "intel_gt.h"
#include "intel_mocs.h"
#include "intel_lrc.h"
+#include "intel_ring.h"
/* structures required */
struct drm_i915_mocs_entry {
@@ -61,6 +63,10 @@ struct drm_i915_mocs_table {
#define GEN11_NUM_MOCS_ENTRIES 64 /* 63-64 are reserved, but configured. */
/* (e)LLC caching options */
+/*
+ * Note: LE_0_PAGETABLE works only up to Gen11; for newer gens it means
+ * the same as LE_UC
+ */
#define LE_0_PAGETABLE _LE_CACHEABILITY(0)
#define LE_1_UC _LE_CACHEABILITY(1)
#define LE_2_WT _LE_CACHEABILITY(2)
@@ -99,8 +105,9 @@ struct drm_i915_mocs_table {
* of bspec.
*
* Entries not part of the following tables are undefined as far as
- * userspace is concerned and shouldn't be relied upon. For the time
- * being they will be initialized to PTE.
+ * userspace is concerned and shouldn't be relied upon. For Gen < 12
+ * they will be initialized to PTE. From Gen12 onwards there is no
+ * setting for PTE, so unused entries will be initialized to an
+ * invalid value.
*
* The last two entries are reserved by the hardware. For ICL+ they
* should be initialized according to bspec and never used, for older
@@ -120,7 +127,7 @@ struct drm_i915_mocs_table {
LE_0_PAGETABLE | LE_TC_2_LLC_ELLC | LE_LRUM(3), \
L3_3_WB)
-static const struct drm_i915_mocs_entry skylake_mocs_table[] = {
+static const struct drm_i915_mocs_entry skl_mocs_table[] = {
GEN9_MOCS_ENTRIES,
MOCS_ENTRY(I915_MOCS_CACHED,
LE_3_WB | LE_TC_2_LLC_ELLC | LE_LRUM(3),
@@ -136,14 +143,7 @@ static const struct drm_i915_mocs_entry broxton_mocs_table[] = {
};
#define GEN11_MOCS_ENTRIES \
- /* Base - Uncached (Deprecated) */ \
- MOCS_ENTRY(I915_MOCS_UNCACHED, \
- LE_1_UC | LE_TC_1_LLC, \
- L3_1_UC), \
- /* Base - L3 + LeCC:PAT (Deprecated) */ \
- MOCS_ENTRY(I915_MOCS_PTE, \
- LE_0_PAGETABLE | LE_TC_1_LLC, \
- L3_3_WB), \
+ /* Entries 0 and 1 are defined per-platform */ \
/* Base - L3 + LLC */ \
MOCS_ENTRY(2, \
LE_3_WB | LE_TC_1_LLC | LE_LRUM(3), \
@@ -200,14 +200,6 @@ static const struct drm_i915_mocs_entry broxton_mocs_table[] = {
MOCS_ENTRY(15, \
LE_3_WB | LE_TC_1_LLC | LE_LRUM(2) | LE_AOM(1), \
L3_3_WB), \
- /* Bypass LLC - Uncached (EHL+) */ \
- MOCS_ENTRY(16, \
- LE_1_UC | LE_TC_1_LLC | LE_SCF(1), \
- L3_1_UC), \
- /* Bypass LLC - L3 (Read-Only) (EHL+) */ \
- MOCS_ENTRY(17, \
- LE_1_UC | LE_TC_1_LLC | LE_SCF(1), \
- L3_3_WB), \
/* Self-Snoop - L3 + LLC */ \
MOCS_ENTRY(18, \
LE_3_WB | LE_TC_1_LLC | LE_LRUM(3) | LE_SSE(3), \
@@ -241,79 +233,92 @@ static const struct drm_i915_mocs_entry broxton_mocs_table[] = {
LE_3_WB | LE_TC_1_LLC | LE_LRUM(3), \
L3_1_UC)
-static const struct drm_i915_mocs_entry icelake_mocs_table[] = {
+static const struct drm_i915_mocs_entry tgl_mocs_table[] = {
+ /* Base - Error (Reserved for Non-Use) */
+ MOCS_ENTRY(0, 0x0, 0x0),
+ /* Base - Reserved */
+ MOCS_ENTRY(1, 0x0, 0x0),
+
+ GEN11_MOCS_ENTRIES,
+
+ /* Implicitly enable L1 - HDC:L1 + L3 + LLC */
+ MOCS_ENTRY(48,
+ LE_3_WB | LE_TC_1_LLC | LE_LRUM(3),
+ L3_3_WB),
+ /* Implicitly enable L1 - HDC:L1 + L3 */
+ MOCS_ENTRY(49,
+ LE_1_UC | LE_TC_1_LLC,
+ L3_3_WB),
+ /* Implicitly enable L1 - HDC:L1 + LLC */
+ MOCS_ENTRY(50,
+ LE_3_WB | LE_TC_1_LLC | LE_LRUM(3),
+ L3_1_UC),
+ /* Implicitly enable L1 - HDC:L1 */
+ MOCS_ENTRY(51,
+ LE_1_UC | LE_TC_1_LLC,
+ L3_1_UC),
+ /* HW Special Case (CCS) */
+ MOCS_ENTRY(60,
+ LE_3_WB | LE_TC_1_LLC | LE_LRUM(3),
+ L3_1_UC),
+ /* HW Special Case (Displayable) */
+ MOCS_ENTRY(61,
+ LE_1_UC | LE_TC_1_LLC,
+ L3_3_WB),
+};
+
+static const struct drm_i915_mocs_entry icl_mocs_table[] = {
+ /* Base - Uncached (Deprecated) */
+ MOCS_ENTRY(I915_MOCS_UNCACHED,
+ LE_1_UC | LE_TC_1_LLC,
+ L3_1_UC),
+ /* Base - L3 + LeCC:PAT (Deprecated) */
+ MOCS_ENTRY(I915_MOCS_PTE,
+ LE_0_PAGETABLE | LE_TC_1_LLC,
+ L3_3_WB),
+
GEN11_MOCS_ENTRIES
};
-/**
- * get_mocs_settings()
- * @dev_priv: i915 device.
- * @table: Output table that will be made to point at appropriate
- * MOCS values for the device.
- *
- * This function will return the values of the MOCS table that needs to
- * be programmed for the platform. It will return the values that need
- * to be programmed and if they need to be programmed.
- *
- * Return: true if there are applicable MOCS settings for the device.
- */
-static bool get_mocs_settings(struct drm_i915_private *dev_priv,
+static bool get_mocs_settings(const struct drm_i915_private *i915,
struct drm_i915_mocs_table *table)
{
- bool result = false;
-
- if (INTEL_GEN(dev_priv) >= 11) {
- table->size = ARRAY_SIZE(icelake_mocs_table);
- table->table = icelake_mocs_table;
+ if (INTEL_GEN(i915) >= 12) {
+ table->size = ARRAY_SIZE(tgl_mocs_table);
+ table->table = tgl_mocs_table;
+ table->n_entries = GEN11_NUM_MOCS_ENTRIES;
+ } else if (IS_GEN(i915, 11)) {
+ table->size = ARRAY_SIZE(icl_mocs_table);
+ table->table = icl_mocs_table;
table->n_entries = GEN11_NUM_MOCS_ENTRIES;
- result = true;
- } else if (IS_GEN9_BC(dev_priv) || IS_CANNONLAKE(dev_priv)) {
- table->size = ARRAY_SIZE(skylake_mocs_table);
+ } else if (IS_GEN9_BC(i915) || IS_CANNONLAKE(i915)) {
+ table->size = ARRAY_SIZE(skl_mocs_table);
table->n_entries = GEN9_NUM_MOCS_ENTRIES;
- table->table = skylake_mocs_table;
- result = true;
- } else if (IS_GEN9_LP(dev_priv)) {
+ table->table = skl_mocs_table;
+ } else if (IS_GEN9_LP(i915)) {
table->size = ARRAY_SIZE(broxton_mocs_table);
table->n_entries = GEN9_NUM_MOCS_ENTRIES;
table->table = broxton_mocs_table;
- result = true;
} else {
- WARN_ONCE(INTEL_GEN(dev_priv) >= 9,
+ WARN_ONCE(INTEL_GEN(i915) >= 9,
"Platform that should have a MOCS table does not.\n");
+ return false;
}
+ if (GEM_DEBUG_WARN_ON(table->size > table->n_entries))
+ return false;
+
/* WaDisableSkipCaching:skl,bxt,kbl,glk */
- if (IS_GEN(dev_priv, 9)) {
+ if (IS_GEN(i915, 9)) {
int i;
for (i = 0; i < table->size; i++)
- if (WARN_ON(table->table[i].l3cc_value &
- (L3_ESC(1) | L3_SCC(0x7))))
+ if (GEM_DEBUG_WARN_ON(table->table[i].l3cc_value &
+ (L3_ESC(1) | L3_SCC(0x7))))
return false;
}
- return result;
-}
-
-static i915_reg_t mocs_register(enum intel_engine_id engine_id, int index)
-{
- switch (engine_id) {
- case RCS0:
- return GEN9_GFX_MOCS(index);
- case VCS0:
- return GEN9_MFX0_MOCS(index);
- case BCS0:
- return GEN9_BLT_MOCS(index);
- case VECS0:
- return GEN9_VEBOX_MOCS(index);
- case VCS1:
- return GEN9_MFX1_MOCS(index);
- case VCS2:
- return GEN11_MFX2_MOCS(index);
- default:
- MISSING_CASE(engine_id);
- return INVALID_MMIO_REG;
- }
+ return true;
}
/*
@@ -323,90 +328,47 @@ static i915_reg_t mocs_register(enum intel_engine_id engine_id, int index)
static u32 get_entry_control(const struct drm_i915_mocs_table *table,
unsigned int index)
{
- if (table->table[index].used)
+ if (index < table->size && table->table[index].used)
return table->table[index].control_value;
return table->table[I915_MOCS_PTE].control_value;
}
-/**
- * intel_mocs_init_engine() - emit the mocs control table
- * @engine: The engine for whom to emit the registers.
- *
- * This function simply emits a MI_LOAD_REGISTER_IMM command for the
- * given table starting at the given address.
- */
-void intel_mocs_init_engine(struct intel_engine_cs *engine)
-{
- struct drm_i915_private *dev_priv = engine->i915;
- struct drm_i915_mocs_table table;
- unsigned int index;
- u32 unused_value;
+#define for_each_mocs(mocs, t, i) \
+ for (i = 0; \
+ i < (t)->n_entries ? (mocs = get_entry_control((t), i)), 1 : 0;\
+ i++)
- if (!get_mocs_settings(dev_priv, &table))
- return;
-
- /* Set unused values to PTE */
- unused_value = table.table[I915_MOCS_PTE].control_value;
-
- for (index = 0; index < table.size; index++) {
- u32 value = get_entry_control(&table, index);
-
- I915_WRITE(mocs_register(engine->id, index), value);
- }
+static void __init_mocs_table(struct intel_uncore *uncore,
+ const struct drm_i915_mocs_table *table,
+ u32 addr)
+{
+ unsigned int i;
+ u32 mocs;
- /* All remaining entries are also unused */
- for (; index < table.n_entries; index++)
- I915_WRITE(mocs_register(engine->id, index), unused_value);
+ for_each_mocs(mocs, table, i)
+ intel_uncore_write_fw(uncore, _MMIO(addr + i * 4), mocs);
}
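
For clarity, a userspace sketch of what for_each_mocs() relies on: get_entry_control() falls back to the I915_MOCS_PTE entry for unused or out-of-range indices, so every index up to n_entries yields a programmable value. Table contents here are made up:

#include <stdbool.h>
#include <stdint.h>
#include <stdio.h>

#define I915_MOCS_PTE 1
#define N_ENTRIES 8 /* illustrative, not a real platform count */

struct entry { uint32_t control_value; bool used; };

static const struct entry table[] = {
	[0]             = { 0x11, true },
	[I915_MOCS_PTE] = { 0x22, true },
	[2]             = { 0x33, false }, /* unused: falls back to PTE */
};

static uint32_t get_entry_control(unsigned int index)
{
	if (index < sizeof(table) / sizeof(table[0]) && table[index].used)
		return table[index].control_value;
	return table[I915_MOCS_PTE].control_value;
}

int main(void)
{
	for (unsigned int i = 0; i < N_ENTRIES; i++)
		printf("mocs[%u] = 0x%02x\n", i, get_entry_control(i));
	return 0;
}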
-/**
- * emit_mocs_control_table() - emit the mocs control table
- * @rq: Request to set up the MOCS table for.
- * @table: The values to program into the control regs.
- *
- * This function simply emits a MI_LOAD_REGISTER_IMM command for the
- * given table starting at the given address.
- *
- * Return: 0 on success, otherwise the error status.
- */
-static int emit_mocs_control_table(struct i915_request *rq,
- const struct drm_i915_mocs_table *table)
+static u32 mocs_offset(const struct intel_engine_cs *engine)
{
- enum intel_engine_id engine = rq->engine->id;
- unsigned int index;
- u32 unused_value;
- u32 *cs;
-
- if (GEM_WARN_ON(table->size > table->n_entries))
- return -ENODEV;
-
- /* Set unused values to PTE */
- unused_value = table->table[I915_MOCS_PTE].control_value;
-
- cs = intel_ring_begin(rq, 2 + 2 * table->n_entries);
- if (IS_ERR(cs))
- return PTR_ERR(cs);
-
- *cs++ = MI_LOAD_REGISTER_IMM(table->n_entries);
-
- for (index = 0; index < table->size; index++) {
- u32 value = get_entry_control(table, index);
-
- *cs++ = i915_mmio_reg_offset(mocs_register(engine, index));
- *cs++ = value;
- }
-
- /* All remaining entries are also unused */
- for (; index < table->n_entries; index++) {
- *cs++ = i915_mmio_reg_offset(mocs_register(engine, index));
- *cs++ = unused_value;
- }
-
- *cs++ = MI_NOOP;
- intel_ring_advance(rq, cs);
+ static const u32 offset[] = {
+ [RCS0] = __GEN9_RCS0_MOCS0,
+ [VCS0] = __GEN9_VCS0_MOCS0,
+ [VCS1] = __GEN9_VCS1_MOCS0,
+ [VECS0] = __GEN9_VECS0_MOCS0,
+ [BCS0] = __GEN9_BCS0_MOCS0,
+ [VCS2] = __GEN11_VCS2_MOCS0,
+ };
+
+ GEM_BUG_ON(engine->id >= ARRAY_SIZE(offset));
+ return offset[engine->id];
+}
- return 0;
+static void init_mocs_table(struct intel_engine_cs *engine,
+ const struct drm_i915_mocs_table *table)
+{
+ __init_mocs_table(engine->uncore, table, mocs_offset(engine));
}
/*
@@ -416,159 +378,81 @@ static int emit_mocs_control_table(struct i915_request *rq,
static u16 get_entry_l3cc(const struct drm_i915_mocs_table *table,
unsigned int index)
{
- if (table->table[index].used)
+ if (index < table->size && table->table[index].used)
return table->table[index].l3cc_value;
return table->table[I915_MOCS_PTE].l3cc_value;
}
-static inline u32 l3cc_combine(const struct drm_i915_mocs_table *table,
- u16 low,
- u16 high)
+static inline u32 l3cc_combine(u16 low, u16 high)
{
- return low | high << 16;
+ return low | (u32)high << 16;
}
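
Worked example of the pairing above: two 16-bit l3cc entries share one 32-bit LNCFCMOCS register, the even index in the low half and the odd index in the high half. The u32 cast matters, since a u16 promotes to int and shifting a set bit 15 into bit 31 would be undefined behaviour:

#include <stdint.h>
#include <stdio.h>

static uint32_t l3cc_combine(uint16_t low, uint16_t high)
{
	return low | (uint32_t)high << 16;
}

int main(void)
{
	/* entries 2*i and 2*i + 1 fold into a single register write */
	printf("0x%08x\n", l3cc_combine(0x0030, 0x8010)); /* 0x80100030 */
	return 0;
}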
-/**
- * emit_mocs_l3cc_table() - emit the mocs control table
- * @rq: Request to set up the MOCS table for.
- * @table: The values to program into the control regs.
- *
- * This function simply emits a MI_LOAD_REGISTER_IMM command for the
- * given table starting at the given address. This register set is
- * programmed in pairs.
- *
- * Return: 0 on success, otherwise the error status.
- */
-static int emit_mocs_l3cc_table(struct i915_request *rq,
- const struct drm_i915_mocs_table *table)
+#define for_each_l3cc(l3cc, t, i) \
+ for (i = 0; \
+ i < ((t)->n_entries + 1) / 2 ? \
+ (l3cc = l3cc_combine(get_entry_l3cc((t), 2 * i), \
+ get_entry_l3cc((t), 2 * i + 1))), 1 : \
+ 0; \
+ i++)
+
+static void init_l3cc_table(struct intel_engine_cs *engine,
+ const struct drm_i915_mocs_table *table)
{
- u16 unused_value;
+ struct intel_uncore *uncore = engine->uncore;
unsigned int i;
- u32 *cs;
-
- if (GEM_WARN_ON(table->size > table->n_entries))
- return -ENODEV;
+ u32 l3cc;
- /* Set unused values to PTE */
- unused_value = table->table[I915_MOCS_PTE].l3cc_value;
-
- cs = intel_ring_begin(rq, 2 + table->n_entries);
- if (IS_ERR(cs))
- return PTR_ERR(cs);
-
- *cs++ = MI_LOAD_REGISTER_IMM(table->n_entries / 2);
-
- for (i = 0; i < table->size / 2; i++) {
- u16 low = get_entry_l3cc(table, 2 * i);
- u16 high = get_entry_l3cc(table, 2 * i + 1);
+ for_each_l3cc(l3cc, table, i)
+ intel_uncore_write_fw(uncore, GEN9_LNCFCMOCS(i), l3cc);
+}
- *cs++ = i915_mmio_reg_offset(GEN9_LNCFCMOCS(i));
- *cs++ = l3cc_combine(table, low, high);
- }
+void intel_mocs_init_engine(struct intel_engine_cs *engine)
+{
+ struct drm_i915_mocs_table table;
- /* Odd table size - 1 left over */
- if (table->size & 0x01) {
- u16 low = get_entry_l3cc(table, 2 * i);
+ /* Called under a blanket forcewake */
+ assert_forcewakes_active(engine->uncore, FORCEWAKE_ALL);
- *cs++ = i915_mmio_reg_offset(GEN9_LNCFCMOCS(i));
- *cs++ = l3cc_combine(table, low, unused_value);
- i++;
- }
+ if (!get_mocs_settings(engine->i915, &table))
+ return;
- /* All remaining entries are also unused */
- for (; i < table->n_entries / 2; i++) {
- *cs++ = i915_mmio_reg_offset(GEN9_LNCFCMOCS(i));
- *cs++ = l3cc_combine(table, unused_value, unused_value);
- }
+ /* Platforms with global MOCS do not need per-engine initialization. */
+ if (!HAS_GLOBAL_MOCS_REGISTERS(engine->i915))
+ init_mocs_table(engine, &table);
- *cs++ = MI_NOOP;
- intel_ring_advance(rq, cs);
+ if (engine->class == RENDER_CLASS)
+ init_l3cc_table(engine, &table);
+}
- return 0;
+static u32 global_mocs_offset(void)
+{
+ return i915_mmio_reg_offset(GEN12_GLOBAL_MOCS(0));
}
-/**
- * intel_mocs_init_l3cc_table() - program the mocs control table
- * @dev_priv: i915 device private
- *
- * This function simply programs the mocs registers for the given table
- * starting at the given address. This register set is programmed in pairs.
- *
- * These registers may get programmed more than once, it is simpler to
- * re-program 32 registers than maintain the state of when they were programmed.
- * We are always reprogramming with the same values and this only on context
- * start.
- *
- * Return: Nothing.
- */
-void intel_mocs_init_l3cc_table(struct drm_i915_private *dev_priv)
+static void init_global_mocs(struct intel_gt *gt)
{
struct drm_i915_mocs_table table;
- unsigned int i;
- u16 unused_value;
- if (!get_mocs_settings(dev_priv, &table))
+ /*
+ * LLC and eDRAM control values are not applicable to dgfx
+ */
+ if (IS_DGFX(gt->i915))
return;
- /* Set unused values to PTE */
- unused_value = table.table[I915_MOCS_PTE].l3cc_value;
-
- for (i = 0; i < table.size / 2; i++) {
- u16 low = get_entry_l3cc(&table, 2 * i);
- u16 high = get_entry_l3cc(&table, 2 * i + 1);
-
- I915_WRITE(GEN9_LNCFCMOCS(i),
- l3cc_combine(&table, low, high));
- }
-
- /* Odd table size - 1 left over */
- if (table.size & 0x01) {
- u16 low = get_entry_l3cc(&table, 2 * i);
-
- I915_WRITE(GEN9_LNCFCMOCS(i),
- l3cc_combine(&table, low, unused_value));
- i++;
- }
+ if (!get_mocs_settings(gt->i915, &table))
+ return;
- /* All remaining entries are also unused */
- for (; i < table.n_entries / 2; i++)
- I915_WRITE(GEN9_LNCFCMOCS(i),
- l3cc_combine(&table, unused_value, unused_value));
+ __init_mocs_table(gt->uncore, &table, global_mocs_offset());
}
-/**
- * intel_rcs_context_init_mocs() - program the MOCS register.
- * @rq: Request to set up the MOCS tables for.
- *
- * This function will emit a batch buffer with the values required for
- * programming the MOCS register values for all the currently supported
- * rings.
- *
- * These registers are partially stored in the RCS context, so they are
- * emitted at the same time so that when a context is created these registers
- * are set up. These registers have to be emitted into the start of the
- * context as setting the ELSP will re-init some of these registers back
- * to the hw values.
- *
- * Return: 0 on success, otherwise the error status.
- */
-int intel_rcs_context_init_mocs(struct i915_request *rq)
+void intel_mocs_init(struct intel_gt *gt)
{
- struct drm_i915_mocs_table t;
- int ret;
-
- if (get_mocs_settings(rq->i915, &t)) {
- /* Program the RCS control registers */
- ret = emit_mocs_control_table(rq, &t);
- if (ret)
- return ret;
-
- /* Now program the l3cc registers */
- ret = emit_mocs_l3cc_table(rq, &t);
- if (ret)
- return ret;
- }
-
- return 0;
+ if (HAS_GLOBAL_MOCS_REGISTERS(gt->i915))
+ init_global_mocs(gt);
}
+
+#if IS_ENABLED(CONFIG_DRM_I915_SELFTEST)
+#include "selftest_mocs.c"
+#endif
diff --git a/drivers/gpu/drm/i915/gt/intel_mocs.h b/drivers/gpu/drm/i915/gt/intel_mocs.h
index 0913704a1af2..83371f3e6ba1 100644
--- a/drivers/gpu/drm/i915/gt/intel_mocs.h
+++ b/drivers/gpu/drm/i915/gt/intel_mocs.h
@@ -49,12 +49,10 @@
* context handling keep the MOCS in step.
*/
-struct drm_i915_private;
-struct i915_request;
struct intel_engine_cs;
+struct intel_gt;
-int intel_rcs_context_init_mocs(struct i915_request *rq);
-void intel_mocs_init_l3cc_table(struct drm_i915_private *dev_priv);
+void intel_mocs_init(struct intel_gt *gt);
void intel_mocs_init_engine(struct intel_engine_cs *engine);
#endif
diff --git a/drivers/gpu/drm/i915/gt/intel_ppgtt.c b/drivers/gpu/drm/i915/gt/intel_ppgtt.c
new file mode 100644
index 000000000000..f86f7e68ce5e
--- /dev/null
+++ b/drivers/gpu/drm/i915/gt/intel_ppgtt.c
@@ -0,0 +1,218 @@
+// SPDX-License-Identifier: MIT
+/*
+ * Copyright © 2020 Intel Corporation
+ */
+
+#include <linux/slab.h>
+
+#include "i915_trace.h"
+#include "intel_gtt.h"
+#include "gen6_ppgtt.h"
+#include "gen8_ppgtt.h"
+
+struct i915_page_table *alloc_pt(struct i915_address_space *vm)
+{
+ struct i915_page_table *pt;
+
+ pt = kmalloc(sizeof(*pt), I915_GFP_ALLOW_FAIL);
+ if (unlikely(!pt))
+ return ERR_PTR(-ENOMEM);
+
+ if (unlikely(setup_page_dma(vm, &pt->base))) {
+ kfree(pt);
+ return ERR_PTR(-ENOMEM);
+ }
+
+ atomic_set(&pt->used, 0);
+ return pt;
+}
+
+struct i915_page_directory *__alloc_pd(size_t sz)
+{
+ struct i915_page_directory *pd;
+
+ pd = kzalloc(sz, I915_GFP_ALLOW_FAIL);
+ if (unlikely(!pd))
+ return NULL;
+
+ spin_lock_init(&pd->lock);
+ return pd;
+}
+
+struct i915_page_directory *alloc_pd(struct i915_address_space *vm)
+{
+ struct i915_page_directory *pd;
+
+ pd = __alloc_pd(sizeof(*pd));
+ if (unlikely(!pd))
+ return ERR_PTR(-ENOMEM);
+
+ if (unlikely(setup_page_dma(vm, px_base(pd)))) {
+ kfree(pd);
+ return ERR_PTR(-ENOMEM);
+ }
+
+ return pd;
+}
+
+void free_pd(struct i915_address_space *vm, struct i915_page_dma *pd)
+{
+ cleanup_page_dma(vm, pd);
+ kfree(pd);
+}
+
+static inline void
+write_dma_entry(struct i915_page_dma * const pdma,
+ const unsigned short idx,
+ const u64 encoded_entry)
+{
+ u64 * const vaddr = kmap_atomic(pdma->page);
+
+ vaddr[idx] = encoded_entry;
+ kunmap_atomic(vaddr);
+}
+
+void
+__set_pd_entry(struct i915_page_directory * const pd,
+ const unsigned short idx,
+ struct i915_page_dma * const to,
+ u64 (*encode)(const dma_addr_t, const enum i915_cache_level))
+{
+ /* Each thread pre-pins the pd, and we may have a thread per pde. */
+ GEM_BUG_ON(atomic_read(px_used(pd)) > NALLOC * ARRAY_SIZE(pd->entry));
+
+ atomic_inc(px_used(pd));
+ pd->entry[idx] = to;
+ write_dma_entry(px_base(pd), idx, encode(to->daddr, I915_CACHE_LLC));
+}
+
+void
+clear_pd_entry(struct i915_page_directory * const pd,
+ const unsigned short idx,
+ const struct i915_page_scratch * const scratch)
+{
+ GEM_BUG_ON(atomic_read(px_used(pd)) == 0);
+
+ write_dma_entry(px_base(pd), idx, scratch->encode);
+ pd->entry[idx] = NULL;
+ atomic_dec(px_used(pd));
+}
+
+bool
+release_pd_entry(struct i915_page_directory * const pd,
+ const unsigned short idx,
+ struct i915_page_table * const pt,
+ const struct i915_page_scratch * const scratch)
+{
+ bool free = false;
+
+ if (atomic_add_unless(&pt->used, -1, 1))
+ return false;
+
+ spin_lock(&pd->lock);
+ if (atomic_dec_and_test(&pt->used)) {
+ clear_pd_entry(pd, idx, scratch);
+ free = true;
+ }
+ spin_unlock(&pd->lock);
+
+ return free;
+}
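
The function above is a two-phase refcount drop: atomic_add_unless() decrements lock-free while the count stays above one, and only the final put takes the lock so the parent slot is cleared exactly once. A generic C11 sketch of the same pattern (names hypothetical):

#include <pthread.h>
#include <stdatomic.h>
#include <stdbool.h>

static pthread_mutex_t parent_lock = PTHREAD_MUTEX_INITIALIZER;

static bool put_entry(_Atomic int *used)
{
	int cur = atomic_load(used);
	bool free_entry = false;

	/* Fast path: decrement as long as the result cannot hit zero. */
	while (cur > 1)
		if (atomic_compare_exchange_weak(used, &cur, cur - 1))
			return false;

	pthread_mutex_lock(&parent_lock);
	if (atomic_fetch_sub(used, 1) == 1) {
		/* Last reference: safe to clear the parent entry here. */
		free_entry = true;
	}
	pthread_mutex_unlock(&parent_lock);

	return free_entry;
}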
+
+int i915_ppgtt_init_hw(struct intel_gt *gt)
+{
+ struct drm_i915_private *i915 = gt->i915;
+
+ gtt_write_workarounds(gt);
+
+ if (IS_GEN(i915, 6))
+ gen6_ppgtt_enable(gt);
+ else if (IS_GEN(i915, 7))
+ gen7_ppgtt_enable(gt);
+
+ return 0;
+}
+
+static struct i915_ppgtt *
+__ppgtt_create(struct intel_gt *gt)
+{
+ if (INTEL_GEN(gt->i915) < 8)
+ return gen6_ppgtt_create(gt);
+ else
+ return gen8_ppgtt_create(gt);
+}
+
+struct i915_ppgtt *i915_ppgtt_create(struct intel_gt *gt)
+{
+ struct i915_ppgtt *ppgtt;
+
+ ppgtt = __ppgtt_create(gt);
+ if (IS_ERR(ppgtt))
+ return ppgtt;
+
+ trace_i915_ppgtt_create(&ppgtt->vm);
+
+ return ppgtt;
+}
+
+static int ppgtt_bind_vma(struct i915_vma *vma,
+ enum i915_cache_level cache_level,
+ u32 flags)
+{
+ u32 pte_flags;
+ int err;
+
+ if (flags & I915_VMA_ALLOC) {
+ err = vma->vm->allocate_va_range(vma->vm,
+ vma->node.start, vma->size);
+ if (err)
+ return err;
+
+ set_bit(I915_VMA_ALLOC_BIT, __i915_vma_flags(vma));
+ }
+
+ /* Applicable to VLV, and gen8+ */
+ pte_flags = 0;
+ if (i915_gem_object_is_readonly(vma->obj))
+ pte_flags |= PTE_READ_ONLY;
+
+ GEM_BUG_ON(!test_bit(I915_VMA_ALLOC_BIT, __i915_vma_flags(vma)));
+ vma->vm->insert_entries(vma->vm, vma, cache_level, pte_flags);
+ wmb();
+
+ return 0;
+}
+
+static void ppgtt_unbind_vma(struct i915_vma *vma)
+{
+ if (test_and_clear_bit(I915_VMA_ALLOC_BIT, __i915_vma_flags(vma)))
+ vma->vm->clear_range(vma->vm, vma->node.start, vma->size);
+}
+
+int ppgtt_set_pages(struct i915_vma *vma)
+{
+ GEM_BUG_ON(vma->pages);
+
+ vma->pages = vma->obj->mm.pages;
+
+ vma->page_sizes = vma->obj->mm.page_sizes;
+
+ return 0;
+}
+
+void ppgtt_init(struct i915_ppgtt *ppgtt, struct intel_gt *gt)
+{
+ struct drm_i915_private *i915 = gt->i915;
+
+ ppgtt->vm.gt = gt;
+ ppgtt->vm.i915 = i915;
+ ppgtt->vm.dma = &i915->drm.pdev->dev;
+ ppgtt->vm.total = BIT_ULL(INTEL_INFO(i915)->ppgtt_size);
+
+ i915_address_space_init(&ppgtt->vm, VM_CLASS_PPGTT);
+
+ ppgtt->vm.vma_ops.bind_vma = ppgtt_bind_vma;
+ ppgtt->vm.vma_ops.unbind_vma = ppgtt_unbind_vma;
+ ppgtt->vm.vma_ops.set_pages = ppgtt_set_pages;
+ ppgtt->vm.vma_ops.clear_pages = clear_pages;
+}
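
One detail worth spelling out: ppgtt->vm.total is derived from an address width, so BIT_ULL(ppgtt_size) converts bits of virtual address space into bytes. A quick check with illustrative widths:

#include <stdio.h>

#define BIT_ULL(n) (1ull << (n))

int main(void)
{
	printf("48-bit PPGTT: %llu GiB\n", BIT_ULL(48) >> 30); /* 262144 */
	printf("32-bit PPGTT: %llu GiB\n", BIT_ULL(32) >> 30); /* 4 */
	return 0;
}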
diff --git a/drivers/gpu/drm/i915/gt/intel_rc6.c b/drivers/gpu/drm/i915/gt/intel_rc6.c
new file mode 100644
index 000000000000..9e303c29d6e3
--- /dev/null
+++ b/drivers/gpu/drm/i915/gt/intel_rc6.c
@@ -0,0 +1,776 @@
+/*
+ * SPDX-License-Identifier: MIT
+ *
+ * Copyright © 2019 Intel Corporation
+ */
+
+#include <linux/pm_runtime.h>
+
+#include "i915_drv.h"
+#include "intel_gt.h"
+#include "intel_gt_pm.h"
+#include "intel_rc6.h"
+#include "intel_sideband.h"
+
+/**
+ * DOC: RC6
+ *
+ * RC6 is a special power stage which allows the GPU to enter a very
+ * low-voltage mode when idle, using down to 0V while in this stage. The
+ * stage is entered automatically when the GPU is idle if RC6 support is
+ * enabled, and the GPU wakes up automatically as soon as a new workload
+ * arrives.
+ *
+ * There are different RC6 modes available in Intel GPUs, which differ from
+ * each other in the latency required to enter and leave RC6, and in the
+ * voltage consumed by the GPU in the different states.
+ *
+ * The combination of the following flags defines which states the GPU is
+ * allowed to enter: RC6 is the normal RC6 state, RC6p is the deep RC6, and
+ * RC6pp is the deepest RC6. Their support by hardware varies according to
+ * the GPU, BIOS, chipset and platform. RC6 is usually the safest one and
+ * the one which brings the most power savings; deeper states save more
+ * power, but require higher latency to switch to and wake up.
+ */
+
+static struct intel_gt *rc6_to_gt(struct intel_rc6 *rc6)
+{
+ return container_of(rc6, struct intel_gt, rc6);
+}
+
+static struct intel_uncore *rc6_to_uncore(struct intel_rc6 *rc)
+{
+ return rc6_to_gt(rc)->uncore;
+}
+
+static struct drm_i915_private *rc6_to_i915(struct intel_rc6 *rc)
+{
+ return rc6_to_gt(rc)->i915;
+}
+
+static inline void set(struct intel_uncore *uncore, i915_reg_t reg, u32 val)
+{
+ intel_uncore_write_fw(uncore, reg, val);
+}
+
+static void gen11_rc6_enable(struct intel_rc6 *rc6)
+{
+ struct intel_uncore *uncore = rc6_to_uncore(rc6);
+ struct intel_engine_cs *engine;
+ enum intel_engine_id id;
+
+ /* 2b: Program RC6 thresholds. */
+ set(uncore, GEN6_RC6_WAKE_RATE_LIMIT, 54 << 16 | 85);
+ set(uncore, GEN10_MEDIA_WAKE_RATE_LIMIT, 150);
+
+ set(uncore, GEN6_RC_EVALUATION_INTERVAL, 125000); /* 12500 * 1280ns */
+ set(uncore, GEN6_RC_IDLE_HYSTERSIS, 25); /* 25 * 1280ns */
+ for_each_engine(engine, rc6_to_gt(rc6), id)
+ set(uncore, RING_MAX_IDLE(engine->mmio_base), 10);
+
+ set(uncore, GUC_MAX_IDLE_COUNT, 0xA);
+
+ set(uncore, GEN6_RC_SLEEP, 0);
+
+ set(uncore, GEN6_RC6_THRESHOLD, 50000); /* 50/125ms per EI */
+
+ /*
+ * 2c: Program Coarse Power Gating Policies.
+ *
+ * Bspec's guidance is to use 25us (really 25 * 1280ns) here. What we
+ * use instead is a more conservative estimate for the maximum time
+ * it takes us to service a CS interrupt and submit a new ELSP - that
+ * is the time which the GPU is idle waiting for the CPU to select the
+ * next request to execute. If the idle hysteresis is less than that
+ * interrupt service latency, the hardware will automatically gate
+ * the power well and we will then incur the wake up cost on top of
+ * the service latency. A similar guide from plane_state is that we
+ * do not want the enable hysteresis to be less than the wakeup latency.
+ *
+ * igt/gem_exec_nop/sequential provides a rough estimate for the
+ * service latency, and puts it under 10us for Icelake, similar to
+ * Broadwell+. To be conservative, we want to factor in a context
+ * switch on top (due to ksoftirqd).
+ */
+ set(uncore, GEN9_MEDIA_PG_IDLE_HYSTERESIS, 60);
+ set(uncore, GEN9_RENDER_PG_IDLE_HYSTERESIS, 60);
+
+ /* 3a: Enable RC6 */
+ rc6->ctl_enable =
+ GEN6_RC_CTL_HW_ENABLE |
+ GEN6_RC_CTL_RC6_ENABLE |
+ GEN6_RC_CTL_EI_MODE(1);
+
+ set(uncore, GEN9_PG_ENABLE,
+ GEN9_RENDER_PG_ENABLE |
+ GEN9_MEDIA_PG_ENABLE |
+ GEN11_MEDIA_SAMPLER_PG_ENABLE);
+}
+
+static void gen9_rc6_enable(struct intel_rc6 *rc6)
+{
+ struct intel_uncore *uncore = rc6_to_uncore(rc6);
+ struct intel_engine_cs *engine;
+ enum intel_engine_id id;
+ u32 rc6_mode;
+
+ /* 2b: Program RC6 thresholds. */
+ if (INTEL_GEN(rc6_to_i915(rc6)) >= 10) {
+ set(uncore, GEN6_RC6_WAKE_RATE_LIMIT, 54 << 16 | 85);
+ set(uncore, GEN10_MEDIA_WAKE_RATE_LIMIT, 150);
+ } else if (IS_SKYLAKE(rc6_to_i915(rc6))) {
+ /*
+ * WaRsDoubleRc6WrlWithCoarsePowerGating:skl Doubling WRL only
+ * when CPG is enabled
+ */
+ set(uncore, GEN6_RC6_WAKE_RATE_LIMIT, 108 << 16);
+ } else {
+ set(uncore, GEN6_RC6_WAKE_RATE_LIMIT, 54 << 16);
+ }
+
+ set(uncore, GEN6_RC_EVALUATION_INTERVAL, 125000); /* 12500 * 1280ns */
+ set(uncore, GEN6_RC_IDLE_HYSTERSIS, 25); /* 25 * 1280ns */
+ for_each_engine(engine, rc6_to_gt(rc6), id)
+ set(uncore, RING_MAX_IDLE(engine->mmio_base), 10);
+
+ set(uncore, GUC_MAX_IDLE_COUNT, 0xA);
+
+ set(uncore, GEN6_RC_SLEEP, 0);
+
+ /*
+ * 2c: Program Coarse Power Gating Policies.
+ *
+ * Bspec's guidance is to use 25us (really 25 * 1280ns) here. What we
+ * use instead is a more conservative estimate for the maximum time
+ * it takes us to service a CS interrupt and submit a new ELSP - that
+ * is the time which the GPU is idle waiting for the CPU to select the
+ * next request to execute. If the idle hysteresis is less than that
+ * interrupt service latency, the hardware will automatically gate
+ * the power well and we will then incur the wake up cost on top of
+ * the service latency. A similar guide from plane_state is that we
+ * do not want the enable hysteresis to be less than the wakeup latency.
+ *
+ * igt/gem_exec_nop/sequential provides a rough estimate for the
+ * service latency, and puts it around 10us for Broadwell (and other
+ * big core) and around 40us for Broxton (and other low power cores).
+ * [Note that for legacy ringbuffer submission, this is less than 1us!]
+ * However, the wakeup latency on Broxton is closer to 100us. To be
+ * conservative, we have to factor in a context switch on top (due
+ * to ksoftirqd).
+ */
+ set(uncore, GEN9_MEDIA_PG_IDLE_HYSTERESIS, 250);
+ set(uncore, GEN9_RENDER_PG_IDLE_HYSTERESIS, 250);
+
+ /* 3a: Enable RC6 */
+ set(uncore, GEN6_RC6_THRESHOLD, 37500); /* 37.5/125ms per EI */
+
+ /* WaRsUseTimeoutMode:cnl (pre-prod) */
+ if (IS_CNL_REVID(rc6_to_i915(rc6), CNL_REVID_A0, CNL_REVID_C0))
+ rc6_mode = GEN7_RC_CTL_TO_MODE;
+ else
+ rc6_mode = GEN6_RC_CTL_EI_MODE(1);
+
+ rc6->ctl_enable =
+ GEN6_RC_CTL_HW_ENABLE |
+ GEN6_RC_CTL_RC6_ENABLE |
+ rc6_mode;
+
+ /*
+ * WaRsDisableCoarsePowerGating:skl,cnl
+ * - Render/Media PG need to be disabled with RC6.
+ */
+ if (!NEEDS_WaRsDisableCoarsePowerGating(rc6_to_i915(rc6)))
+ set(uncore, GEN9_PG_ENABLE,
+ GEN9_RENDER_PG_ENABLE | GEN9_MEDIA_PG_ENABLE);
+}
+
+static void gen8_rc6_enable(struct intel_rc6 *rc6)
+{
+ struct intel_uncore *uncore = rc6_to_uncore(rc6);
+ struct intel_engine_cs *engine;
+ enum intel_engine_id id;
+
+ /* 2b: Program RC6 thresholds. */
+ set(uncore, GEN6_RC6_WAKE_RATE_LIMIT, 40 << 16);
+ set(uncore, GEN6_RC_EVALUATION_INTERVAL, 125000); /* 12500 * 1280ns */
+ set(uncore, GEN6_RC_IDLE_HYSTERSIS, 25); /* 25 * 1280ns */
+ for_each_engine(engine, rc6_to_gt(rc6), id)
+ set(uncore, RING_MAX_IDLE(engine->mmio_base), 10);
+ set(uncore, GEN6_RC_SLEEP, 0);
+ set(uncore, GEN6_RC6_THRESHOLD, 625); /* 800us/1.28 for TO */
+
+ /* 3: Enable RC6 */
+ rc6->ctl_enable =
+ GEN6_RC_CTL_HW_ENABLE |
+ GEN7_RC_CTL_TO_MODE |
+ GEN6_RC_CTL_RC6_ENABLE;
+}
+
+static void gen6_rc6_enable(struct intel_rc6 *rc6)
+{
+ struct intel_uncore *uncore = rc6_to_uncore(rc6);
+ struct drm_i915_private *i915 = rc6_to_i915(rc6);
+ struct intel_engine_cs *engine;
+ enum intel_engine_id id;
+ u32 rc6vids, rc6_mask;
+ int ret;
+
+ set(uncore, GEN6_RC1_WAKE_RATE_LIMIT, 1000 << 16);
+ set(uncore, GEN6_RC6_WAKE_RATE_LIMIT, 40 << 16 | 30);
+ set(uncore, GEN6_RC6pp_WAKE_RATE_LIMIT, 30);
+ set(uncore, GEN6_RC_EVALUATION_INTERVAL, 125000);
+ set(uncore, GEN6_RC_IDLE_HYSTERSIS, 25);
+
+ for_each_engine(engine, rc6_to_gt(rc6), id)
+ set(uncore, RING_MAX_IDLE(engine->mmio_base), 10);
+
+ set(uncore, GEN6_RC_SLEEP, 0);
+ set(uncore, GEN6_RC1e_THRESHOLD, 1000);
+ if (IS_IVYBRIDGE(i915))
+ set(uncore, GEN6_RC6_THRESHOLD, 125000);
+ else
+ set(uncore, GEN6_RC6_THRESHOLD, 50000);
+ set(uncore, GEN6_RC6p_THRESHOLD, 150000);
+ set(uncore, GEN6_RC6pp_THRESHOLD, 64000); /* unused */
+
+ /* We don't use those on Haswell */
+ rc6_mask = GEN6_RC_CTL_RC6_ENABLE;
+ if (HAS_RC6p(i915))
+ rc6_mask |= GEN6_RC_CTL_RC6p_ENABLE;
+ if (HAS_RC6pp(i915))
+ rc6_mask |= GEN6_RC_CTL_RC6pp_ENABLE;
+ rc6->ctl_enable =
+ rc6_mask |
+ GEN6_RC_CTL_EI_MODE(1) |
+ GEN6_RC_CTL_HW_ENABLE;
+
+ rc6vids = 0;
+ ret = sandybridge_pcode_read(i915, GEN6_PCODE_READ_RC6VIDS,
+ &rc6vids, NULL);
+ if (IS_GEN(i915, 6) && ret) {
+ DRM_DEBUG_DRIVER("Couldn't check for BIOS workaround\n");
+ } else if (IS_GEN(i915, 6) &&
+ (GEN6_DECODE_RC6_VID(rc6vids & 0xff) < 450)) {
+ DRM_DEBUG_DRIVER("You should update your BIOS. Correcting minimum rc6 voltage (%dmV->%dmV)\n",
+ GEN6_DECODE_RC6_VID(rc6vids & 0xff), 450);
+ rc6vids &= 0xffff00;
+ rc6vids |= GEN6_ENCODE_RC6_VID(450);
+ ret = sandybridge_pcode_write(i915, GEN6_PCODE_WRITE_RC6VIDS, rc6vids);
+ if (ret)
+ DRM_ERROR("Couldn't fix incorrect rc6 voltage\n");
+ }
+}
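
The 450 mV check above decodes the low byte of the pcode reply; assuming the 5 mV-step, 245 mV-base encoding behind GEN6_ENCODE_RC6_VID()/GEN6_DECODE_RC6_VID(), the arithmetic works out as follows:

#include <stdio.h>

/* Assumed encoding: vid = (mV - 245) / 5 */
#define GEN6_ENCODE_RC6_VID(mv)   (((mv) - 245) / 5)
#define GEN6_DECODE_RC6_VID(vids) (((vids) * 5) + 245)

int main(void)
{
	printf("encode(450 mV) = %d\n", GEN6_ENCODE_RC6_VID(450));   /* 41 */
	printf("decode(41)     = %d mV\n", GEN6_DECODE_RC6_VID(41)); /* 450 */
	return 0;
}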
+
+/* Check that the pcbr address is not empty. */
+static int chv_rc6_init(struct intel_rc6 *rc6)
+{
+ struct intel_uncore *uncore = rc6_to_uncore(rc6);
+ resource_size_t pctx_paddr, paddr;
+ resource_size_t pctx_size = 32 * SZ_1K;
+ u32 pcbr;
+
+ pcbr = intel_uncore_read(uncore, VLV_PCBR);
+ if ((pcbr >> VLV_PCBR_ADDR_SHIFT) == 0) {
+ DRM_DEBUG_DRIVER("BIOS didn't set up PCBR, fixing up\n");
+ paddr = rc6_to_i915(rc6)->dsm.end + 1 - pctx_size;
+ GEM_BUG_ON(paddr > U32_MAX);
+
+ pctx_paddr = (paddr & ~4095);
+ intel_uncore_write(uncore, VLV_PCBR, pctx_paddr);
+ }
+
+ return 0;
+}
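
The fixup above carves the power context from the very top of stolen memory and aligns it down to 4K before writing PCBR. With made-up addresses, the computation looks like this:

#include <stdint.h>
#include <stdio.h>

int main(void)
{
	uint64_t dsm_end = 0x8fffffffull;  /* hypothetical end of stolen */
	uint64_t pctx_size = 32 * 1024;    /* 32K, as on CHV */
	uint64_t paddr = dsm_end + 1 - pctx_size;

	printf("PCBR = 0x%llx\n",
	       (unsigned long long)(paddr & ~4095ull)); /* 0x8fff8000 */
	return 0;
}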
+
+static int vlv_rc6_init(struct intel_rc6 *rc6)
+{
+ struct drm_i915_private *i915 = rc6_to_i915(rc6);
+ struct intel_uncore *uncore = rc6_to_uncore(rc6);
+ struct drm_i915_gem_object *pctx;
+ resource_size_t pctx_paddr;
+ resource_size_t pctx_size = 24 * SZ_1K;
+ u32 pcbr;
+
+ pcbr = intel_uncore_read(uncore, VLV_PCBR);
+ if (pcbr) {
+ /* BIOS set it up already, grab the pre-alloc'd space */
+ resource_size_t pcbr_offset;
+
+ pcbr_offset = (pcbr & ~4095) - i915->dsm.start;
+ pctx = i915_gem_object_create_stolen_for_preallocated(i915,
+ pcbr_offset,
+ I915_GTT_OFFSET_NONE,
+ pctx_size);
+ if (IS_ERR(pctx))
+ return PTR_ERR(pctx);
+
+ goto out;
+ }
+
+ DRM_DEBUG_DRIVER("BIOS didn't set up PCBR, fixing up\n");
+
+ /*
+ * From the Gunit register HAS:
+ * The Gfx driver is expected to program this register and ensure
+ * proper allocation within Gfx stolen memory. For example, this
+ * register should be programmed such that the PCBR range does not
+ * overlap with other ranges, such as the frame buffer, protected
+ * memory, or any other relevant ranges.
+ */
+ pctx = i915_gem_object_create_stolen(i915, pctx_size);
+ if (IS_ERR(pctx)) {
+ DRM_DEBUG("not enough stolen space for PCTX, disabling\n");
+ return PTR_ERR(pctx);
+ }
+
+ GEM_BUG_ON(range_overflows_t(u64,
+ i915->dsm.start,
+ pctx->stolen->start,
+ U32_MAX));
+ pctx_paddr = i915->dsm.start + pctx->stolen->start;
+ intel_uncore_write(uncore, VLV_PCBR, pctx_paddr);
+
+out:
+ rc6->pctx = pctx;
+ return 0;
+}
+
+static void chv_rc6_enable(struct intel_rc6 *rc6)
+{
+ struct intel_uncore *uncore = rc6_to_uncore(rc6);
+ struct intel_engine_cs *engine;
+ enum intel_engine_id id;
+
+ /* 2a: Program RC6 thresholds. */
+ set(uncore, GEN6_RC6_WAKE_RATE_LIMIT, 40 << 16);
+ set(uncore, GEN6_RC_EVALUATION_INTERVAL, 125000); /* 12500 * 1280ns */
+ set(uncore, GEN6_RC_IDLE_HYSTERSIS, 25); /* 25 * 1280ns */
+
+ for_each_engine(engine, rc6_to_gt(rc6), id)
+ set(uncore, RING_MAX_IDLE(engine->mmio_base), 10);
+ set(uncore, GEN6_RC_SLEEP, 0);
+
+ /* TO threshold set to 500 us (0x186 * 1.28 us) */
+ set(uncore, GEN6_RC6_THRESHOLD, 0x186);
+
+ /* Allows RC6 residency counter to work */
+ set(uncore, VLV_COUNTER_CONTROL,
+ _MASKED_BIT_ENABLE(VLV_COUNT_RANGE_HIGH |
+ VLV_MEDIA_RC6_COUNT_EN |
+ VLV_RENDER_RC6_COUNT_EN));
+
+ /* 3: Enable RC6 */
+ rc6->ctl_enable = GEN7_RC_CTL_TO_MODE;
+}
+
+static void vlv_rc6_enable(struct intel_rc6 *rc6)
+{
+ struct intel_uncore *uncore = rc6_to_uncore(rc6);
+ struct intel_engine_cs *engine;
+ enum intel_engine_id id;
+
+ set(uncore, GEN6_RC6_WAKE_RATE_LIMIT, 0x00280000);
+ set(uncore, GEN6_RC_EVALUATION_INTERVAL, 125000);
+ set(uncore, GEN6_RC_IDLE_HYSTERSIS, 25);
+
+ for_each_engine(engine, rc6_to_gt(rc6), id)
+ set(uncore, RING_MAX_IDLE(engine->mmio_base), 10);
+
+ set(uncore, GEN6_RC6_THRESHOLD, 0x557);
+
+ /* Allows RC6 residency counter to work */
+ set(uncore, VLV_COUNTER_CONTROL,
+ _MASKED_BIT_ENABLE(VLV_COUNT_RANGE_HIGH |
+ VLV_MEDIA_RC0_COUNT_EN |
+ VLV_RENDER_RC0_COUNT_EN |
+ VLV_MEDIA_RC6_COUNT_EN |
+ VLV_RENDER_RC6_COUNT_EN));
+
+ rc6->ctl_enable =
+ GEN7_RC_CTL_TO_MODE | VLV_RC_CTL_CTX_RST_PARALLEL;
+}
+
+static bool bxt_check_bios_rc6_setup(struct intel_rc6 *rc6)
+{
+ struct intel_uncore *uncore = rc6_to_uncore(rc6);
+ struct drm_i915_private *i915 = rc6_to_i915(rc6);
+ u32 rc6_ctx_base, rc_ctl, rc_sw_target;
+ bool enable_rc6 = true;
+
+ rc_ctl = intel_uncore_read(uncore, GEN6_RC_CONTROL);
+ rc_sw_target = intel_uncore_read(uncore, GEN6_RC_STATE);
+ rc_sw_target &= RC_SW_TARGET_STATE_MASK;
+ rc_sw_target >>= RC_SW_TARGET_STATE_SHIFT;
+ DRM_DEBUG_DRIVER("BIOS enabled RC states: "
+ "HW_CTRL %s HW_RC6 %s SW_TARGET_STATE %x\n",
+ onoff(rc_ctl & GEN6_RC_CTL_HW_ENABLE),
+ onoff(rc_ctl & GEN6_RC_CTL_RC6_ENABLE),
+ rc_sw_target);
+
+ if (!(intel_uncore_read(uncore, RC6_LOCATION) & RC6_CTX_IN_DRAM)) {
+ DRM_DEBUG_DRIVER("RC6 Base location not set properly.\n");
+ enable_rc6 = false;
+ }
+
+ /*
+ * The exact context size is not known for BXT, so assume a page size
+ * for this check.
+ */
+ rc6_ctx_base =
+ intel_uncore_read(uncore, RC6_CTX_BASE) & RC6_CTX_BASE_MASK;
+ if (!(rc6_ctx_base >= i915->dsm_reserved.start &&
+ rc6_ctx_base + PAGE_SIZE < i915->dsm_reserved.end)) {
+ DRM_DEBUG_DRIVER("RC6 Base address not as expected.\n");
+ enable_rc6 = false;
+ }
+
+ if (!((intel_uncore_read(uncore, PWRCTX_MAXCNT_RCSUNIT) & IDLE_TIME_MASK) > 1 &&
+ (intel_uncore_read(uncore, PWRCTX_MAXCNT_VCSUNIT0) & IDLE_TIME_MASK) > 1 &&
+ (intel_uncore_read(uncore, PWRCTX_MAXCNT_BCSUNIT) & IDLE_TIME_MASK) > 1 &&
+ (intel_uncore_read(uncore, PWRCTX_MAXCNT_VECSUNIT) & IDLE_TIME_MASK) > 1)) {
+ DRM_DEBUG_DRIVER("Engine Idle wait time not set properly.\n");
+ enable_rc6 = false;
+ }
+
+ if (!intel_uncore_read(uncore, GEN8_PUSHBUS_CONTROL) ||
+ !intel_uncore_read(uncore, GEN8_PUSHBUS_ENABLE) ||
+ !intel_uncore_read(uncore, GEN8_PUSHBUS_SHIFT)) {
+ DRM_DEBUG_DRIVER("Pushbus not setup properly.\n");
+ enable_rc6 = false;
+ }
+
+ if (!intel_uncore_read(uncore, GEN6_GFXPAUSE)) {
+ DRM_DEBUG_DRIVER("GFX pause not setup properly.\n");
+ enable_rc6 = false;
+ }
+
+ if (!intel_uncore_read(uncore, GEN8_MISC_CTRL0)) {
+ DRM_DEBUG_DRIVER("GPM control not setup properly.\n");
+ enable_rc6 = false;
+ }
+
+ return enable_rc6;
+}
+
+static bool rc6_supported(struct intel_rc6 *rc6)
+{
+ struct drm_i915_private *i915 = rc6_to_i915(rc6);
+
+ if (!HAS_RC6(i915))
+ return false;
+
+ if (intel_vgpu_active(i915))
+ return false;
+
+ if (is_mock_gt(rc6_to_gt(rc6)))
+ return false;
+
+ if (IS_GEN9_LP(i915) && !bxt_check_bios_rc6_setup(rc6)) {
+ dev_notice(i915->drm.dev,
+ "RC6 and powersaving disabled by BIOS\n");
+ return false;
+ }
+
+ return true;
+}
+
+static void rpm_get(struct intel_rc6 *rc6)
+{
+ GEM_BUG_ON(rc6->wakeref);
+ pm_runtime_get_sync(&rc6_to_i915(rc6)->drm.pdev->dev);
+ rc6->wakeref = true;
+}
+
+static void rpm_put(struct intel_rc6 *rc6)
+{
+ GEM_BUG_ON(!rc6->wakeref);
+ pm_runtime_put(&rc6_to_i915(rc6)->drm.pdev->dev);
+ rc6->wakeref = false;
+}
+
+static bool pctx_corrupted(struct intel_rc6 *rc6)
+{
+ struct drm_i915_private *i915 = rc6_to_i915(rc6);
+
+ if (!NEEDS_RC6_CTX_CORRUPTION_WA(i915))
+ return false;
+
+ if (intel_uncore_read(rc6_to_uncore(rc6), GEN8_RC6_CTX_INFO))
+ return false;
+
+ dev_notice(i915->drm.dev,
+ "RC6 context corruption, disabling runtime power management\n");
+ return true;
+}
+
+static void __intel_rc6_disable(struct intel_rc6 *rc6)
+{
+ struct drm_i915_private *i915 = rc6_to_i915(rc6);
+ struct intel_uncore *uncore = rc6_to_uncore(rc6);
+
+ intel_uncore_forcewake_get(uncore, FORCEWAKE_ALL);
+ if (INTEL_GEN(i915) >= 9)
+ set(uncore, GEN9_PG_ENABLE, 0);
+ set(uncore, GEN6_RC_CONTROL, 0);
+ set(uncore, GEN6_RC_STATE, 0);
+ intel_uncore_forcewake_put(uncore, FORCEWAKE_ALL);
+}
+
+void intel_rc6_init(struct intel_rc6 *rc6)
+{
+ struct drm_i915_private *i915 = rc6_to_i915(rc6);
+ int err;
+
+ /* Disable runtime-pm until we can save the GPU state with rc6 pctx */
+ rpm_get(rc6);
+
+ if (!rc6_supported(rc6))
+ return;
+
+ if (IS_CHERRYVIEW(i915))
+ err = chv_rc6_init(rc6);
+ else if (IS_VALLEYVIEW(i915))
+ err = vlv_rc6_init(rc6);
+ else
+ err = 0;
+
+ /* Sanitize rc6, ensure it is disabled before we are ready. */
+ __intel_rc6_disable(rc6);
+
+ rc6->supported = err == 0;
+}
+
+void intel_rc6_sanitize(struct intel_rc6 *rc6)
+{
+ if (rc6->enabled) { /* unbalanced suspend/resume */
+ rpm_get(rc6);
+ rc6->enabled = false;
+ }
+
+ if (rc6->supported)
+ __intel_rc6_disable(rc6);
+}
+
+void intel_rc6_enable(struct intel_rc6 *rc6)
+{
+ struct drm_i915_private *i915 = rc6_to_i915(rc6);
+ struct intel_uncore *uncore = rc6_to_uncore(rc6);
+
+ if (!rc6->supported)
+ return;
+
+ GEM_BUG_ON(rc6->enabled);
+
+ intel_uncore_forcewake_get(uncore, FORCEWAKE_ALL);
+
+ if (IS_CHERRYVIEW(i915))
+ chv_rc6_enable(rc6);
+ else if (IS_VALLEYVIEW(i915))
+ vlv_rc6_enable(rc6);
+ else if (INTEL_GEN(i915) >= 11)
+ gen11_rc6_enable(rc6);
+ else if (INTEL_GEN(i915) >= 9)
+ gen9_rc6_enable(rc6);
+ else if (IS_BROADWELL(i915))
+ gen8_rc6_enable(rc6);
+ else if (INTEL_GEN(i915) >= 6)
+ gen6_rc6_enable(rc6);
+
+ rc6->manual = rc6->ctl_enable & GEN6_RC_CTL_RC6_ENABLE;
+ if (NEEDS_RC6_CTX_CORRUPTION_WA(i915))
+ rc6->ctl_enable = 0;
+
+ intel_uncore_forcewake_put(uncore, FORCEWAKE_ALL);
+
+ if (unlikely(pctx_corrupted(rc6)))
+ return;
+
+ /* rc6 is ready, runtime-pm is go! */
+ rpm_put(rc6);
+ rc6->enabled = true;
+}
+
+void intel_rc6_unpark(struct intel_rc6 *rc6)
+{
+ struct intel_uncore *uncore = rc6_to_uncore(rc6);
+
+ if (!rc6->enabled)
+ return;
+
+ /* Restore HW timers for automatic RC6 entry while busy */
+ set(uncore, GEN6_RC_CONTROL, rc6->ctl_enable);
+}
+
+void intel_rc6_park(struct intel_rc6 *rc6)
+{
+ struct intel_uncore *uncore = rc6_to_uncore(rc6);
+
+ if (!rc6->enabled)
+ return;
+
+ if (unlikely(pctx_corrupted(rc6))) {
+ intel_rc6_disable(rc6);
+ return;
+ }
+
+ if (!rc6->manual)
+ return;
+
+ /* Turn off the HW timers and go directly to rc6 */
+ set(uncore, GEN6_RC_CONTROL, GEN6_RC_CTL_RC6_ENABLE);
+ set(uncore, GEN6_RC_STATE, 0x4 << RC_SW_TARGET_STATE_SHIFT);
+}
+
+void intel_rc6_disable(struct intel_rc6 *rc6)
+{
+ if (!rc6->enabled)
+ return;
+
+ rpm_get(rc6);
+ rc6->enabled = false;
+
+ __intel_rc6_disable(rc6);
+}
+
+void intel_rc6_fini(struct intel_rc6 *rc6)
+{
+ struct drm_i915_gem_object *pctx;
+
+ intel_rc6_disable(rc6);
+
+ pctx = fetch_and_zero(&rc6->pctx);
+ if (pctx)
+ i915_gem_object_put(pctx);
+
+ if (rc6->wakeref)
+ rpm_put(rc6);
+}
+
+static u64 vlv_residency_raw(struct intel_uncore *uncore, const i915_reg_t reg)
+{
+ u32 lower, upper, tmp;
+ int loop = 2;
+
+ /*
+	 * The registers accessed do not need forcewake. We borrow the
+	 * uncore lock to prevent concurrent access to the range register.
+ */
+ lockdep_assert_held(&uncore->lock);
+
+ /*
+	 * VLV and CHV residency counters are 40 bits wide. With a
+	 * control bit, we can choose between the upper or lower 32-bit
+	 * window into this counter.
+ *
+ * Although we always use the counter in high-range mode elsewhere,
+ * userspace may attempt to read the value before rc6 is initialised,
+ * before we have set the default VLV_COUNTER_CONTROL value. So always
+ * set the high bit to be safe.
+ */
+ set(uncore, VLV_COUNTER_CONTROL,
+ _MASKED_BIT_ENABLE(VLV_COUNT_RANGE_HIGH));
+ upper = intel_uncore_read_fw(uncore, reg);
+ do {
+ tmp = upper;
+
+ set(uncore, VLV_COUNTER_CONTROL,
+ _MASKED_BIT_DISABLE(VLV_COUNT_RANGE_HIGH));
+ lower = intel_uncore_read_fw(uncore, reg);
+
+ set(uncore, VLV_COUNTER_CONTROL,
+ _MASKED_BIT_ENABLE(VLV_COUNT_RANGE_HIGH));
+ upper = intel_uncore_read_fw(uncore, reg);
+ } while (upper != tmp && --loop);
+
+ /*
+ * Everywhere else we always use VLV_COUNTER_CONTROL with the
+ * VLV_COUNT_RANGE_HIGH bit set - so it is safe to leave it set
+ * now.
+ */
+
+ return lower | (u64)upper << 8;
+}
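
The loop above is the standard stable-window read for a counter wider than a single register: sample the upper window, read the lower, then re-read the upper and retry if it moved in between. Below is a minimal, self-contained userspace sketch of the same pattern; read_window() is a hypothetical stand-in for toggling VLV_COUNT_RANGE_HIGH and reading the register, and the simulated counter value is arbitrary.

    #include <stdint.h>
    #include <stdio.h>

    /* Simulated 40-bit counter standing in for the MMIO register. */
    static uint64_t hw_counter = 0x123456789aULL;

    /* Hypothetical stand-in for selecting the window and reading it. */
    static uint32_t read_window(int high)
    {
        return high ? (uint32_t)(hw_counter >> 8) : (uint32_t)hw_counter;
    }

    static uint64_t residency_raw(void)
    {
        uint32_t lower, upper, tmp;
        int loop = 2;

        upper = read_window(1);
        do {
            tmp = upper;
            lower = read_window(0);
            upper = read_window(1);
        } while (upper != tmp && --loop);

        /* The windows overlap by 24 bits, so OR-ing them back is safe. */
        return lower | (uint64_t)upper << 8;
    }

    int main(void)
    {
        printf("raw: 0x%llx\n", (unsigned long long)residency_raw());
        return 0;
    }
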
+
+u64 intel_rc6_residency_ns(struct intel_rc6 *rc6, const i915_reg_t reg)
+{
+ struct drm_i915_private *i915 = rc6_to_i915(rc6);
+ struct intel_uncore *uncore = rc6_to_uncore(rc6);
+ u64 time_hw, prev_hw, overflow_hw;
+ unsigned int fw_domains;
+ unsigned long flags;
+ unsigned int i;
+ u32 mul, div;
+
+ if (!rc6->supported)
+ return 0;
+
+ /*
+ * Store previous hw counter values for counter wrap-around handling.
+ *
+	 * There are only four interesting registers and they live next to
+	 * each other, so we can use the address relative to the smallest one
+	 * as the index into driver storage.
+ */
+ i = (i915_mmio_reg_offset(reg) -
+ i915_mmio_reg_offset(GEN6_GT_GFX_RC6_LOCKED)) / sizeof(u32);
+ if (WARN_ON_ONCE(i >= ARRAY_SIZE(rc6->cur_residency)))
+ return 0;
+
+ fw_domains = intel_uncore_forcewake_for_reg(uncore, reg, FW_REG_READ);
+
+ spin_lock_irqsave(&uncore->lock, flags);
+ intel_uncore_forcewake_get__locked(uncore, fw_domains);
+
+ /* On VLV and CHV, residency time is in CZ units rather than 1.28us */
+ if (IS_VALLEYVIEW(i915) || IS_CHERRYVIEW(i915)) {
+ mul = 1000000;
+ div = i915->czclk_freq;
+ overflow_hw = BIT_ULL(40);
+ time_hw = vlv_residency_raw(uncore, reg);
+ } else {
+ /* 833.33ns units on Gen9LP, 1.28us elsewhere. */
+ if (IS_GEN9_LP(i915)) {
+ mul = 10000;
+ div = 12;
+ } else {
+ mul = 1280;
+ div = 1;
+ }
+
+ overflow_hw = BIT_ULL(32);
+ time_hw = intel_uncore_read_fw(uncore, reg);
+ }
+
+ /*
+ * Counter wrap handling.
+ *
+	 * This relies on a sufficient frequency of queries, otherwise the
+	 * counters can still wrap between two samples.
+ */
+ prev_hw = rc6->prev_hw_residency[i];
+ rc6->prev_hw_residency[i] = time_hw;
+
+ /* RC6 delta from last sample. */
+ if (time_hw >= prev_hw)
+ time_hw -= prev_hw;
+ else
+ time_hw += overflow_hw - prev_hw;
+
+ /* Add delta to RC6 extended raw driver copy. */
+ time_hw += rc6->cur_residency[i];
+ rc6->cur_residency[i] = time_hw;
+
+ intel_uncore_forcewake_put__locked(uncore, fw_domains);
+ spin_unlock_irqrestore(&uncore->lock, flags);
+
+ return mul_u64_u32_div(time_hw, mul, div);
+}
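
The wrap handling above boils down to one branch of modular arithmetic on the raw counter, with the delta accumulated into a 64-bit driver-side copy. A compact illustration (the overflow boundary and sample values are made up):

    #include <stdint.h>
    #include <stdio.h>

    /* Delta between two samples of a counter that wraps at 'overflow'. */
    static uint64_t counter_delta(uint64_t prev, uint64_t cur, uint64_t overflow)
    {
        return cur >= prev ? cur - prev : cur + overflow - prev;
    }

    int main(void)
    {
        const uint64_t overflow = 1ULL << 32; /* plain 32-bit HW counter */

        /* No wrap: the counter simply advanced. */
        printf("%llu\n", (unsigned long long)counter_delta(100, 160, overflow));

        /* One wrap: the counter ran past 2^32 between samples. */
        printf("%llu\n", (unsigned long long)
               counter_delta(0xffffff00ULL, 0x40ULL, overflow));
        return 0;
    }

As the comment notes, this only works if queries arrive more often than one full wrap period; two wraps between samples are indistinguishable from one. The final mul/div pair then converts ticks to nanoseconds, e.g. Gen9LP's 833.33ns tick is expressed exactly as 10000/12.
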
+
+u64 intel_rc6_residency_us(struct intel_rc6 *rc6, i915_reg_t reg)
+{
+ return DIV_ROUND_UP_ULL(intel_rc6_residency_ns(rc6, reg), 1000);
+}
+
+#if IS_ENABLED(CONFIG_DRM_I915_SELFTEST)
+#include "selftest_rc6.c"
+#endif
diff --git a/drivers/gpu/drm/i915/gt/intel_rc6.h b/drivers/gpu/drm/i915/gt/intel_rc6.h
new file mode 100644
index 000000000000..9f0f23fca8af
--- /dev/null
+++ b/drivers/gpu/drm/i915/gt/intel_rc6.h
@@ -0,0 +1,28 @@
+/*
+ * SPDX-License-Identifier: MIT
+ *
+ * Copyright © 2019 Intel Corporation
+ */
+
+#ifndef INTEL_RC6_H
+#define INTEL_RC6_H
+
+#include "i915_reg.h"
+
+struct intel_engine_cs;
+struct intel_rc6;
+
+void intel_rc6_init(struct intel_rc6 *rc6);
+void intel_rc6_fini(struct intel_rc6 *rc6);
+
+void intel_rc6_unpark(struct intel_rc6 *rc6);
+void intel_rc6_park(struct intel_rc6 *rc6);
+
+void intel_rc6_sanitize(struct intel_rc6 *rc6);
+void intel_rc6_enable(struct intel_rc6 *rc6);
+void intel_rc6_disable(struct intel_rc6 *rc6);
+
+u64 intel_rc6_residency_ns(struct intel_rc6 *rc6, i915_reg_t reg);
+u64 intel_rc6_residency_us(struct intel_rc6 *rc6, i915_reg_t reg);
+
+#endif /* INTEL_RC6_H */
diff --git a/drivers/gpu/drm/i915/gt/intel_rc6_types.h b/drivers/gpu/drm/i915/gt/intel_rc6_types.h
new file mode 100644
index 000000000000..bfbb623f7a4f
--- /dev/null
+++ b/drivers/gpu/drm/i915/gt/intel_rc6_types.h
@@ -0,0 +1,31 @@
+/*
+ * SPDX-License-Identifier: MIT
+ *
+ * Copyright © 2019 Intel Corporation
+ */
+
+#ifndef INTEL_RC6_TYPES_H
+#define INTEL_RC6_TYPES_H
+
+#include <linux/spinlock.h>
+#include <linux/types.h>
+
+#include "intel_engine_types.h"
+
+struct drm_i915_gem_object;
+
+struct intel_rc6 {
+ u64 prev_hw_residency[4];
+ u64 cur_residency[4];
+
+ u32 ctl_enable;
+
+ struct drm_i915_gem_object *pctx;
+
+ bool supported : 1;
+ bool enabled : 1;
+ bool manual : 1;
+ bool wakeref : 1;
+};
+
+#endif /* INTEL_RC6_TYPES_H */
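
A toy model of the invariant these flags encode, assuming (as the code above does) that the driver holds a runtime-pm wakeref exactly while RC6 is not enabled, so the GPU cannot power down before the power context is in place. This is an illustration of the flag discipline, not driver code:

    #include <assert.h>
    #include <stdbool.h>

    struct rc6_model {
        bool supported;
        bool enabled;
        bool wakeref;
    };

    static void model_init(struct rc6_model *rc6)
    {
        rc6->wakeref = true;   /* rpm_get() in intel_rc6_init() */
        rc6->supported = true; /* assume rc6_supported() passed */
    }

    static void model_enable(struct rc6_model *rc6)
    {
        assert(rc6->supported && rc6->wakeref);
        rc6->wakeref = false;  /* rpm_put(): "runtime-pm is go!" */
        rc6->enabled = true;
    }

    static void model_disable(struct rc6_model *rc6)
    {
        assert(rc6->enabled);
        rc6->wakeref = true;   /* rpm_get() before touching the HW */
        rc6->enabled = false;
    }

    static void model_fini(struct rc6_model *rc6)
    {
        if (rc6->enabled)
            model_disable(rc6);
        rc6->wakeref = false;  /* final rpm_put() */
    }

    int main(void)
    {
        struct rc6_model rc6 = {0};

        model_init(&rc6);
        model_enable(&rc6);
        model_disable(&rc6);
        model_fini(&rc6);
        return 0;
    }
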
diff --git a/drivers/gpu/drm/i915/gt/intel_renderstate.c b/drivers/gpu/drm/i915/gt/intel_renderstate.c
new file mode 100644
index 000000000000..5954ecc3207f
--- /dev/null
+++ b/drivers/gpu/drm/i915/gt/intel_renderstate.c
@@ -0,0 +1,250 @@
+/*
+ * Copyright © 2014 Intel Corporation
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
+ * IN THE SOFTWARE.
+ *
+ * Authors:
+ * Mika Kuoppala <mika.kuoppala@intel.com>
+ *
+ */
+
+#include "i915_drv.h"
+#include "intel_renderstate.h"
+#include "intel_ring.h"
+
+static const struct intel_renderstate_rodata *
+render_state_get_rodata(const struct intel_engine_cs *engine)
+{
+ if (engine->class != RENDER_CLASS)
+ return NULL;
+
+ switch (INTEL_GEN(engine->i915)) {
+ case 6:
+ return &gen6_null_state;
+ case 7:
+ return &gen7_null_state;
+ case 8:
+ return &gen8_null_state;
+ case 9:
+ return &gen9_null_state;
+ }
+
+ return NULL;
+}
+
+/*
+ * Macro to add commands to the auxiliary batch.
+ * This macro only checks for page overflow before inserting the commands;
+ * this is sufficient as the null state generator makes the final batch
+ * in two passes, building the commands and the state separately. At that
+ * point the sizes of both are known, and it compacts them by relocating
+ * the state right after the commands, taking care of alignment, so we
+ * should have sufficient space below them for adding new commands.
+ */
+#define OUT_BATCH(batch, i, val) \
+ do { \
+ if ((i) >= PAGE_SIZE / sizeof(u32)) \
+ goto err; \
+ (batch)[(i)++] = (val); \
+	} while (0)
+
+static int render_state_setup(struct intel_renderstate *so,
+ struct drm_i915_private *i915)
+{
+ const struct intel_renderstate_rodata *rodata = so->rodata;
+ unsigned int i = 0, reloc_index = 0;
+ unsigned int needs_clflush;
+ u32 *d;
+ int ret;
+
+ ret = i915_gem_object_prepare_write(so->vma->obj, &needs_clflush);
+ if (ret)
+ return ret;
+
+ d = kmap_atomic(i915_gem_object_get_dirty_page(so->vma->obj, 0));
+
+ while (i < rodata->batch_items) {
+ u32 s = rodata->batch[i];
+
+ if (i * 4 == rodata->reloc[reloc_index]) {
+ u64 r = s + so->vma->node.start;
+ s = lower_32_bits(r);
+ if (HAS_64BIT_RELOC(i915)) {
+ if (i + 1 >= rodata->batch_items ||
+ rodata->batch[i + 1] != 0)
+ goto err;
+
+ d[i++] = s;
+ s = upper_32_bits(r);
+ }
+
+ reloc_index++;
+ }
+
+ d[i++] = s;
+ }
+
+ if (rodata->reloc[reloc_index] != -1) {
+ DRM_ERROR("only %d relocs resolved\n", reloc_index);
+ goto err;
+ }
+
+ so->batch_offset = i915_ggtt_offset(so->vma);
+ so->batch_size = rodata->batch_items * sizeof(u32);
+
+ while (i % CACHELINE_DWORDS)
+ OUT_BATCH(d, i, MI_NOOP);
+
+ so->aux_offset = i * sizeof(u32);
+
+ if (HAS_POOLED_EU(i915)) {
+ /*
+		 * We always program the 3x6 pool config, but depending upon
+		 * which subslice is disabled the HW drops down to the
+		 * appropriate config shown below.
+		 *
+		 * In the table below, the 2x6 config always refers to the
+		 * fused-down version; native 2x6 is not available and can
+		 * be ignored.
+ *
+ * SNo subslices config eu pool configuration
+ * -----------------------------------------------------------
+ * 1 3 subslices enabled (3x6) - 0x00777000 (9+9)
+ * 2 ss0 disabled (2x6) - 0x00777000 (3+9)
+ * 3 ss1 disabled (2x6) - 0x00770000 (6+6)
+ * 4 ss2 disabled (2x6) - 0x00007000 (9+3)
+ */
+ u32 eu_pool_config = 0x00777000;
+
+ OUT_BATCH(d, i, GEN9_MEDIA_POOL_STATE);
+ OUT_BATCH(d, i, GEN9_MEDIA_POOL_ENABLE);
+ OUT_BATCH(d, i, eu_pool_config);
+ OUT_BATCH(d, i, 0);
+ OUT_BATCH(d, i, 0);
+ OUT_BATCH(d, i, 0);
+ }
+
+ OUT_BATCH(d, i, MI_BATCH_BUFFER_END);
+ so->aux_size = i * sizeof(u32) - so->aux_offset;
+ so->aux_offset += so->batch_offset;
+ /*
+	 * Since we are sending a length, we need to strictly conform to
+ * all requirements. For Gen2 this must be a multiple of 8.
+ */
+ so->aux_size = ALIGN(so->aux_size, 8);
+
+ if (needs_clflush)
+ drm_clflush_virt_range(d, i * sizeof(u32));
+ kunmap_atomic(d);
+
+ ret = 0;
+out:
+ i915_gem_object_finish_access(so->vma->obj);
+ return ret;
+
+err:
+ kunmap_atomic(d);
+ ret = -EINVAL;
+ goto out;
+}
+
+#undef OUT_BATCH
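
The relocation branch in render_state_setup() above splits a 64-bit GGTT address across two consecutive batch dwords, low half first, after verifying that the generator pre-zeroed the second dword. A self-contained sketch of that arithmetic (the offset and base address are invented):

    #include <stdint.h>
    #include <stdio.h>

    #define lower_32_bits(v) ((uint32_t)(v))
    #define upper_32_bits(v) ((uint32_t)((v) >> 32))

    int main(void)
    {
        uint32_t batch[2] = { 0x00001000, 0 }; /* offset within the object */
        uint64_t base = 0x0000000123400000ULL; /* hypothetical node start */
        uint64_t r = batch[0] + base;

        batch[0] = lower_32_bits(r);
        batch[1] = upper_32_bits(r);

        printf("%08x %08x\n", batch[1], batch[0]); /* 00000001 23401000 */
        return 0;
    }
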
+
+int intel_renderstate_init(struct intel_renderstate *so,
+ struct intel_engine_cs *engine)
+{
+ struct drm_i915_gem_object *obj;
+ int err;
+
+ memset(so, 0, sizeof(*so));
+
+ so->rodata = render_state_get_rodata(engine);
+ if (!so->rodata)
+ return 0;
+
+ if (so->rodata->batch_items * 4 > PAGE_SIZE)
+ return -EINVAL;
+
+ obj = i915_gem_object_create_internal(engine->i915, PAGE_SIZE);
+ if (IS_ERR(obj))
+ return PTR_ERR(obj);
+
+ so->vma = i915_vma_instance(obj, &engine->gt->ggtt->vm, NULL);
+ if (IS_ERR(so->vma)) {
+ err = PTR_ERR(so->vma);
+ goto err_obj;
+ }
+
+ err = i915_vma_pin(so->vma, 0, 0, PIN_GLOBAL | PIN_HIGH);
+ if (err)
+ goto err_vma;
+
+ err = render_state_setup(so, engine->i915);
+ if (err)
+ goto err_unpin;
+
+ return 0;
+
+err_unpin:
+ i915_vma_unpin(so->vma);
+err_vma:
+ i915_vma_close(so->vma);
+err_obj:
+ i915_gem_object_put(obj);
+ so->vma = NULL;
+ return err;
+}
+
+int intel_renderstate_emit(struct intel_renderstate *so,
+ struct i915_request *rq)
+{
+ struct intel_engine_cs *engine = rq->engine;
+ int err;
+
+ if (!so->vma)
+ return 0;
+
+ err = engine->emit_bb_start(rq,
+ so->batch_offset, so->batch_size,
+ I915_DISPATCH_SECURE);
+ if (err)
+ return err;
+
+ if (so->aux_size > 8) {
+ err = engine->emit_bb_start(rq,
+ so->aux_offset, so->aux_size,
+ I915_DISPATCH_SECURE);
+ if (err)
+ return err;
+ }
+
+ i915_vma_lock(so->vma);
+ err = i915_request_await_object(rq, so->vma->obj, false);
+ if (err == 0)
+ err = i915_vma_move_to_active(so->vma, rq, 0);
+ i915_vma_unlock(so->vma);
+
+ return err;
+}
+
+void intel_renderstate_fini(struct intel_renderstate *so)
+{
+ i915_vma_unpin_and_release(&so->vma, 0);
+}
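
Note also how render_state_setup() pads the command stream with MI_NOOPs up to a cacheline boundary before the auxiliary batch and rounds aux_size up to a multiple of 8. The same arithmetic standalone, with made-up sizes (CACHELINE_DWORDS is 16 for the driver's 64-byte cachelines, and ALIGN is a simplified form of the kernel macro):

    #include <stdio.h>

    #define CACHELINE_DWORDS 16 /* 64-byte cachelines */
    #define ALIGN(x, a) (((x) + (a) - 1) & ~((a) - 1))

    int main(void)
    {
        unsigned int i = 37; /* dwords emitted so far (made up) */

        /* Each loop step stands for OUT_BATCH(d, i, MI_NOOP). */
        while (i % CACHELINE_DWORDS)
            i++;

        printf("aux batch starts at dword %u (byte offset %u)\n", i, i * 4);
        printf("aux_size 13 -> %u after ALIGN(, 8)\n", ALIGN(13u, 8u));
        return 0;
    }
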
diff --git a/drivers/gpu/drm/i915/gt/intel_renderstate.h b/drivers/gpu/drm/i915/gt/intel_renderstate.h
new file mode 100644
index 000000000000..5700be69a05a
--- /dev/null
+++ b/drivers/gpu/drm/i915/gt/intel_renderstate.h
@@ -0,0 +1,66 @@
+/*
+ * Copyright © 2014 Intel Corporation
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
+ * DEALINGS IN THE SOFTWARE.
+ */
+
+#ifndef _INTEL_RENDERSTATE_H_
+#define _INTEL_RENDERSTATE_H_
+
+#include <linux/types.h>
+
+struct i915_request;
+struct intel_engine_cs;
+struct i915_vma;
+
+struct intel_renderstate_rodata {
+ const u32 *reloc;
+ const u32 *batch;
+ const u32 batch_items;
+};
+
+#define RO_RENDERSTATE(_g) \
+ const struct intel_renderstate_rodata gen ## _g ## _null_state = { \
+ .reloc = gen ## _g ## _null_state_relocs, \
+ .batch = gen ## _g ## _null_state_batch, \
+ .batch_items = sizeof(gen ## _g ## _null_state_batch)/4, \
+ }
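
For reference, RO_RENDERSTATE(6) pastes together the definition below; the two dummy tables stand in for the generated gen6_null_state_relocs/gen6_null_state_batch arrays (their real contents live in gen6_renderstate.c):

    #include <stdint.h>
    #include <stdio.h>

    typedef uint32_t u32;

    /* Dummy tables standing in for the generated arrays. */
    static const u32 gen6_null_state_relocs[] = { 0x0, (u32)-1 };
    static const u32 gen6_null_state_batch[] = { 0, 0, 0, 0 };

    struct intel_renderstate_rodata {
        const u32 *reloc;
        const u32 *batch;
        const u32 batch_items;
    };

    /* What RO_RENDERSTATE(6) expands to: */
    const struct intel_renderstate_rodata gen6_null_state = {
        .reloc = gen6_null_state_relocs,
        .batch = gen6_null_state_batch,
        .batch_items = sizeof(gen6_null_state_batch) / 4,
    };

    int main(void)
    {
        printf("batch_items = %u\n", gen6_null_state.batch_items);
        return 0;
    }
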
+
+extern const struct intel_renderstate_rodata gen6_null_state;
+extern const struct intel_renderstate_rodata gen7_null_state;
+extern const struct intel_renderstate_rodata gen8_null_state;
+extern const struct intel_renderstate_rodata gen9_null_state;
+
+struct intel_renderstate {
+ const struct intel_renderstate_rodata *rodata;
+ struct i915_vma *vma;
+ u32 batch_offset;
+ u32 batch_size;
+ u32 aux_offset;
+ u32 aux_size;
+};
+
+int intel_renderstate_init(struct intel_renderstate *so,
+ struct intel_engine_cs *engine);
+int intel_renderstate_emit(struct intel_renderstate *so,
+ struct i915_request *rq);
+void intel_renderstate_fini(struct intel_renderstate *so);
+
+#endif /* _INTEL_RENDERSTATE_H_ */
diff --git a/drivers/gpu/drm/i915/gt/intel_reset.c b/drivers/gpu/drm/i915/gt/intel_reset.c
index 3f907701ef4d..beee0cf89bce 100644
--- a/drivers/gpu/drm/i915/gt/intel_reset.c
+++ b/drivers/gpu/drm/i915/gt/intel_reset.c
@@ -7,6 +7,7 @@
#include <linux/sched/mm.h>
#include <linux/stop_machine.h>
+#include "display/intel_display_types.h"
#include "display/intel_overlay.h"
#include "gem/i915_gem_context.h"
@@ -15,26 +16,18 @@
#include "i915_gpu_error.h"
#include "i915_irq.h"
#include "intel_engine_pm.h"
+#include "intel_gt.h"
#include "intel_gt_pm.h"
#include "intel_reset.h"
-#include "intel_guc.h"
+#include "uc/intel_guc.h"
+#include "uc/intel_guc_submission.h"
#define RESET_MAX_RETRIES 3
/* XXX How to handle concurrent GGTT updates using tiling registers? */
#define RESET_UNDER_STOP_MACHINE 0
-static void rmw_set(struct intel_uncore *uncore, i915_reg_t reg, u32 set)
-{
- intel_uncore_rmw(uncore, reg, 0, set);
-}
-
-static void rmw_clear(struct intel_uncore *uncore, i915_reg_t reg, u32 clr)
-{
- intel_uncore_rmw(uncore, reg, clr, 0);
-}
-
static void rmw_set_fw(struct intel_uncore *uncore, i915_reg_t reg, u32 set)
{
intel_uncore_rmw_fw(uncore, reg, 0, set);
@@ -48,28 +41,29 @@ static void rmw_clear_fw(struct intel_uncore *uncore, i915_reg_t reg, u32 clr)
static void engine_skip_context(struct i915_request *rq)
{
struct intel_engine_cs *engine = rq->engine;
- struct i915_gem_context *hung_ctx = rq->gem_context;
-
- lockdep_assert_held(&engine->active.lock);
+ struct intel_context *hung_ctx = rq->context;
if (!i915_request_is_active(rq))
return;
+ lockdep_assert_held(&engine->active.lock);
list_for_each_entry_continue(rq, &engine->active.requests, sched.link)
- if (rq->gem_context == hung_ctx)
+ if (rq->context == hung_ctx)
i915_request_skip(rq, -EIO);
}
-static void client_mark_guilty(struct drm_i915_file_private *file_priv,
- const struct i915_gem_context *ctx)
+static void client_mark_guilty(struct i915_gem_context *ctx, bool banned)
{
- unsigned int score;
+ struct drm_i915_file_private *file_priv = ctx->file_priv;
unsigned long prev_hang;
+ unsigned int score;
+
+ if (IS_ERR_OR_NULL(file_priv))
+ return;
- if (i915_gem_context_is_banned(ctx))
+ score = 0;
+ if (banned)
score = I915_CLIENT_SCORE_CONTEXT_BAN;
- else
- score = 0;
prev_hang = xchg(&file_priv->hang_timestamp, jiffies);
if (time_before(jiffies, prev_hang + I915_CLIENT_FAST_HANG_JIFFIES))
@@ -84,17 +78,38 @@ static void client_mark_guilty(struct drm_i915_file_private *file_priv,
}
}
-static bool context_mark_guilty(struct i915_gem_context *ctx)
+static bool mark_guilty(struct i915_request *rq)
{
+ struct i915_gem_context *ctx;
unsigned long prev_hang;
bool banned;
int i;
+ rcu_read_lock();
+ ctx = rcu_dereference(rq->context->gem_context);
+ if (ctx && !kref_get_unless_zero(&ctx->ref))
+ ctx = NULL;
+ rcu_read_unlock();
+ if (!ctx)
+ return false;
+
+ if (i915_gem_context_is_closed(ctx)) {
+ intel_context_set_banned(rq->context);
+ banned = true;
+ goto out;
+ }
+
atomic_inc(&ctx->guilty_count);
/* Cool contexts are too cool to be banned! (Used for reset testing.) */
- if (!i915_gem_context_is_bannable(ctx))
- return false;
+ if (!i915_gem_context_is_bannable(ctx)) {
+ banned = false;
+ goto out;
+ }
+
+ dev_notice(ctx->i915->drm.dev,
+ "%s context reset due to GPU hang\n",
+ ctx->name);
/* Record the timestamp for the last N hangs */
prev_hang = ctx->hang_timestamp[0];
@@ -109,81 +124,43 @@ static bool context_mark_guilty(struct i915_gem_context *ctx)
if (banned) {
DRM_DEBUG_DRIVER("context %s: guilty %d, banned\n",
ctx->name, atomic_read(&ctx->guilty_count));
- i915_gem_context_set_banned(ctx);
+ intel_context_set_banned(rq->context);
}
- if (!IS_ERR_OR_NULL(ctx->file_priv))
- client_mark_guilty(ctx->file_priv, ctx);
+ client_mark_guilty(ctx, banned);
+out:
+ i915_gem_context_put(ctx);
return banned;
}
-static void context_mark_innocent(struct i915_gem_context *ctx)
+static void mark_innocent(struct i915_request *rq)
{
- atomic_inc(&ctx->active_count);
+ struct i915_gem_context *ctx;
+
+ rcu_read_lock();
+ ctx = rcu_dereference(rq->context->gem_context);
+ if (ctx)
+ atomic_inc(&ctx->active_count);
+ rcu_read_unlock();
}
-void i915_reset_request(struct i915_request *rq, bool guilty)
+void __i915_request_reset(struct i915_request *rq, bool guilty)
{
- GEM_TRACE("%s rq=%llx:%lld, guilty? %s\n",
- rq->engine->name,
- rq->fence.context,
- rq->fence.seqno,
- yesno(guilty));
+ RQ_TRACE(rq, "guilty? %s\n", yesno(guilty));
- lockdep_assert_held(&rq->engine->active.lock);
GEM_BUG_ON(i915_request_completed(rq));
+ rcu_read_lock(); /* protect the GEM context */
if (guilty) {
i915_request_skip(rq, -EIO);
- if (context_mark_guilty(rq->gem_context))
+ if (mark_guilty(rq))
engine_skip_context(rq);
} else {
dma_fence_set_error(&rq->fence, -EAGAIN);
- context_mark_innocent(rq->gem_context);
+ mark_innocent(rq);
}
-}
-
-static void gen3_stop_engine(struct intel_engine_cs *engine)
-{
- struct intel_uncore *uncore = engine->uncore;
- const u32 base = engine->mmio_base;
-
- GEM_TRACE("%s\n", engine->name);
-
- if (intel_engine_stop_cs(engine))
- GEM_TRACE("%s: timed out on STOP_RING\n", engine->name);
-
- intel_uncore_write_fw(uncore,
- RING_HEAD(base),
- intel_uncore_read_fw(uncore, RING_TAIL(base)));
- intel_uncore_posting_read_fw(uncore, RING_HEAD(base)); /* paranoia */
-
- intel_uncore_write_fw(uncore, RING_HEAD(base), 0);
- intel_uncore_write_fw(uncore, RING_TAIL(base), 0);
- intel_uncore_posting_read_fw(uncore, RING_TAIL(base));
-
- /* The ring must be empty before it is disabled */
- intel_uncore_write_fw(uncore, RING_CTL(base), 0);
-
- /* Check acts as a post */
- if (intel_uncore_read_fw(uncore, RING_HEAD(base)))
- GEM_TRACE("%s: ring head [%x] not parked\n",
- engine->name,
- intel_uncore_read_fw(uncore, RING_HEAD(base)));
-}
-
-static void i915_stop_engines(struct drm_i915_private *i915,
- intel_engine_mask_t engine_mask)
-{
- struct intel_engine_cs *engine;
- intel_engine_mask_t tmp;
-
- if (INTEL_GEN(i915) < 3)
- return;
-
- for_each_engine_masked(engine, i915, engine_mask, tmp)
- gen3_stop_engine(engine);
+ rcu_read_unlock();
}
static bool i915_in_reset(struct pci_dev *pdev)
@@ -194,11 +171,11 @@ static bool i915_in_reset(struct pci_dev *pdev)
return gdrst & GRDOM_RESET_STATUS;
}
-static int i915_do_reset(struct drm_i915_private *i915,
+static int i915_do_reset(struct intel_gt *gt,
intel_engine_mask_t engine_mask,
unsigned int retry)
{
- struct pci_dev *pdev = i915->drm.pdev;
+ struct pci_dev *pdev = gt->i915->drm.pdev;
int err;
/* Assert reset for at least 20 usec, and wait for acknowledgement. */
@@ -223,22 +200,22 @@ static bool g4x_reset_complete(struct pci_dev *pdev)
return (gdrst & GRDOM_RESET_ENABLE) == 0;
}
-static int g33_do_reset(struct drm_i915_private *i915,
+static int g33_do_reset(struct intel_gt *gt,
intel_engine_mask_t engine_mask,
unsigned int retry)
{
- struct pci_dev *pdev = i915->drm.pdev;
+ struct pci_dev *pdev = gt->i915->drm.pdev;
pci_write_config_byte(pdev, I915_GDRST, GRDOM_RESET_ENABLE);
return wait_for_atomic(g4x_reset_complete(pdev), 50);
}
-static int g4x_do_reset(struct drm_i915_private *i915,
+static int g4x_do_reset(struct intel_gt *gt,
intel_engine_mask_t engine_mask,
unsigned int retry)
{
- struct pci_dev *pdev = i915->drm.pdev;
- struct intel_uncore *uncore = &i915->uncore;
+ struct pci_dev *pdev = gt->i915->drm.pdev;
+ struct intel_uncore *uncore = gt->uncore;
int ret;
/* WaVcpClkGateDisableForMediaReset:ctg,elk */
@@ -270,11 +247,10 @@ out:
return ret;
}
-static int ironlake_do_reset(struct drm_i915_private *i915,
- intel_engine_mask_t engine_mask,
- unsigned int retry)
+static int ilk_do_reset(struct intel_gt *gt, intel_engine_mask_t engine_mask,
+ unsigned int retry)
{
- struct intel_uncore *uncore = &i915->uncore;
+ struct intel_uncore *uncore = gt->uncore;
int ret;
intel_uncore_write_fw(uncore, ILK_GDSR,
@@ -306,10 +282,9 @@ out:
}
/* Reset the hardware domains (GENX_GRDOM_*) specified by mask */
-static int gen6_hw_domain_reset(struct drm_i915_private *i915,
- u32 hw_domain_mask)
+static int gen6_hw_domain_reset(struct intel_gt *gt, u32 hw_domain_mask)
{
- struct intel_uncore *uncore = &i915->uncore;
+ struct intel_uncore *uncore = gt->uncore;
int err;
/*
@@ -331,18 +306,18 @@ static int gen6_hw_domain_reset(struct drm_i915_private *i915,
return err;
}
-static int gen6_reset_engines(struct drm_i915_private *i915,
+static int gen6_reset_engines(struct intel_gt *gt,
intel_engine_mask_t engine_mask,
unsigned int retry)
{
- struct intel_engine_cs *engine;
- const u32 hw_engine_mask[] = {
+ static const u32 hw_engine_mask[] = {
[RCS0] = GEN6_GRDOM_RENDER,
[BCS0] = GEN6_GRDOM_BLT,
[VCS0] = GEN6_GRDOM_MEDIA,
[VCS1] = GEN8_GRDOM_MEDIA2,
[VECS0] = GEN6_GRDOM_VECS,
};
+ struct intel_engine_cs *engine;
u32 hw_mask;
if (engine_mask == ALL_ENGINES) {
@@ -351,16 +326,16 @@ static int gen6_reset_engines(struct drm_i915_private *i915,
intel_engine_mask_t tmp;
hw_mask = 0;
- for_each_engine_masked(engine, i915, engine_mask, tmp) {
+ for_each_engine_masked(engine, gt, engine_mask, tmp) {
GEM_BUG_ON(engine->id >= ARRAY_SIZE(hw_engine_mask));
hw_mask |= hw_engine_mask[engine->id];
}
}
- return gen6_hw_domain_reset(i915, hw_mask);
+ return gen6_hw_domain_reset(gt, hw_mask);
}
-static u32 gen11_lock_sfc(struct intel_engine_cs *engine)
+static int gen11_lock_sfc(struct intel_engine_cs *engine, u32 *hw_mask)
{
struct intel_uncore *uncore = engine->uncore;
u8 vdbox_sfc_access = RUNTIME_INFO(engine->i915)->vdbox_sfc_access;
@@ -369,6 +344,7 @@ static u32 gen11_lock_sfc(struct intel_engine_cs *engine)
i915_reg_t sfc_usage;
u32 sfc_usage_bit;
u32 sfc_reset_bit;
+ int ret;
switch (engine->class) {
case VIDEO_DECODE_CLASS:
@@ -403,27 +379,33 @@ static u32 gen11_lock_sfc(struct intel_engine_cs *engine)
}
/*
- * Tell the engine that a software reset is going to happen. The engine
- * will then try to force lock the SFC (if currently locked, it will
- * remain so until we tell the engine it is safe to unlock; if currently
- * unlocked, it will ignore this and all new lock requests). If SFC
- * ends up being locked to the engine we want to reset, we have to reset
- * it as well (we will unlock it once the reset sequence is completed).
+ * If the engine is using a SFC, tell the engine that a software reset
+ * is going to happen. The engine will then try to force lock the SFC.
+ * If SFC ends up being locked to the engine we want to reset, we have
+ * to reset it as well (we will unlock it once the reset sequence is
+ * completed).
*/
+ if (!(intel_uncore_read_fw(uncore, sfc_usage) & sfc_usage_bit))
+ return 0;
+
rmw_set_fw(uncore, sfc_forced_lock, sfc_forced_lock_bit);
- if (__intel_wait_for_register_fw(uncore,
- sfc_forced_lock_ack,
- sfc_forced_lock_ack_bit,
- sfc_forced_lock_ack_bit,
- 1000, 0, NULL)) {
- DRM_DEBUG_DRIVER("Wait for SFC forced lock ack failed\n");
+ ret = __intel_wait_for_register_fw(uncore,
+ sfc_forced_lock_ack,
+ sfc_forced_lock_ack_bit,
+ sfc_forced_lock_ack_bit,
+ 1000, 0, NULL);
+
+ /* Was the SFC released while we were trying to lock it? */
+ if (!(intel_uncore_read_fw(uncore, sfc_usage) & sfc_usage_bit))
return 0;
- }
- if (intel_uncore_read_fw(uncore, sfc_usage) & sfc_usage_bit)
- return sfc_reset_bit;
+ if (ret) {
+ DRM_DEBUG_DRIVER("Wait for SFC forced lock ack failed\n");
+ return ret;
+ }
+ *hw_mask |= sfc_reset_bit;
return 0;
}
@@ -455,11 +437,11 @@ static void gen11_unlock_sfc(struct intel_engine_cs *engine)
rmw_clear_fw(uncore, sfc_forced_lock, sfc_forced_lock_bit);
}
-static int gen11_reset_engines(struct drm_i915_private *i915,
+static int gen11_reset_engines(struct intel_gt *gt,
intel_engine_mask_t engine_mask,
unsigned int retry)
{
- const u32 hw_engine_mask[] = {
+ static const u32 hw_engine_mask[] = {
[RCS0] = GEN11_GRDOM_RENDER,
[BCS0] = GEN11_GRDOM_BLT,
[VCS0] = GEN11_GRDOM_MEDIA,
@@ -478,17 +460,26 @@ static int gen11_reset_engines(struct drm_i915_private *i915,
hw_mask = GEN11_GRDOM_FULL;
} else {
hw_mask = 0;
- for_each_engine_masked(engine, i915, engine_mask, tmp) {
+ for_each_engine_masked(engine, gt, engine_mask, tmp) {
GEM_BUG_ON(engine->id >= ARRAY_SIZE(hw_engine_mask));
hw_mask |= hw_engine_mask[engine->id];
- hw_mask |= gen11_lock_sfc(engine);
+ ret = gen11_lock_sfc(engine, &hw_mask);
+ if (ret)
+ goto sfc_unlock;
}
}
- ret = gen6_hw_domain_reset(i915, hw_mask);
+ ret = gen6_hw_domain_reset(gt, hw_mask);
+sfc_unlock:
+ /*
+	 * We unlock the SFC based on the lock status and not the result of
+	 * gen11_lock_sfc to make sure that we clean up properly if something
+	 * went wrong during the lock (e.g. the lock was acquired after the
+	 * timeout expired).
+ */
if (engine_mask != ALL_ENGINES)
- for_each_engine_masked(engine, i915, engine_mask, tmp)
+ for_each_engine_masked(engine, gt, engine_mask, tmp)
gen11_unlock_sfc(engine);
return ret;
@@ -538,7 +529,7 @@ static void gen8_engine_reset_cancel(struct intel_engine_cs *engine)
_MASKED_BIT_DISABLE(RESET_CTL_REQUEST_RESET));
}
-static int gen8_reset_engines(struct drm_i915_private *i915,
+static int gen8_reset_engines(struct intel_gt *gt,
intel_engine_mask_t engine_mask,
unsigned int retry)
{
@@ -547,7 +538,7 @@ static int gen8_reset_engines(struct drm_i915_private *i915,
intel_engine_mask_t tmp;
int ret;
- for_each_engine_masked(engine, i915, engine_mask, tmp) {
+ for_each_engine_masked(engine, gt, engine_mask, tmp) {
ret = gen8_engine_reset_prepare(engine);
if (ret && !reset_non_ready)
goto skip_reset;
@@ -563,34 +554,45 @@ static int gen8_reset_engines(struct drm_i915_private *i915,
* We rather take context corruption instead of
* failed reset with a wedged driver/gpu. And
* active bb execution case should be covered by
- * i915_stop_engines we have before the reset.
+ * stop_engines() we have before the reset.
*/
}
- if (INTEL_GEN(i915) >= 11)
- ret = gen11_reset_engines(i915, engine_mask, retry);
+ if (INTEL_GEN(gt->i915) >= 11)
+ ret = gen11_reset_engines(gt, engine_mask, retry);
else
- ret = gen6_reset_engines(i915, engine_mask, retry);
+ ret = gen6_reset_engines(gt, engine_mask, retry);
skip_reset:
- for_each_engine_masked(engine, i915, engine_mask, tmp)
+ for_each_engine_masked(engine, gt, engine_mask, tmp)
gen8_engine_reset_cancel(engine);
return ret;
}
-typedef int (*reset_func)(struct drm_i915_private *,
+static int mock_reset(struct intel_gt *gt,
+ intel_engine_mask_t mask,
+ unsigned int retry)
+{
+ return 0;
+}
+
+typedef int (*reset_func)(struct intel_gt *,
intel_engine_mask_t engine_mask,
unsigned int retry);
-static reset_func intel_get_gpu_reset(struct drm_i915_private *i915)
+static reset_func intel_get_gpu_reset(const struct intel_gt *gt)
{
- if (INTEL_GEN(i915) >= 8)
+ struct drm_i915_private *i915 = gt->i915;
+
+ if (is_mock_gt(gt))
+ return mock_reset;
+ else if (INTEL_GEN(i915) >= 8)
return gen8_reset_engines;
else if (INTEL_GEN(i915) >= 6)
return gen6_reset_engines;
else if (INTEL_GEN(i915) >= 5)
- return ironlake_do_reset;
+ return ilk_do_reset;
else if (IS_G4X(i915))
return g4x_do_reset;
else if (IS_G33(i915) || IS_PINEVIEW(i915))
@@ -601,15 +603,14 @@ static reset_func intel_get_gpu_reset(struct drm_i915_private *i915)
return NULL;
}
-int intel_gpu_reset(struct drm_i915_private *i915,
- intel_engine_mask_t engine_mask)
+int __intel_gt_reset(struct intel_gt *gt, intel_engine_mask_t engine_mask)
{
const int retries = engine_mask == ALL_ENGINES ? RESET_MAX_RETRIES : 1;
reset_func reset;
int ret = -ETIMEDOUT;
int retry;
- reset = intel_get_gpu_reset(i915);
+ reset = intel_get_gpu_reset(gt);
if (!reset)
return -ENODEV;
@@ -617,59 +618,45 @@ int intel_gpu_reset(struct drm_i915_private *i915,
* If the power well sleeps during the reset, the reset
* request may be dropped and never completes (causing -EIO).
*/
- intel_uncore_forcewake_get(&i915->uncore, FORCEWAKE_ALL);
+ intel_uncore_forcewake_get(gt->uncore, FORCEWAKE_ALL);
for (retry = 0; ret == -ETIMEDOUT && retry < retries; retry++) {
- /*
- * We stop engines, otherwise we might get failed reset and a
- * dead gpu (on elk). Also as modern gpu as kbl can suffer
- * from system hang if batchbuffer is progressing when
- * the reset is issued, regardless of READY_TO_RESET ack.
- * Thus assume it is best to stop engines on all gens
- * where we have a gpu reset.
- *
- * WaKBLVECSSemaphoreWaitPoll:kbl (on ALL_ENGINES)
- *
- * WaMediaResetMainRingCleanup:ctg,elk (presumably)
- *
- * FIXME: Wa for more modern gens needs to be validated
- */
- if (retry)
- i915_stop_engines(i915, engine_mask);
-
- GEM_TRACE("engine_mask=%x\n", engine_mask);
+ GT_TRACE(gt, "engine_mask=%x\n", engine_mask);
preempt_disable();
- ret = reset(i915, engine_mask, retry);
+ ret = reset(gt, engine_mask, retry);
preempt_enable();
}
- intel_uncore_forcewake_put(&i915->uncore, FORCEWAKE_ALL);
+ intel_uncore_forcewake_put(gt->uncore, FORCEWAKE_ALL);
return ret;
}
-bool intel_has_gpu_reset(struct drm_i915_private *i915)
+bool intel_has_gpu_reset(const struct intel_gt *gt)
{
if (!i915_modparams.reset)
return NULL;
- return intel_get_gpu_reset(i915);
+ return intel_get_gpu_reset(gt);
}
-bool intel_has_reset_engine(struct drm_i915_private *i915)
+bool intel_has_reset_engine(const struct intel_gt *gt)
{
- return INTEL_INFO(i915)->has_reset_engine && i915_modparams.reset >= 2;
+ if (i915_modparams.reset < 2)
+ return false;
+
+ return INTEL_INFO(gt->i915)->has_reset_engine;
}
-int intel_reset_guc(struct drm_i915_private *i915)
+int intel_reset_guc(struct intel_gt *gt)
{
u32 guc_domain =
- INTEL_GEN(i915) >= 11 ? GEN11_GRDOM_GUC : GEN9_GRDOM_GUC;
+ INTEL_GEN(gt->i915) >= 11 ? GEN11_GRDOM_GUC : GEN9_GRDOM_GUC;
int ret;
- GEM_BUG_ON(!HAS_GUC(i915));
+ GEM_BUG_ON(!HAS_GT_UC(gt->i915));
- intel_uncore_forcewake_get(&i915->uncore, FORCEWAKE_ALL);
- ret = gen6_hw_domain_reset(i915, guc_domain);
- intel_uncore_forcewake_put(&i915->uncore, FORCEWAKE_ALL);
+ intel_uncore_forcewake_get(gt->uncore, FORCEWAKE_ALL);
+ ret = gen6_hw_domain_reset(gt, guc_domain);
+ intel_uncore_forcewake_put(gt->uncore, FORCEWAKE_ALL);
return ret;
}
@@ -688,59 +675,64 @@ static void reset_prepare_engine(struct intel_engine_cs *engine)
* GPU state upon resume, i.e. fail to restart after a reset.
*/
intel_uncore_forcewake_get(engine->uncore, FORCEWAKE_ALL);
- engine->reset.prepare(engine);
+ if (engine->reset.prepare)
+ engine->reset.prepare(engine);
}
-static void revoke_mmaps(struct drm_i915_private *i915)
+static void revoke_mmaps(struct intel_gt *gt)
{
int i;
- for (i = 0; i < i915->ggtt.num_fences; i++) {
+ for (i = 0; i < gt->ggtt->num_fences; i++) {
struct drm_vma_offset_node *node;
struct i915_vma *vma;
u64 vma_offset;
- vma = READ_ONCE(i915->ggtt.fence_regs[i].vma);
+ vma = READ_ONCE(gt->ggtt->fence_regs[i].vma);
if (!vma)
continue;
if (!i915_vma_has_userfault(vma))
continue;
- GEM_BUG_ON(vma->fence != &i915->ggtt.fence_regs[i]);
- node = &vma->obj->base.vma_node;
+ GEM_BUG_ON(vma->fence != &gt->ggtt->fence_regs[i]);
+
+ if (!vma->mmo)
+ continue;
+
+ node = &vma->mmo->vma_node;
vma_offset = vma->ggtt_view.partial.offset << PAGE_SHIFT;
- unmap_mapping_range(i915->drm.anon_inode->i_mapping,
+
+ unmap_mapping_range(gt->i915->drm.anon_inode->i_mapping,
drm_vma_node_offset_addr(node) + vma_offset,
vma->size,
1);
}
}
-static intel_engine_mask_t reset_prepare(struct drm_i915_private *i915)
+static intel_engine_mask_t reset_prepare(struct intel_gt *gt)
{
struct intel_engine_cs *engine;
intel_engine_mask_t awake = 0;
enum intel_engine_id id;
- for_each_engine(engine, i915, id) {
+ for_each_engine(engine, gt, id) {
if (intel_engine_pm_get_if_awake(engine))
awake |= engine->mask;
reset_prepare_engine(engine);
}
- intel_uc_reset_prepare(i915);
+ intel_uc_reset_prepare(&gt->uc);
return awake;
}
-static void gt_revoke(struct drm_i915_private *i915)
+static void gt_revoke(struct intel_gt *gt)
{
- revoke_mmaps(i915);
+ revoke_mmaps(gt);
}
-static int gt_reset(struct drm_i915_private *i915,
- intel_engine_mask_t stalled_mask)
+static int gt_reset(struct intel_gt *gt, intel_engine_mask_t stalled_mask)
{
struct intel_engine_cs *engine;
enum intel_engine_id id;
@@ -750,33 +742,33 @@ static int gt_reset(struct drm_i915_private *i915,
* Everything depends on having the GTT running, so we need to start
* there.
*/
- err = i915_ggtt_enable_hw(i915);
+ err = i915_ggtt_enable_hw(gt->i915);
if (err)
return err;
- for_each_engine(engine, i915, id)
- intel_engine_reset(engine, stalled_mask & engine->mask);
+ for_each_engine(engine, gt, id)
+ __intel_engine_reset(engine, stalled_mask & engine->mask);
- i915_gem_restore_fences(i915);
+ i915_gem_restore_fences(gt->ggtt);
return err;
}
static void reset_finish_engine(struct intel_engine_cs *engine)
{
- engine->reset.finish(engine);
+ if (engine->reset.finish)
+ engine->reset.finish(engine);
intel_uncore_forcewake_put(engine->uncore, FORCEWAKE_ALL);
intel_engine_signal_breadcrumbs(engine);
}
-static void reset_finish(struct drm_i915_private *i915,
- intel_engine_mask_t awake)
+static void reset_finish(struct intel_gt *gt, intel_engine_mask_t awake)
{
struct intel_engine_cs *engine;
enum intel_engine_id id;
- for_each_engine(engine, i915, id) {
+ for_each_engine(engine, gt, id) {
reset_finish_engine(engine);
if (awake & engine->mask)
intel_engine_pm_put(engine);
@@ -788,8 +780,7 @@ static void nop_submit_request(struct i915_request *request)
struct intel_engine_cs *engine = request->engine;
unsigned long flags;
- GEM_TRACE("%s fence %llx:%lld -> -EIO\n",
- engine->name, request->fence.context, request->fence.seqno);
+ RQ_TRACE(request, "-EIO\n");
dma_fence_set_error(&request->fence, -EIO);
spin_lock_irqsave(&engine->active.lock, flags);
@@ -797,44 +788,40 @@ static void nop_submit_request(struct i915_request *request)
i915_request_mark_complete(request);
spin_unlock_irqrestore(&engine->active.lock, flags);
- intel_engine_queue_breadcrumbs(engine);
+ intel_engine_signal_breadcrumbs(engine);
}
-static void __i915_gem_set_wedged(struct drm_i915_private *i915)
+static void __intel_gt_set_wedged(struct intel_gt *gt)
{
- struct i915_gpu_error *error = &i915->gpu_error;
struct intel_engine_cs *engine;
intel_engine_mask_t awake;
enum intel_engine_id id;
- if (test_bit(I915_WEDGED, &error->flags))
+ if (test_bit(I915_WEDGED, &gt->reset.flags))
return;
- if (GEM_SHOW_DEBUG() && !intel_engines_are_idle(i915)) {
+ if (GEM_SHOW_DEBUG() && !intel_engines_are_idle(gt)) {
struct drm_printer p = drm_debug_printer(__func__);
- for_each_engine(engine, i915, id)
+ for_each_engine(engine, gt, id)
intel_engine_dump(engine, &p, "%s\n", engine->name);
}
- GEM_TRACE("start\n");
+ GT_TRACE(gt, "start\n");
/*
* First, stop submission to hw, but do not yet complete requests by
* rolling the global seqno forward (since this would complete requests
* for which we haven't set the fence error to EIO yet).
*/
- awake = reset_prepare(i915);
+ awake = reset_prepare(gt);
/* Even if the GPU reset fails, it should still stop the engines */
- if (!INTEL_INFO(i915)->gpu_reset_clobbers_display)
- intel_gpu_reset(i915, ALL_ENGINES);
+ if (!INTEL_INFO(gt->i915)->gpu_reset_clobbers_display)
+ __intel_gt_reset(gt, ALL_ENGINES);
- for_each_engine(engine, i915, id) {
+ for_each_engine(engine, gt, id)
engine->submit_request = nop_submit_request;
- engine->schedule = NULL;
- }
- i915->caps.scheduler = 0;
/*
* Make sure no request can slip through without getting completed by
@@ -842,40 +829,42 @@ static void __i915_gem_set_wedged(struct drm_i915_private *i915)
* in nop_submit_request.
*/
synchronize_rcu_expedited();
- set_bit(I915_WEDGED, &error->flags);
+ set_bit(I915_WEDGED, &gt->reset.flags);
/* Mark all executing requests as skipped */
- for_each_engine(engine, i915, id)
- engine->cancel_requests(engine);
+ for_each_engine(engine, gt, id)
+ if (engine->reset.cancel)
+ engine->reset.cancel(engine);
- reset_finish(i915, awake);
+ reset_finish(gt, awake);
- GEM_TRACE("end\n");
+ GT_TRACE(gt, "end\n");
}
-void i915_gem_set_wedged(struct drm_i915_private *i915)
+void intel_gt_set_wedged(struct intel_gt *gt)
{
- struct i915_gpu_error *error = &i915->gpu_error;
intel_wakeref_t wakeref;
- mutex_lock(&error->wedge_mutex);
- with_intel_runtime_pm(&i915->runtime_pm, wakeref)
- __i915_gem_set_wedged(i915);
- mutex_unlock(&error->wedge_mutex);
+ mutex_lock(&gt->reset.mutex);
+ with_intel_runtime_pm(gt->uncore->rpm, wakeref)
+ __intel_gt_set_wedged(gt);
+ mutex_unlock(&gt->reset.mutex);
}
-static bool __i915_gem_unset_wedged(struct drm_i915_private *i915)
+static bool __intel_gt_unset_wedged(struct intel_gt *gt)
{
- struct i915_gpu_error *error = &i915->gpu_error;
- struct i915_timeline *tl;
+ struct intel_gt_timelines *timelines = &gt->timelines;
+ struct intel_timeline *tl;
+ bool ok;
- if (!test_bit(I915_WEDGED, &error->flags))
+ if (!test_bit(I915_WEDGED, &gt->reset.flags))
return true;
- if (!i915->gt.scratch) /* Never full initialised, recovery impossible */
+ /* Never fully initialised, recovery impossible */
+ if (test_bit(I915_WEDGED_ON_INIT, &gt->reset.flags))
return false;
- GEM_TRACE("start\n");
+ GT_TRACE(gt, "start\n");
/*
* Before unwedging, make sure that all pending operations
@@ -887,14 +876,16 @@ static bool __i915_gem_unset_wedged(struct drm_i915_private *i915)
*
* No more can be submitted until we reset the wedged bit.
*/
- mutex_lock(&i915->gt.timelines.mutex);
- list_for_each_entry(tl, &i915->gt.timelines.active_list, link) {
- struct i915_request *rq;
+ spin_lock(&timelines->lock);
+ list_for_each_entry(tl, &timelines->active_list, link) {
+ struct dma_fence *fence;
- rq = i915_active_request_get_unlocked(&tl->last_request);
- if (!rq)
+ fence = i915_active_fence_get(&tl->last_request);
+ if (!fence)
continue;
+ spin_unlock(&timelines->lock);
+
/*
* All internal dependencies (i915_requests) will have
* been flushed by the set-wedge, but we may be stuck waiting
@@ -902,12 +893,27 @@ static bool __i915_gem_unset_wedged(struct drm_i915_private *i915)
* (I915_FENCE_TIMEOUT) so this wait should not be unbounded
* in the worst case.
*/
- dma_fence_default_wait(&rq->fence, false, MAX_SCHEDULE_TIMEOUT);
- i915_request_put(rq);
+ dma_fence_default_wait(fence, false, MAX_SCHEDULE_TIMEOUT);
+ dma_fence_put(fence);
+
+		/* Restart iteration after dropping the lock */
+ spin_lock(&timelines->lock);
+ tl = list_entry(&timelines->active_list, typeof(*tl), link);
}
- mutex_unlock(&i915->gt.timelines.mutex);
+ spin_unlock(&timelines->lock);
- intel_gt_sanitize(i915, false);
+ /* We must reset pending GPU events before restoring our submission */
+ ok = !HAS_EXECLISTS(gt->i915); /* XXX better agnosticism desired */
+ if (!INTEL_INFO(gt->i915)->gpu_reset_clobbers_display)
+ ok = __intel_gt_reset(gt, ALL_ENGINES) == 0;
+ if (!ok) {
+ /*
+ * Warn CI about the unrecoverable wedged condition.
+ * Time for a reboot.
+ */
+ add_taint_for_CI(TAINT_WARN);
+ return false;
+ }
/*
* Undo nop_submit_request. We prevent all new i915 requests from
@@ -918,53 +924,51 @@ static bool __i915_gem_unset_wedged(struct drm_i915_private *i915)
* the nop_submit_request on reset, we can do this from normal
* context and do not require stop_machine().
*/
- intel_engines_reset_default_submission(i915);
+ intel_engines_reset_default_submission(gt);
- GEM_TRACE("end\n");
+ GT_TRACE(gt, "end\n");
smp_mb__before_atomic(); /* complete takeover before enabling execbuf */
- clear_bit(I915_WEDGED, &i915->gpu_error.flags);
+ clear_bit(I915_WEDGED, &gt->reset.flags);
return true;
}
-bool i915_gem_unset_wedged(struct drm_i915_private *i915)
+bool intel_gt_unset_wedged(struct intel_gt *gt)
{
- struct i915_gpu_error *error = &i915->gpu_error;
bool result;
- mutex_lock(&error->wedge_mutex);
- result = __i915_gem_unset_wedged(i915);
- mutex_unlock(&error->wedge_mutex);
+ mutex_lock(&gt->reset.mutex);
+ result = __intel_gt_unset_wedged(gt);
+ mutex_unlock(&gt->reset.mutex);
return result;
}
-static int do_reset(struct drm_i915_private *i915,
- intel_engine_mask_t stalled_mask)
+static int do_reset(struct intel_gt *gt, intel_engine_mask_t stalled_mask)
{
int err, i;
- gt_revoke(i915);
+ gt_revoke(gt);
- err = intel_gpu_reset(i915, ALL_ENGINES);
+ err = __intel_gt_reset(gt, ALL_ENGINES);
for (i = 0; err && i < RESET_MAX_RETRIES; i++) {
msleep(10 * (i + 1));
- err = intel_gpu_reset(i915, ALL_ENGINES);
+ err = __intel_gt_reset(gt, ALL_ENGINES);
}
if (err)
return err;
- return gt_reset(i915, stalled_mask);
+ return gt_reset(gt, stalled_mask);
}
-static int resume(struct drm_i915_private *i915)
+static int resume(struct intel_gt *gt)
{
struct intel_engine_cs *engine;
enum intel_engine_id id;
int ret;
- for_each_engine(engine, i915, id) {
+ for_each_engine(engine, gt, id) {
ret = engine->resume(engine);
if (ret)
return ret;
@@ -974,8 +978,8 @@ static int resume(struct drm_i915_private *i915)
}
/**
- * i915_reset - reset chip after a hang
- * @i915: #drm_i915_private to reset
+ * intel_gt_reset - reset chip after a hang
+ * @gt: #intel_gt to reset
* @stalled_mask: mask of the stalled engines with the guilty requests
* @reason: user error message for why we are resetting
*
@@ -990,50 +994,50 @@ static int resume(struct drm_i915_private *i915)
* - re-init interrupt state
* - re-init display
*/
-void i915_reset(struct drm_i915_private *i915,
- intel_engine_mask_t stalled_mask,
- const char *reason)
+void intel_gt_reset(struct intel_gt *gt,
+ intel_engine_mask_t stalled_mask,
+ const char *reason)
{
- struct i915_gpu_error *error = &i915->gpu_error;
intel_engine_mask_t awake;
int ret;
- GEM_TRACE("flags=%lx\n", error->flags);
+ GT_TRACE(gt, "flags=%lx\n", gt->reset.flags);
might_sleep();
- GEM_BUG_ON(!test_bit(I915_RESET_BACKOFF, &error->flags));
- mutex_lock(&error->wedge_mutex);
+ GEM_BUG_ON(!test_bit(I915_RESET_BACKOFF, &gt->reset.flags));
+ mutex_lock(&gt->reset.mutex);
/* Clear any previous failed attempts at recovery. Time to try again. */
- if (!__i915_gem_unset_wedged(i915))
+ if (!__intel_gt_unset_wedged(gt))
goto unlock;
if (reason)
- dev_notice(i915->drm.dev, "Resetting chip for %s\n", reason);
- error->reset_count++;
+ dev_notice(gt->i915->drm.dev,
+ "Resetting chip for %s\n", reason);
+ atomic_inc(&gt->i915->gpu_error.reset_count);
- awake = reset_prepare(i915);
+ awake = reset_prepare(gt);
- if (!intel_has_gpu_reset(i915)) {
+ if (!intel_has_gpu_reset(gt)) {
if (i915_modparams.reset)
- dev_err(i915->drm.dev, "GPU reset not supported\n");
+ dev_err(gt->i915->drm.dev, "GPU reset not supported\n");
else
DRM_DEBUG_DRIVER("GPU reset disabled\n");
goto error;
}
- if (INTEL_INFO(i915)->gpu_reset_clobbers_display)
- intel_runtime_pm_disable_interrupts(i915);
+ if (INTEL_INFO(gt->i915)->gpu_reset_clobbers_display)
+ intel_runtime_pm_disable_interrupts(gt->i915);
- if (do_reset(i915, stalled_mask)) {
- dev_err(i915->drm.dev, "Failed to reset chip\n");
+ if (do_reset(gt, stalled_mask)) {
+ dev_err(gt->i915->drm.dev, "Failed to reset chip\n");
goto taint;
}
- if (INTEL_INFO(i915)->gpu_reset_clobbers_display)
- intel_runtime_pm_enable_interrupts(i915);
+ if (INTEL_INFO(gt->i915)->gpu_reset_clobbers_display)
+ intel_runtime_pm_enable_interrupts(gt->i915);
- intel_overlay_reset(i915);
+ intel_overlay_reset(gt->i915);
/*
* Next we need to restore the context, but we don't use those
@@ -1043,23 +1047,21 @@ void i915_reset(struct drm_i915_private *i915,
* was running at the time of the reset (i.e. we weren't VT
* switched away).
*/
- ret = i915_gem_init_hw(i915);
+ ret = intel_gt_init_hw(gt);
if (ret) {
DRM_ERROR("Failed to initialise HW following reset (%d)\n",
ret);
goto taint;
}
- ret = resume(i915);
+ ret = resume(gt);
if (ret)
goto taint;
- i915_queue_hangcheck(i915);
-
finish:
- reset_finish(i915, awake);
+ reset_finish(gt, awake);
unlock:
- mutex_unlock(&error->wedge_mutex);
+ mutex_unlock(&gt->reset.mutex);
return;
taint:
@@ -1077,18 +1079,17 @@ taint:
*/
add_taint_for_CI(TAINT_WARN);
error:
- __i915_gem_set_wedged(i915);
+ __intel_gt_set_wedged(gt);
goto finish;
}
-static inline int intel_gt_reset_engine(struct drm_i915_private *i915,
- struct intel_engine_cs *engine)
+static inline int intel_gt_reset_engine(struct intel_engine_cs *engine)
{
- return intel_gpu_reset(i915, engine->mask);
+ return __intel_gt_reset(engine->gt, engine->mask);
}
/**
- * i915_reset_engine - reset GPU engine to recover from a hang
+ * intel_engine_reset - reset GPU engine to recover from a hang
* @engine: engine to reset
* @msg: reason for GPU reset; or NULL for no dev_notice()
*
@@ -1100,13 +1101,14 @@ static inline int intel_gt_reset_engine(struct drm_i915_private *i915,
* - reset engine (which will force the engine to idle)
* - re-init/configure engine
*/
-int i915_reset_engine(struct intel_engine_cs *engine, const char *msg)
+int intel_engine_reset(struct intel_engine_cs *engine, const char *msg)
{
- struct i915_gpu_error *error = &engine->i915->gpu_error;
+ struct intel_gt *gt = engine->gt;
+ bool uses_guc = intel_engine_in_guc_submission_mode(engine);
int ret;
- GEM_TRACE("%s flags=%lx\n", engine->name, error->flags);
- GEM_BUG_ON(!test_bit(I915_RESET_ENGINE + engine->id, &error->flags));
+ ENGINE_TRACE(engine, "flags=%lx\n", gt->reset.flags);
+ GEM_BUG_ON(!test_bit(I915_RESET_ENGINE + engine->id, &gt->reset.flags));
if (!intel_engine_pm_get_if_awake(engine))
return 0;
@@ -1116,16 +1118,16 @@ int i915_reset_engine(struct intel_engine_cs *engine, const char *msg)
if (msg)
dev_notice(engine->i915->drm.dev,
"Resetting %s for %s\n", engine->name, msg);
- error->reset_engine_count[engine->id]++;
+ atomic_inc(&engine->i915->gpu_error.reset_engine_count[engine->uabi_class]);
- if (!engine->i915->guc.execbuf_client)
- ret = intel_gt_reset_engine(engine->i915, engine);
+ if (!uses_guc)
+ ret = intel_gt_reset_engine(engine);
else
- ret = intel_guc_reset_engine(&engine->i915->guc, engine);
+ ret = intel_guc_reset_engine(&engine->gt->uc.guc, engine);
if (ret) {
/* If we fail here, we expect to fallback to a global reset */
DRM_DEBUG_DRIVER("%sFailed to reset %s, ret=%d\n",
- engine->i915->guc.execbuf_client ? "GuC " : "",
+ uses_guc ? "GuC " : "",
engine->name, ret);
goto out;
}
@@ -1135,7 +1137,7 @@ int i915_reset_engine(struct intel_engine_cs *engine, const char *msg)
* active request and can drop it, adjust head to skip the offending
* request to resume executing remaining requests in the queue.
*/
- intel_engine_reset(engine, true);
+ __intel_engine_reset(engine, true);
/*
* The engine and its registers (and workarounds in case of render)
@@ -1147,20 +1149,19 @@ int i915_reset_engine(struct intel_engine_cs *engine, const char *msg)
out:
intel_engine_cancel_stop_cs(engine);
reset_finish_engine(engine);
- intel_engine_pm_put(engine);
+ intel_engine_pm_put_async(engine);
return ret;
}
-static void i915_reset_device(struct drm_i915_private *i915,
- u32 engine_mask,
- const char *reason)
+static void intel_gt_reset_global(struct intel_gt *gt,
+ u32 engine_mask,
+ const char *reason)
{
- struct i915_gpu_error *error = &i915->gpu_error;
- struct kobject *kobj = &i915->drm.primary->kdev->kobj;
+ struct kobject *kobj = &gt->i915->drm.primary->kdev->kobj;
char *error_event[] = { I915_ERROR_UEVENT "=1", NULL };
char *reset_event[] = { I915_RESET_UEVENT "=1", NULL };
char *reset_done_event[] = { I915_ERROR_UEVENT "=0", NULL };
- struct i915_wedge_me w;
+ struct intel_wedge_me w;
kobject_uevent_env(kobj, KOBJ_CHANGE, error_event);
@@ -1168,137 +1169,24 @@ static void i915_reset_device(struct drm_i915_private *i915,
kobject_uevent_env(kobj, KOBJ_CHANGE, reset_event);
/* Use a watchdog to ensure that our reset completes */
- i915_wedge_on_timeout(&w, i915, 5 * HZ) {
- intel_prepare_reset(i915);
+ intel_wedge_on_timeout(&w, gt, 5 * HZ) {
+ intel_prepare_reset(gt->i915);
/* Flush everyone using a resource about to be clobbered */
- synchronize_srcu_expedited(&error->reset_backoff_srcu);
+ synchronize_srcu_expedited(&gt->reset.backoff_srcu);
- i915_reset(i915, engine_mask, reason);
+ intel_gt_reset(gt, engine_mask, reason);
- intel_finish_reset(i915);
+ intel_finish_reset(gt->i915);
}
- if (!test_bit(I915_WEDGED, &error->flags))
+ if (!test_bit(I915_WEDGED, &gt->reset.flags))
kobject_uevent_env(kobj, KOBJ_CHANGE, reset_done_event);
}
-static void clear_register(struct intel_uncore *uncore, i915_reg_t reg)
-{
- intel_uncore_rmw(uncore, reg, 0, 0);
-}
-
-static void gen8_clear_engine_error_register(struct intel_engine_cs *engine)
-{
- GEN6_RING_FAULT_REG_RMW(engine, RING_FAULT_VALID, 0);
- GEN6_RING_FAULT_REG_POSTING_READ(engine);
-}
-
-static void clear_error_registers(struct drm_i915_private *i915,
- intel_engine_mask_t engine_mask)
-{
- struct intel_uncore *uncore = &i915->uncore;
- u32 eir;
-
- if (!IS_GEN(i915, 2))
- clear_register(uncore, PGTBL_ER);
-
- if (INTEL_GEN(i915) < 4)
- clear_register(uncore, IPEIR(RENDER_RING_BASE));
- else
- clear_register(uncore, IPEIR_I965);
-
- clear_register(uncore, EIR);
- eir = intel_uncore_read(uncore, EIR);
- if (eir) {
- /*
- * some errors might have become stuck,
- * mask them.
- */
- DRM_DEBUG_DRIVER("EIR stuck: 0x%08x, masking\n", eir);
- rmw_set(uncore, EMR, eir);
- intel_uncore_write(uncore, GEN2_IIR,
- I915_MASTER_ERROR_INTERRUPT);
- }
-
- if (INTEL_GEN(i915) >= 8) {
- rmw_clear(uncore, GEN8_RING_FAULT_REG, RING_FAULT_VALID);
- intel_uncore_posting_read(uncore, GEN8_RING_FAULT_REG);
- } else if (INTEL_GEN(i915) >= 6) {
- struct intel_engine_cs *engine;
- enum intel_engine_id id;
-
- for_each_engine_masked(engine, i915, engine_mask, id)
- gen8_clear_engine_error_register(engine);
- }
-}
-
-static void gen6_check_faults(struct drm_i915_private *dev_priv)
-{
- struct intel_engine_cs *engine;
- enum intel_engine_id id;
- u32 fault;
-
- for_each_engine(engine, dev_priv, id) {
- fault = GEN6_RING_FAULT_REG_READ(engine);
- if (fault & RING_FAULT_VALID) {
- DRM_DEBUG_DRIVER("Unexpected fault\n"
- "\tAddr: 0x%08lx\n"
- "\tAddress space: %s\n"
- "\tSource ID: %d\n"
- "\tType: %d\n",
- fault & PAGE_MASK,
- fault & RING_FAULT_GTTSEL_MASK ? "GGTT" : "PPGTT",
- RING_FAULT_SRCID(fault),
- RING_FAULT_FAULT_TYPE(fault));
- }
- }
-}
-
-static void gen8_check_faults(struct drm_i915_private *dev_priv)
-{
- u32 fault = I915_READ(GEN8_RING_FAULT_REG);
-
- if (fault & RING_FAULT_VALID) {
- u32 fault_data0, fault_data1;
- u64 fault_addr;
-
- fault_data0 = I915_READ(GEN8_FAULT_TLB_DATA0);
- fault_data1 = I915_READ(GEN8_FAULT_TLB_DATA1);
- fault_addr = ((u64)(fault_data1 & FAULT_VA_HIGH_BITS) << 44) |
- ((u64)fault_data0 << 12);
-
- DRM_DEBUG_DRIVER("Unexpected fault\n"
- "\tAddr: 0x%08x_%08x\n"
- "\tAddress space: %s\n"
- "\tEngine ID: %d\n"
- "\tSource ID: %d\n"
- "\tType: %d\n",
- upper_32_bits(fault_addr),
- lower_32_bits(fault_addr),
- fault_data1 & FAULT_GTT_SEL ? "GGTT" : "PPGTT",
- GEN8_RING_FAULT_ENGINE_ID(fault),
- RING_FAULT_SRCID(fault),
- RING_FAULT_FAULT_TYPE(fault));
- }
-}
-
-void i915_check_and_clear_faults(struct drm_i915_private *i915)
-{
- /* From GEN8 onwards we only have one 'All Engine Fault Register' */
- if (INTEL_GEN(i915) >= 8)
- gen8_check_faults(i915);
- else if (INTEL_GEN(i915) >= 6)
- gen6_check_faults(i915);
- else
- return;
-
- clear_error_registers(i915, ALL_ENGINES);
-}
-
/**
- * i915_handle_error - handle a gpu error
- * @i915: i915 device private
+ * intel_gt_handle_error - handle a gpu error
+ * @gt: the intel_gt
* @engine_mask: mask representing engines that are hung
* @flags: control flags
* @fmt: Error message format string
@@ -1309,12 +1197,11 @@ void i915_check_and_clear_faults(struct drm_i915_private *i915)
* so userspace knows something bad happened (should trigger collection
* of a ring dump etc.).
*/
-void i915_handle_error(struct drm_i915_private *i915,
- intel_engine_mask_t engine_mask,
- unsigned long flags,
- const char *fmt, ...)
+void intel_gt_handle_error(struct intel_gt *gt,
+ intel_engine_mask_t engine_mask,
+ unsigned long flags,
+ const char *fmt, ...)
{
- struct i915_gpu_error *error = &i915->gpu_error;
struct intel_engine_cs *engine;
intel_wakeref_t wakeref;
intel_engine_mask_t tmp;
@@ -1338,33 +1225,31 @@ void i915_handle_error(struct drm_i915_private *i915,
* isn't the case at least when we get here by doing a
* simulated reset via debugfs, so get an RPM reference.
*/
- wakeref = intel_runtime_pm_get(&i915->runtime_pm);
+ wakeref = intel_runtime_pm_get(gt->uncore->rpm);
- engine_mask &= INTEL_INFO(i915)->engine_mask;
+ engine_mask &= INTEL_INFO(gt->i915)->engine_mask;
if (flags & I915_ERROR_CAPTURE) {
- i915_capture_error_state(i915, engine_mask, msg);
- clear_error_registers(i915, engine_mask);
+ i915_capture_error_state(gt->i915);
+ intel_gt_clear_error_registers(gt, engine_mask);
}
/*
* Try engine reset when available. We fall back to full reset if
* single reset fails.
*/
- if (intel_has_reset_engine(i915) && !__i915_wedged(error)) {
- for_each_engine_masked(engine, i915, engine_mask, tmp) {
+ if (intel_has_reset_engine(gt) && !intel_gt_is_wedged(gt)) {
+ for_each_engine_masked(engine, gt, engine_mask, tmp) {
BUILD_BUG_ON(I915_RESET_MODESET >= I915_RESET_ENGINE);
if (test_and_set_bit(I915_RESET_ENGINE + engine->id,
- &error->flags))
+ &gt->reset.flags))
continue;
- if (i915_reset_engine(engine, msg) == 0)
+ if (intel_engine_reset(engine, msg) == 0)
engine_mask &= ~engine->mask;
- clear_bit(I915_RESET_ENGINE + engine->id,
- &error->flags);
- wake_up_bit(&error->flags,
- I915_RESET_ENGINE + engine->id);
+ clear_and_wake_up_bit(I915_RESET_ENGINE + engine->id,
+ &gt->reset.flags);
}
}
@@ -1372,9 +1257,9 @@ void i915_handle_error(struct drm_i915_private *i915,
goto out;
/* Full reset needs the mutex, stop any other user trying to do so. */
- if (test_and_set_bit(I915_RESET_BACKOFF, &error->flags)) {
- wait_event(error->reset_queue,
- !test_bit(I915_RESET_BACKOFF, &error->flags));
+ if (test_and_set_bit(I915_RESET_BACKOFF, &gt->reset.flags)) {
+ wait_event(gt->reset.queue,
+ !test_bit(I915_RESET_BACKOFF, &gt->reset.flags));
goto out; /* piggy-back on the other reset */
}
@@ -1382,115 +1267,127 @@ void i915_handle_error(struct drm_i915_private *i915,
synchronize_rcu_expedited();
/* Prevent any other reset-engine attempt. */
- for_each_engine(engine, i915, tmp) {
+ for_each_engine(engine, gt, tmp) {
while (test_and_set_bit(I915_RESET_ENGINE + engine->id,
- &error->flags))
- wait_on_bit(&error->flags,
+ &gt->reset.flags))
+ wait_on_bit(&gt->reset.flags,
I915_RESET_ENGINE + engine->id,
TASK_UNINTERRUPTIBLE);
}
- i915_reset_device(i915, engine_mask, msg);
-
- for_each_engine(engine, i915, tmp) {
- clear_bit(I915_RESET_ENGINE + engine->id,
- &error->flags);
- }
+ intel_gt_reset_global(gt, engine_mask, msg);
- clear_bit(I915_RESET_BACKOFF, &error->flags);
- wake_up_all(&error->reset_queue);
+ for_each_engine(engine, gt, tmp)
+ clear_bit_unlock(I915_RESET_ENGINE + engine->id,
+ &gt->reset.flags);
+ clear_bit_unlock(I915_RESET_BACKOFF, &gt->reset.flags);
+ smp_mb__after_atomic();
+ wake_up_all(&gt->reset.queue);
out:
- intel_runtime_pm_put(&i915->runtime_pm, wakeref);
+ intel_runtime_pm_put(gt->uncore->rpm, wakeref);
}
-int i915_reset_trylock(struct drm_i915_private *i915)
+int intel_gt_reset_trylock(struct intel_gt *gt, int *srcu)
{
- struct i915_gpu_error *error = &i915->gpu_error;
- int srcu;
-
- might_lock(&error->reset_backoff_srcu);
+ might_lock(&gt->reset.backoff_srcu);
might_sleep();
rcu_read_lock();
- while (test_bit(I915_RESET_BACKOFF, &error->flags)) {
+ while (test_bit(I915_RESET_BACKOFF, &gt->reset.flags)) {
rcu_read_unlock();
- if (wait_event_interruptible(error->reset_queue,
+ if (wait_event_interruptible(gt->reset.queue,
!test_bit(I915_RESET_BACKOFF,
- &error->flags)))
+ &gt->reset.flags)))
return -EINTR;
rcu_read_lock();
}
- srcu = srcu_read_lock(&error->reset_backoff_srcu);
+ *srcu = srcu_read_lock(&gt->reset.backoff_srcu);
rcu_read_unlock();
- return srcu;
+ return 0;
}
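
A minimal caller sketch for the reworked trylock (illustrative only, not part of the patch): the SRCU tag now comes back through the out parameter and the return value is 0 or -EINTR, so the lock/unlock calls pair up as below. do_work() is a hypothetical placeholder.

	static int example_guarded_access(struct intel_gt *gt)
	{
		int srcu, err;

		err = intel_gt_reset_trylock(gt, &srcu);
		if (err)	/* -EINTR: interrupted while a reset was pending */
			return err;

		do_work(gt);	/* hypothetical work, safe from a concurrent reset */

		intel_gt_reset_unlock(gt, srcu);
		return 0;
	}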
-void i915_reset_unlock(struct drm_i915_private *i915, int tag)
-__releases(&i915->gpu_error.reset_backoff_srcu)
+void intel_gt_reset_unlock(struct intel_gt *gt, int tag)
+__releases(&gt->reset.backoff_srcu)
{
- struct i915_gpu_error *error = &i915->gpu_error;
-
- srcu_read_unlock(&error->reset_backoff_srcu, tag);
+ srcu_read_unlock(&gt->reset.backoff_srcu, tag);
}
-int i915_terminally_wedged(struct drm_i915_private *i915)
+int intel_gt_terminally_wedged(struct intel_gt *gt)
{
- struct i915_gpu_error *error = &i915->gpu_error;
-
might_sleep();
- if (!__i915_wedged(error))
+ if (!intel_gt_is_wedged(gt))
return 0;
- /* Reset still in progress? Maybe we will recover? */
- if (!test_bit(I915_RESET_BACKOFF, &error->flags))
+ if (intel_gt_has_init_error(gt))
return -EIO;
- /* XXX intel_reset_finish() still takes struct_mutex!!! */
- if (mutex_is_locked(&i915->drm.struct_mutex))
- return -EAGAIN;
-
- if (wait_event_interruptible(error->reset_queue,
+ /* Reset still in progress? Maybe we will recover? */
+ if (wait_event_interruptible(gt->reset.queue,
!test_bit(I915_RESET_BACKOFF,
- &error->flags)))
+ &gt->reset.flags)))
return -EINTR;
- return __i915_wedged(error) ? -EIO : 0;
+ return intel_gt_is_wedged(gt) ? -EIO : 0;
+}
+
+void intel_gt_set_wedged_on_init(struct intel_gt *gt)
+{
+ BUILD_BUG_ON(I915_RESET_ENGINE + I915_NUM_ENGINES >
+ I915_WEDGED_ON_INIT);
+ intel_gt_set_wedged(gt);
+ set_bit(I915_WEDGED_ON_INIT, &gt->reset.flags);
+}
+
+void intel_gt_init_reset(struct intel_gt *gt)
+{
+ init_waitqueue_head(&gt->reset.queue);
+ mutex_init(&gt->reset.mutex);
+ init_srcu_struct(&gt->reset.backoff_srcu);
+
+ /* no GPU until we are ready! */
+ __set_bit(I915_WEDGED, &gt->reset.flags);
+}
+
+void intel_gt_fini_reset(struct intel_gt *gt)
+{
+ cleanup_srcu_struct(&gt->reset.backoff_srcu);
}
-static void i915_wedge_me(struct work_struct *work)
+static void intel_wedge_me(struct work_struct *work)
{
- struct i915_wedge_me *w = container_of(work, typeof(*w), work.work);
+ struct intel_wedge_me *w = container_of(work, typeof(*w), work.work);
- dev_err(w->i915->drm.dev,
+ dev_err(w->gt->i915->drm.dev,
"%s timed out, cancelling all in-flight rendering.\n",
w->name);
- i915_gem_set_wedged(w->i915);
+ intel_gt_set_wedged(w->gt);
}
-void __i915_init_wedge(struct i915_wedge_me *w,
- struct drm_i915_private *i915,
- long timeout,
- const char *name)
+void __intel_init_wedge(struct intel_wedge_me *w,
+ struct intel_gt *gt,
+ long timeout,
+ const char *name)
{
- w->i915 = i915;
+ w->gt = gt;
w->name = name;
- INIT_DELAYED_WORK_ONSTACK(&w->work, i915_wedge_me);
+ INIT_DELAYED_WORK_ONSTACK(&w->work, intel_wedge_me);
schedule_delayed_work(&w->work, timeout);
}
-void __i915_fini_wedge(struct i915_wedge_me *w)
+void __intel_fini_wedge(struct intel_wedge_me *w)
{
cancel_delayed_work_sync(&w->work);
destroy_delayed_work_on_stack(&w->work);
- w->i915 = NULL;
+ w->gt = NULL;
}
#if IS_ENABLED(CONFIG_DRM_I915_SELFTEST)
#include "selftest_reset.c"
+#include "selftest_hangcheck.c"
#endif
diff --git a/drivers/gpu/drm/i915/gt/intel_reset.h b/drivers/gpu/drm/i915/gt/intel_reset.h
index 580ebdb59eca..8e8d5f761166 100644
--- a/drivers/gpu/drm/i915/gt/intel_reset.h
+++ b/drivers/gpu/drm/i915/gt/intel_reset.h
@@ -11,58 +11,75 @@
#include <linux/types.h>
#include <linux/srcu.h>
-#include "gt/intel_engine_types.h"
+#include "intel_engine_types.h"
+#include "intel_reset_types.h"
-struct drm_i915_private;
struct i915_request;
struct intel_engine_cs;
+struct intel_gt;
struct intel_guc;
+void intel_gt_init_reset(struct intel_gt *gt);
+void intel_gt_fini_reset(struct intel_gt *gt);
+
__printf(4, 5)
-void i915_handle_error(struct drm_i915_private *i915,
- intel_engine_mask_t engine_mask,
- unsigned long flags,
- const char *fmt, ...);
+void intel_gt_handle_error(struct intel_gt *gt,
+ intel_engine_mask_t engine_mask,
+ unsigned long flags,
+ const char *fmt, ...);
#define I915_ERROR_CAPTURE BIT(0)
-void i915_check_and_clear_faults(struct drm_i915_private *i915);
-
-void i915_reset(struct drm_i915_private *i915,
- intel_engine_mask_t stalled_mask,
- const char *reason);
-int i915_reset_engine(struct intel_engine_cs *engine,
- const char *reason);
+void intel_gt_reset(struct intel_gt *gt,
+ intel_engine_mask_t stalled_mask,
+ const char *reason);
+int intel_engine_reset(struct intel_engine_cs *engine,
+ const char *reason);
-void i915_reset_request(struct i915_request *rq, bool guilty);
+void __i915_request_reset(struct i915_request *rq, bool guilty);
-int __must_check i915_reset_trylock(struct drm_i915_private *i915);
-void i915_reset_unlock(struct drm_i915_private *i915, int tag);
+int __must_check intel_gt_reset_trylock(struct intel_gt *gt, int *srcu);
+void intel_gt_reset_unlock(struct intel_gt *gt, int tag);
-int i915_terminally_wedged(struct drm_i915_private *i915);
+void intel_gt_set_wedged(struct intel_gt *gt);
+bool intel_gt_unset_wedged(struct intel_gt *gt);
+int intel_gt_terminally_wedged(struct intel_gt *gt);
-bool intel_has_gpu_reset(struct drm_i915_private *i915);
-bool intel_has_reset_engine(struct drm_i915_private *i915);
+/*
+ * There's no unset_wedged_on_init paired with this one.
+ * Once we're wedged on init, there's no going back.
+ */
+void intel_gt_set_wedged_on_init(struct intel_gt *gt);
-int intel_gpu_reset(struct drm_i915_private *i915,
- intel_engine_mask_t engine_mask);
+int __intel_gt_reset(struct intel_gt *gt, intel_engine_mask_t engine_mask);
-int intel_reset_guc(struct drm_i915_private *i915);
+int intel_reset_guc(struct intel_gt *gt);
-struct i915_wedge_me {
+struct intel_wedge_me {
struct delayed_work work;
- struct drm_i915_private *i915;
+ struct intel_gt *gt;
const char *name;
};
-void __i915_init_wedge(struct i915_wedge_me *w,
- struct drm_i915_private *i915,
- long timeout,
- const char *name);
-void __i915_fini_wedge(struct i915_wedge_me *w);
+void __intel_init_wedge(struct intel_wedge_me *w,
+ struct intel_gt *gt,
+ long timeout,
+ const char *name);
+void __intel_fini_wedge(struct intel_wedge_me *w);
+
+#define intel_wedge_on_timeout(W, GT, TIMEOUT) \
+ for (__intel_init_wedge((W), (GT), (TIMEOUT), __func__); \
+ (W)->gt; \
+ __intel_fini_wedge((W)))
+
+static inline bool __intel_reset_failed(const struct intel_reset *reset)
+{
+ GEM_BUG_ON(test_bit(I915_WEDGED_ON_INIT, &reset->flags) ?
+ !test_bit(I915_WEDGED, &reset->flags) : false);
+
+ return unlikely(test_bit(I915_WEDGED, &reset->flags));
+}
-#define i915_wedge_on_timeout(W, DEV, TIMEOUT) \
- for (__i915_init_wedge((W), (DEV), (TIMEOUT), __func__); \
- (W)->i915; \
- __i915_fini_wedge((W)))
+bool intel_has_gpu_reset(const struct intel_gt *gt);
+bool intel_has_reset_engine(const struct intel_gt *gt);
#endif /* I915_RESET_H */
diff --git a/drivers/gpu/drm/i915/gt/intel_reset_types.h b/drivers/gpu/drm/i915/gt/intel_reset_types.h
new file mode 100644
index 000000000000..f43bc3a0fe4f
--- /dev/null
+++ b/drivers/gpu/drm/i915/gt/intel_reset_types.h
@@ -0,0 +1,56 @@
+/* SPDX-License-Identifier: MIT */
+/*
+ * Copyright © 2019 Intel Corporation
+ */
+
+#ifndef __INTEL_RESET_TYPES_H_
+#define __INTEL_RESET_TYPES_H_
+
+#include <linux/mutex.h>
+#include <linux/wait.h>
+#include <linux/srcu.h>
+
+struct intel_reset {
+ /**
+ * flags: Control various stages of the GPU reset
+ *
+ * #I915_RESET_BACKOFF - When we start a global reset, we need to
+ * serialise with any other users attempting to do the same, and
+	 * any global resources that may be clobbered by the reset (such as
+ * FENCE registers).
+ *
+ * #I915_RESET_ENGINE[num_engines] - Since the driver doesn't need to
+ * acquire the struct_mutex to reset an engine, we need an explicit
+	 * flag to prevent two concurrent reset attempts on the same engine.
+ * As the number of engines continues to grow, allocate the flags from
+ * the most significant bits.
+ *
+ * #I915_WEDGED - If reset fails and we can no longer use the GPU,
+ * we set the #I915_WEDGED bit. Prior to command submission, e.g.
+ * i915_request_alloc(), this bit is checked and the sequence
+ * aborted (with -EIO reported to userspace) if set.
+ *
+ * #I915_WEDGED_ON_INIT - If we fail to initialize the GPU we can no
+	 * longer use the GPU - similar to the #I915_WEDGED bit. The difference
+	 * is in the way we handle a "forced" unwedge (e.g. through debugfs),
+	 * which is not allowed if we failed to initialize.
+ */
+ unsigned long flags;
+#define I915_RESET_BACKOFF 0
+#define I915_RESET_MODESET 1
+#define I915_RESET_ENGINE 2
+#define I915_WEDGED_ON_INIT (BITS_PER_LONG - 2)
+#define I915_WEDGED (BITS_PER_LONG - 1)
+
+ struct mutex mutex; /* serialises wedging/unwedging */
+
+ /**
+	 * Waitqueue to signal when the reset has completed. Used by clients
+	 * that wait for the #I915_RESET_BACKOFF flag to clear.
+ */
+ wait_queue_head_t queue;
+
+ struct srcu_struct backoff_srcu;
+};
+
+#endif /* __INTEL_RESET_TYPES_H_ */
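
A sketch of how the flag bits cooperate, mirroring intel_gt_handle_error() above (illustrative only): a per-engine reset claims its I915_RESET_ENGINE + id bit, while a global reset takes I915_RESET_BACKOFF and then sweeps up every engine bit before proceeding.

	/* Claim the engine's bit; skip if a reset of this engine is in flight. */
	if (!test_and_set_bit(I915_RESET_ENGINE + engine->id, &gt->reset.flags)) {
		intel_engine_reset(engine, "example");
		clear_and_wake_up_bit(I915_RESET_ENGINE + engine->id,
				      &gt->reset.flags);
	}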
diff --git a/drivers/gpu/drm/i915/gt/intel_ring.c b/drivers/gpu/drm/i915/gt/intel_ring.c
new file mode 100644
index 000000000000..374b28f13ca0
--- /dev/null
+++ b/drivers/gpu/drm/i915/gt/intel_ring.c
@@ -0,0 +1,318 @@
+/*
+ * SPDX-License-Identifier: MIT
+ *
+ * Copyright © 2019 Intel Corporation
+ */
+
+#include "gem/i915_gem_object.h"
+#include "i915_drv.h"
+#include "i915_vma.h"
+#include "intel_engine.h"
+#include "intel_ring.h"
+#include "intel_timeline.h"
+
+unsigned int intel_ring_update_space(struct intel_ring *ring)
+{
+ unsigned int space;
+
+ space = __intel_ring_space(ring->head, ring->emit, ring->size);
+
+ ring->space = space;
+ return space;
+}
+
+int intel_ring_pin(struct intel_ring *ring)
+{
+ struct i915_vma *vma = ring->vma;
+ unsigned int flags;
+ void *addr;
+ int ret;
+
+ if (atomic_fetch_inc(&ring->pin_count))
+ return 0;
+
+ flags = PIN_GLOBAL;
+
+ /* Ring wraparound at offset 0 sometimes hangs. No idea why. */
+ flags |= PIN_OFFSET_BIAS | i915_ggtt_pin_bias(vma);
+
+ if (vma->obj->stolen)
+ flags |= PIN_MAPPABLE;
+ else
+ flags |= PIN_HIGH;
+
+ ret = i915_vma_pin(vma, 0, 0, flags);
+ if (unlikely(ret))
+ goto err_unpin;
+
+ if (i915_vma_is_map_and_fenceable(vma))
+ addr = (void __force *)i915_vma_pin_iomap(vma);
+ else
+ addr = i915_gem_object_pin_map(vma->obj,
+ i915_coherent_map_type(vma->vm->i915));
+ if (IS_ERR(addr)) {
+ ret = PTR_ERR(addr);
+ goto err_ring;
+ }
+
+ i915_vma_make_unshrinkable(vma);
+
+	/* Discard any unused bytes beyond those submitted to the hw. */
+ intel_ring_reset(ring, ring->emit);
+
+ ring->vaddr = addr;
+ return 0;
+
+err_ring:
+ i915_vma_unpin(vma);
+err_unpin:
+ atomic_dec(&ring->pin_count);
+ return ret;
+}
+
+void intel_ring_reset(struct intel_ring *ring, u32 tail)
+{
+ tail = intel_ring_wrap(ring, tail);
+ ring->tail = tail;
+ ring->head = tail;
+ ring->emit = tail;
+ intel_ring_update_space(ring);
+}
+
+void intel_ring_unpin(struct intel_ring *ring)
+{
+ struct i915_vma *vma = ring->vma;
+
+ if (!atomic_dec_and_test(&ring->pin_count))
+ return;
+
+ i915_vma_unset_ggtt_write(vma);
+ if (i915_vma_is_map_and_fenceable(vma))
+ i915_vma_unpin_iomap(vma);
+ else
+ i915_gem_object_unpin_map(vma->obj);
+
+ i915_vma_make_purgeable(vma);
+ i915_vma_unpin(vma);
+}
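
A pinning sketch (illustrative only): ring->pin_count makes the pair nestable, so only the first pin maps the ring into ring->vaddr and only the last unpin releases the mapping.

	err = intel_ring_pin(ring);	/* first pin maps ring->vaddr */
	if (err)
		return err;

	intel_ring_pin(ring);		/* nested pin: just a refcount bump */
	intel_ring_unpin(ring);		/* ring->vaddr is still valid here */

	intel_ring_unpin(ring);		/* last unpin releases the mapping */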
+
+static struct i915_vma *create_ring_vma(struct i915_ggtt *ggtt, int size)
+{
+ struct i915_address_space *vm = &ggtt->vm;
+ struct drm_i915_private *i915 = vm->i915;
+ struct drm_i915_gem_object *obj;
+ struct i915_vma *vma;
+
+ obj = ERR_PTR(-ENODEV);
+ if (i915_ggtt_has_aperture(ggtt))
+ obj = i915_gem_object_create_stolen(i915, size);
+ if (IS_ERR(obj))
+ obj = i915_gem_object_create_internal(i915, size);
+ if (IS_ERR(obj))
+ return ERR_CAST(obj);
+
+ /*
+ * Mark ring buffers as read-only from GPU side (so no stray overwrites)
+ * if supported by the platform's GGTT.
+ */
+ if (vm->has_read_only)
+ i915_gem_object_set_readonly(obj);
+
+ vma = i915_vma_instance(obj, vm, NULL);
+ if (IS_ERR(vma))
+ goto err;
+
+ return vma;
+
+err:
+ i915_gem_object_put(obj);
+ return vma;
+}
+
+struct intel_ring *
+intel_engine_create_ring(struct intel_engine_cs *engine, int size)
+{
+ struct drm_i915_private *i915 = engine->i915;
+ struct intel_ring *ring;
+ struct i915_vma *vma;
+
+ GEM_BUG_ON(!is_power_of_2(size));
+ GEM_BUG_ON(RING_CTL_SIZE(size) & ~RING_NR_PAGES);
+
+ ring = kzalloc(sizeof(*ring), GFP_KERNEL);
+ if (!ring)
+ return ERR_PTR(-ENOMEM);
+
+ kref_init(&ring->ref);
+ ring->size = size;
+
+ /*
+ * Workaround an erratum on the i830 which causes a hang if
+ * the TAIL pointer points to within the last 2 cachelines
+ * of the buffer.
+ */
+ ring->effective_size = size;
+ if (IS_I830(i915) || IS_I845G(i915))
+ ring->effective_size -= 2 * CACHELINE_BYTES;
+
+ intel_ring_update_space(ring);
+
+ vma = create_ring_vma(engine->gt->ggtt, size);
+ if (IS_ERR(vma)) {
+ kfree(ring);
+ return ERR_CAST(vma);
+ }
+ ring->vma = vma;
+
+ return ring;
+}
+
+void intel_ring_free(struct kref *ref)
+{
+ struct intel_ring *ring = container_of(ref, typeof(*ring), ref);
+
+ i915_vma_put(ring->vma);
+ kfree(ring);
+}
+
+static noinline int
+wait_for_space(struct intel_ring *ring,
+ struct intel_timeline *tl,
+ unsigned int bytes)
+{
+ struct i915_request *target;
+ long timeout;
+
+ if (intel_ring_update_space(ring) >= bytes)
+ return 0;
+
+ GEM_BUG_ON(list_empty(&tl->requests));
+ list_for_each_entry(target, &tl->requests, link) {
+ if (target->ring != ring)
+ continue;
+
+ /* Would completion of this request free enough space? */
+ if (bytes <= __intel_ring_space(target->postfix,
+ ring->emit, ring->size))
+ break;
+ }
+
+ if (GEM_WARN_ON(&target->link == &tl->requests))
+ return -ENOSPC;
+
+ timeout = i915_request_wait(target,
+ I915_WAIT_INTERRUPTIBLE,
+ MAX_SCHEDULE_TIMEOUT);
+ if (timeout < 0)
+ return timeout;
+
+ i915_request_retire_upto(target);
+
+ intel_ring_update_space(ring);
+ GEM_BUG_ON(ring->space < bytes);
+ return 0;
+}
+
+u32 *intel_ring_begin(struct i915_request *rq, unsigned int num_dwords)
+{
+ struct intel_ring *ring = rq->ring;
+ const unsigned int remain_usable = ring->effective_size - ring->emit;
+ const unsigned int bytes = num_dwords * sizeof(u32);
+ unsigned int need_wrap = 0;
+ unsigned int total_bytes;
+ u32 *cs;
+
+ /* Packets must be qword aligned. */
+ GEM_BUG_ON(num_dwords & 1);
+
+ total_bytes = bytes + rq->reserved_space;
+ GEM_BUG_ON(total_bytes > ring->effective_size);
+
+ if (unlikely(total_bytes > remain_usable)) {
+ const int remain_actual = ring->size - ring->emit;
+
+ if (bytes > remain_usable) {
+ /*
+			 * Not enough space for the basic request, so we need
+			 * to flush out the remainder and then wait for
+ * base + reserved.
+ */
+ total_bytes += remain_actual;
+ need_wrap = remain_actual | 1;
+ } else {
+ /*
+ * The base request will fit but the reserved space
+ * falls off the end. So we don't need an immediate
+ * wrap and only need to effectively wait for the
+			 * reserved size from the start of the ringbuffer.
+ */
+ total_bytes = rq->reserved_space + remain_actual;
+ }
+ }
+
+ if (unlikely(total_bytes > ring->space)) {
+ int ret;
+
+ /*
+ * Space is reserved in the ringbuffer for finalising the
+ * request, as that cannot be allowed to fail. During request
+ * finalisation, reserved_space is set to 0 to stop the
+ * overallocation and the assumption is that then we never need
+ * to wait (which has the risk of failing with EINTR).
+ *
+ * See also i915_request_alloc() and i915_request_add().
+ */
+ GEM_BUG_ON(!rq->reserved_space);
+
+ ret = wait_for_space(ring,
+ i915_request_timeline(rq),
+ total_bytes);
+ if (unlikely(ret))
+ return ERR_PTR(ret);
+ }
+
+ if (unlikely(need_wrap)) {
+ need_wrap &= ~1;
+ GEM_BUG_ON(need_wrap > ring->space);
+ GEM_BUG_ON(ring->emit + need_wrap > ring->size);
+ GEM_BUG_ON(!IS_ALIGNED(need_wrap, sizeof(u64)));
+
+ /* Fill the tail with MI_NOOP */
+ memset64(ring->vaddr + ring->emit, 0, need_wrap / sizeof(u64));
+ ring->space -= need_wrap;
+ ring->emit = 0;
+ }
+
+ GEM_BUG_ON(ring->emit > ring->size - bytes);
+ GEM_BUG_ON(ring->space < bytes);
+ cs = ring->vaddr + ring->emit;
+ GEM_DEBUG_EXEC(memset32(cs, POISON_INUSE, bytes / sizeof(*cs)));
+ ring->emit += bytes;
+ ring->space -= bytes;
+
+ return cs;
+}
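
The canonical emitter pattern built on top of intel_ring_begin() (a sketch matching the emitters elsewhere in this series): reserve an even number of dwords, write exactly that many, then close with intel_ring_advance(), which asserts the count matched.

	static int example_emit_flush(struct i915_request *rq)
	{
		u32 *cs;

		cs = intel_ring_begin(rq, 2);	/* dword count must be even */
		if (IS_ERR(cs))
			return PTR_ERR(cs);

		*cs++ = MI_FLUSH;
		*cs++ = MI_NOOP;

		intel_ring_advance(rq, cs);	/* asserts we wrote what we reserved */
		return 0;
	}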
+
+/* Align the ring tail to a cacheline boundary */
+int intel_ring_cacheline_align(struct i915_request *rq)
+{
+ int num_dwords;
+ void *cs;
+
+ num_dwords = (rq->ring->emit & (CACHELINE_BYTES - 1)) / sizeof(u32);
+ if (num_dwords == 0)
+ return 0;
+
+ num_dwords = CACHELINE_DWORDS - num_dwords;
+ GEM_BUG_ON(num_dwords & 1);
+
+ cs = intel_ring_begin(rq, num_dwords);
+ if (IS_ERR(cs))
+ return PTR_ERR(cs);
+
+ memset64(cs, (u64)MI_NOOP << 32 | MI_NOOP, num_dwords / 2);
+ intel_ring_advance(rq, cs + num_dwords);
+
+ GEM_BUG_ON(rq->ring->emit & (CACHELINE_BYTES - 1));
+ return 0;
+}
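
A worked example for the alignment above (a sketch, assuming CACHELINE_BYTES == 64): if ring->emit == 0x1c8, that is 8 bytes == 2 dwords into its cacheline, so 16 - 2 == 14 MI_NOOP dwords are emitted and the next packet starts cacheline aligned at 0x200.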
diff --git a/drivers/gpu/drm/i915/gt/intel_ring.h b/drivers/gpu/drm/i915/gt/intel_ring.h
new file mode 100644
index 000000000000..ea2839d9e044
--- /dev/null
+++ b/drivers/gpu/drm/i915/gt/intel_ring.h
@@ -0,0 +1,131 @@
+/*
+ * SPDX-License-Identifier: MIT
+ *
+ * Copyright © 2019 Intel Corporation
+ */
+
+#ifndef INTEL_RING_H
+#define INTEL_RING_H
+
+#include "i915_gem.h" /* GEM_BUG_ON */
+#include "i915_request.h"
+#include "intel_ring_types.h"
+
+struct intel_engine_cs;
+
+struct intel_ring *
+intel_engine_create_ring(struct intel_engine_cs *engine, int size);
+
+u32 *intel_ring_begin(struct i915_request *rq, unsigned int num_dwords);
+int intel_ring_cacheline_align(struct i915_request *rq);
+
+unsigned int intel_ring_update_space(struct intel_ring *ring);
+
+int intel_ring_pin(struct intel_ring *ring);
+void intel_ring_unpin(struct intel_ring *ring);
+void intel_ring_reset(struct intel_ring *ring, u32 tail);
+
+void intel_ring_free(struct kref *ref);
+
+static inline struct intel_ring *intel_ring_get(struct intel_ring *ring)
+{
+ kref_get(&ring->ref);
+ return ring;
+}
+
+static inline void intel_ring_put(struct intel_ring *ring)
+{
+ kref_put(&ring->ref, intel_ring_free);
+}
+
+static inline void intel_ring_advance(struct i915_request *rq, u32 *cs)
+{
+	/*
+	 * Dummy function.
+	 *
+ * This serves as a placeholder in the code so that the reader
+ * can compare against the preceding intel_ring_begin() and
+ * check that the number of dwords emitted matches the space
+ * reserved for the command packet (i.e. the value passed to
+ * intel_ring_begin()).
+ */
+ GEM_BUG_ON((rq->ring->vaddr + rq->ring->emit) != cs);
+}
+
+static inline u32 intel_ring_wrap(const struct intel_ring *ring, u32 pos)
+{
+ return pos & (ring->size - 1);
+}
+
+static inline bool
+intel_ring_offset_valid(const struct intel_ring *ring,
+ unsigned int pos)
+{
+ if (pos & -ring->size) /* must be strictly within the ring */
+ return false;
+
+ if (!IS_ALIGNED(pos, 8)) /* must be qword aligned */
+ return false;
+
+ return true;
+}
+
+static inline u32 intel_ring_offset(const struct i915_request *rq, void *addr)
+{
+ /* Don't write ring->size (equivalent to 0) as that hangs some GPUs. */
+	/* Don't write ring->size (equivalent to 0) as that hangs some GPUs. */
+	u32 offset = addr - rq->ring->vaddr;
+
+ return intel_ring_wrap(rq->ring, offset);
+}
+
+static inline void
+assert_ring_tail_valid(const struct intel_ring *ring, unsigned int tail)
+{
+ GEM_BUG_ON(!intel_ring_offset_valid(ring, tail));
+
+ /*
+ * "Ring Buffer Use"
+ * Gen2 BSpec "1. Programming Environment" / 1.4.4.6
+ * Gen3 BSpec "1c Memory Interface Functions" / 2.3.4.5
+ * Gen4+ BSpec "1c Memory Interface and Command Stream" / 5.3.4.5
+ * "If the Ring Buffer Head Pointer and the Tail Pointer are on the
+ * same cacheline, the Head Pointer must not be greater than the Tail
+ * Pointer."
+ *
+	 * We use ring->head as the last known location of the actual RING_HEAD:
+	 * it may have advanced, but in the worst case it is exactly ring->head,
+	 * so we must never program RING_TAIL to advance into the same
+	 * cacheline as ring->head.
+ */
+#define cacheline(a) round_down(a, CACHELINE_BYTES)
+ GEM_BUG_ON(cacheline(tail) == cacheline(ring->head) &&
+ tail < ring->head);
+#undef cacheline
+}
+
+static inline unsigned int
+intel_ring_set_tail(struct intel_ring *ring, unsigned int tail)
+{
+	/*
+	 * Whilst writes to the tail are strictly ordered, there is no
+	 * serialisation between readers and the writers. The tail may be
+	 * read by i915_request_retire() just as it is being updated
+	 * by execlists: although the breadcrumb is complete, the context
+	 * switch hasn't been seen.
+	 */
+ assert_ring_tail_valid(ring, tail);
+ ring->tail = tail;
+ return tail;
+}
+
+static inline unsigned int
+__intel_ring_space(unsigned int head, unsigned int tail, unsigned int size)
+{
+ /*
+ * "If the Ring Buffer Head Pointer and the Tail Pointer are on the
+ * same cacheline, the Head Pointer must not be greater than the Tail
+ * Pointer."
+ */
+ GEM_BUG_ON(!is_power_of_2(size));
+ return (head - tail - CACHELINE_BYTES) & (size - 1);
+}
+
+#endif /* INTEL_RING_H */
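
A worked instance of the space computation above (a sketch, assuming CACHELINE_BYTES == 64): the guard cacheline means the writer can never close the final gap up to the head, so RING_TAIL can never catch RING_HEAD.

	unsigned int head = 64, emit = 4032, size = 4096;
	unsigned int space = (head - emit - 64 /* CACHELINE_BYTES */) & (size - 1);
	/* space == 64: one cacheline short of touching the head */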
diff --git a/drivers/gpu/drm/i915/gt/intel_ringbuffer.c b/drivers/gpu/drm/i915/gt/intel_ring_submission.c
index 12010e798868..bc44fe8e5ffa 100644
--- a/drivers/gpu/drm/i915/gt/intel_ringbuffer.c
+++ b/drivers/gpu/drm/i915/gt/intel_ring_submission.c
@@ -33,11 +33,15 @@
#include "gem/i915_gem_context.h"
+#include "gen6_ppgtt.h"
#include "i915_drv.h"
-#include "i915_gem_render_state.h"
#include "i915_trace.h"
#include "intel_context.h"
+#include "intel_gt.h"
+#include "intel_gt_irq.h"
+#include "intel_gt_pm_irq.h"
#include "intel_reset.h"
+#include "intel_ring.h"
#include "intel_workarounds.h"
/* Rough estimate of the typical request size, performing a flush,
@@ -45,16 +49,6 @@
*/
#define LEGACY_REQUEST_SIZE 200
-unsigned int intel_ring_update_space(struct intel_ring *ring)
-{
- unsigned int space;
-
- space = __intel_ring_space(ring->head, ring->emit, ring->size);
-
- ring->space = space;
- return space;
-}
-
static int
gen2_render_ring_flush(struct i915_request *rq, u32 mode)
{
@@ -75,7 +69,8 @@ gen2_render_ring_flush(struct i915_request *rq, u32 mode)
*cs++ = cmd;
while (num_store_dw--) {
*cs++ = MI_STORE_DWORD_IMM | MI_MEM_VIRTUAL;
- *cs++ = i915_scratch_offset(rq->i915);
+ *cs++ = intel_gt_scratch_offset(rq->engine->gt,
+ INTEL_GT_SCRATCH_FIELD_DEFAULT);
*cs++ = 0;
}
*cs++ = MI_FLUSH | MI_NO_WRITE_FLUSH;
@@ -148,7 +143,9 @@ gen4_render_ring_flush(struct i915_request *rq, u32 mode)
*/
if (mode & EMIT_INVALIDATE) {
*cs++ = GFX_OP_PIPE_CONTROL(4) | PIPE_CONTROL_QW_WRITE;
- *cs++ = i915_scratch_offset(rq->i915) | PIPE_CONTROL_GLOBAL_GTT;
+ *cs++ = intel_gt_scratch_offset(rq->engine->gt,
+ INTEL_GT_SCRATCH_FIELD_DEFAULT) |
+ PIPE_CONTROL_GLOBAL_GTT;
*cs++ = 0;
*cs++ = 0;
@@ -156,7 +153,9 @@ gen4_render_ring_flush(struct i915_request *rq, u32 mode)
*cs++ = MI_FLUSH;
*cs++ = GFX_OP_PIPE_CONTROL(4) | PIPE_CONTROL_QW_WRITE;
- *cs++ = i915_scratch_offset(rq->i915) | PIPE_CONTROL_GLOBAL_GTT;
+ *cs++ = intel_gt_scratch_offset(rq->engine->gt,
+ INTEL_GT_SCRATCH_FIELD_DEFAULT) |
+ PIPE_CONTROL_GLOBAL_GTT;
*cs++ = 0;
*cs++ = 0;
}
@@ -208,7 +207,9 @@ gen4_render_ring_flush(struct i915_request *rq, u32 mode)
static int
gen6_emit_post_sync_nonzero_flush(struct i915_request *rq)
{
- u32 scratch_addr = i915_scratch_offset(rq->i915) + 2 * CACHELINE_BYTES;
+ u32 scratch_addr =
+ intel_gt_scratch_offset(rq->engine->gt,
+ INTEL_GT_SCRATCH_FIELD_RENDER_FLUSH);
u32 *cs;
cs = intel_ring_begin(rq, 6);
@@ -241,7 +242,9 @@ gen6_emit_post_sync_nonzero_flush(struct i915_request *rq)
static int
gen6_render_ring_flush(struct i915_request *rq, u32 mode)
{
- u32 scratch_addr = i915_scratch_offset(rq->i915) + 2 * CACHELINE_BYTES;
+ u32 scratch_addr =
+ intel_gt_scratch_offset(rq->engine->gt,
+ INTEL_GT_SCRATCH_FIELD_RENDER_FLUSH);
u32 *cs, flags = 0;
int ret;
@@ -299,7 +302,9 @@ static u32 *gen6_rcs_emit_breadcrumb(struct i915_request *rq, u32 *cs)
*cs++ = GFX_OP_PIPE_CONTROL(4);
*cs++ = PIPE_CONTROL_QW_WRITE;
- *cs++ = i915_scratch_offset(rq->i915) | PIPE_CONTROL_GLOBAL_GTT;
+ *cs++ = intel_gt_scratch_offset(rq->engine->gt,
+ INTEL_GT_SCRATCH_FIELD_DEFAULT) |
+ PIPE_CONTROL_GLOBAL_GTT;
*cs++ = 0;
/* Finally we can flush and with it emit the breadcrumb */
@@ -309,7 +314,8 @@ static u32 *gen6_rcs_emit_breadcrumb(struct i915_request *rq, u32 *cs)
PIPE_CONTROL_DC_FLUSH_ENABLE |
PIPE_CONTROL_QW_WRITE |
PIPE_CONTROL_CS_STALL);
- *cs++ = rq->timeline->hwsp_offset | PIPE_CONTROL_GLOBAL_GTT;
+ *cs++ = i915_request_active_timeline(rq)->hwsp_offset |
+ PIPE_CONTROL_GLOBAL_GTT;
*cs++ = rq->fence.seqno;
*cs++ = MI_USER_INTERRUPT;
@@ -342,7 +348,9 @@ gen7_render_ring_cs_stall_wa(struct i915_request *rq)
static int
gen7_render_ring_flush(struct i915_request *rq, u32 mode)
{
- u32 scratch_addr = i915_scratch_offset(rq->i915) + 2 * CACHELINE_BYTES;
+ u32 scratch_addr =
+ intel_gt_scratch_offset(rq->engine->gt,
+ INTEL_GT_SCRATCH_FIELD_RENDER_FLUSH);
u32 *cs, flags = 0;
/*
@@ -355,6 +363,12 @@ gen7_render_ring_flush(struct i915_request *rq, u32 mode)
*/
flags |= PIPE_CONTROL_CS_STALL;
+ /*
+ * CS_STALL suggests at least a post-sync write.
+ */
+ flags |= PIPE_CONTROL_QW_WRITE;
+ flags |= PIPE_CONTROL_GLOBAL_GTT_IVB;
+
/* Just flush everything. Experiments have shown that reducing the
* number of bits based on the write domains has little performance
* impact.
@@ -373,13 +387,6 @@ gen7_render_ring_flush(struct i915_request *rq, u32 mode)
flags |= PIPE_CONTROL_CONST_CACHE_INVALIDATE;
flags |= PIPE_CONTROL_STATE_CACHE_INVALIDATE;
flags |= PIPE_CONTROL_MEDIA_STATE_CLEAR;
- /*
- * TLB invalidate requires a post-sync write.
- */
- flags |= PIPE_CONTROL_QW_WRITE;
- flags |= PIPE_CONTROL_GLOBAL_GTT_IVB;
-
- flags |= PIPE_CONTROL_STALL_AT_SCOREBOARD;
/* Workaround: we must issue a pipe_control with CS-stall bit
* set before a pipe_control command that has the state cache
@@ -410,7 +417,7 @@ static u32 *gen7_rcs_emit_breadcrumb(struct i915_request *rq, u32 *cs)
PIPE_CONTROL_QW_WRITE |
PIPE_CONTROL_GLOBAL_GTT_IVB |
PIPE_CONTROL_CS_STALL);
- *cs++ = rq->timeline->hwsp_offset;
+ *cs++ = i915_request_active_timeline(rq)->hwsp_offset;
*cs++ = rq->fence.seqno;
*cs++ = MI_USER_INTERRUPT;
@@ -424,8 +431,8 @@ static u32 *gen7_rcs_emit_breadcrumb(struct i915_request *rq, u32 *cs)
static u32 *gen6_xcs_emit_breadcrumb(struct i915_request *rq, u32 *cs)
{
- GEM_BUG_ON(rq->timeline->hwsp_ggtt != rq->engine->status_page.vma);
- GEM_BUG_ON(offset_in_page(rq->timeline->hwsp_offset) != I915_GEM_HWS_SEQNO_ADDR);
+ GEM_BUG_ON(i915_request_active_timeline(rq)->hwsp_ggtt != rq->engine->status_page.vma);
+ GEM_BUG_ON(offset_in_page(i915_request_active_timeline(rq)->hwsp_offset) != I915_GEM_HWS_SEQNO_ADDR);
*cs++ = MI_FLUSH_DW | MI_FLUSH_DW_OP_STOREDW | MI_FLUSH_DW_STORE_INDEX;
*cs++ = I915_GEM_HWS_SEQNO_ADDR | MI_FLUSH_DW_USE_GTT;
@@ -444,10 +451,11 @@ static u32 *gen7_xcs_emit_breadcrumb(struct i915_request *rq, u32 *cs)
{
int i;
- GEM_BUG_ON(rq->timeline->hwsp_ggtt != rq->engine->status_page.vma);
- GEM_BUG_ON(offset_in_page(rq->timeline->hwsp_offset) != I915_GEM_HWS_SEQNO_ADDR);
+ GEM_BUG_ON(i915_request_active_timeline(rq)->hwsp_ggtt != rq->engine->status_page.vma);
+ GEM_BUG_ON(offset_in_page(i915_request_active_timeline(rq)->hwsp_offset) != I915_GEM_HWS_SEQNO_ADDR);
- *cs++ = MI_FLUSH_DW | MI_FLUSH_DW_OP_STOREDW | MI_FLUSH_DW_STORE_INDEX;
+ *cs++ = MI_FLUSH_DW | MI_INVALIDATE_TLB |
+ MI_FLUSH_DW_OP_STOREDW | MI_FLUSH_DW_STORE_INDEX;
*cs++ = I915_GEM_HWS_SEQNO_ADDR | MI_FLUSH_DW_USE_GTT;
*cs++ = rq->fence.seqno;
@@ -489,14 +497,13 @@ static void set_hwstam(struct intel_engine_cs *engine, u32 mask)
static void set_hws_pga(struct intel_engine_cs *engine, phys_addr_t phys)
{
- struct drm_i915_private *dev_priv = engine->i915;
u32 addr;
addr = lower_32_bits(phys);
- if (INTEL_GEN(dev_priv) >= 4)
+ if (INTEL_GEN(engine->i915) >= 4)
addr |= (phys >> 28) & 0xf0;
- I915_WRITE(HWS_PGA, addr);
+ intel_uncore_write(engine->uncore, HWS_PGA, addr);
}
static struct page *status_page(struct intel_engine_cs *engine)
@@ -515,14 +522,13 @@ static void ring_setup_phys_status_page(struct intel_engine_cs *engine)
static void set_hwsp(struct intel_engine_cs *engine, u32 offset)
{
- struct drm_i915_private *dev_priv = engine->i915;
i915_reg_t hwsp;
/*
* The ring status page addresses are no longer next to the rest of
* the ring registers as of gen7.
*/
- if (IS_GEN(dev_priv, 7)) {
+ if (IS_GEN(engine->i915, 7)) {
switch (engine->id) {
/*
* No more rings exist on Gen7. Default case is only to shut up
@@ -544,14 +550,14 @@ static void set_hwsp(struct intel_engine_cs *engine, u32 offset)
hwsp = VEBOX_HWS_PGA_GEN7;
break;
}
- } else if (IS_GEN(dev_priv, 6)) {
+ } else if (IS_GEN(engine->i915, 6)) {
hwsp = RING_HWS_PGA_GEN6(engine->mmio_base);
} else {
hwsp = RING_HWS_PGA(engine->mmio_base);
}
- I915_WRITE(hwsp, offset);
- POSTING_READ(hwsp);
+ intel_uncore_write(engine->uncore, hwsp, offset);
+ intel_uncore_posting_read(engine->uncore, hwsp);
}
static void flush_cs_tlb(struct intel_engine_cs *engine)
@@ -623,14 +629,15 @@ static bool stop_ring(struct intel_engine_cs *engine)
static int xcs_resume(struct intel_engine_cs *engine)
{
struct drm_i915_private *dev_priv = engine->i915;
- struct intel_ring *ring = engine->buffer;
+ struct intel_ring *ring = engine->legacy.ring;
int ret = 0;
- GEM_TRACE("%s: ring:{HEAD:%04x, TAIL:%04x}\n",
- engine->name, ring->head, ring->tail);
+ ENGINE_TRACE(engine, "ring:{HEAD:%04x, TAIL:%04x}\n",
+ ring->head, ring->tail);
intel_uncore_forcewake_get(engine->uncore, FORCEWAKE_ALL);
+ /* WaClearRingBufHeadRegAtInit:ctg,elk */
if (!stop_ring(engine)) {
/* G45 ring initialization often fails to reset head to zero */
DRM_DEBUG_DRIVER("%s head not reset to zero "
@@ -662,19 +669,16 @@ static int xcs_resume(struct intel_engine_cs *engine)
intel_engine_reset_breadcrumbs(engine);
/* Enforce ordering by reading HEAD register back */
- ENGINE_READ(engine, RING_HEAD);
+ ENGINE_POSTING_READ(engine, RING_HEAD);
- /* Initialize the ring. This must happen _after_ we've cleared the ring
+ /*
+ * Initialize the ring. This must happen _after_ we've cleared the ring
* registers with the above sequence (the readback of the HEAD registers
* also enforces ordering), otherwise the hw might lose the new ring
- * register values. */
+ * register values.
+ */
ENGINE_WRITE(engine, RING_START, i915_ggtt_offset(ring->vma));
- /* WaClearRingBufHeadRegAtInit:ctg,elk */
- if (ENGINE_READ(engine, RING_HEAD))
- DRM_DEBUG_DRIVER("%s initialization failed [head=%08x], fudging\n",
- engine->name, ENGINE_READ(engine, RING_HEAD));
-
/* Check that the ring offsets point within the ring! */
GEM_BUG_ON(!intel_ring_offset_valid(ring, ring->head));
GEM_BUG_ON(!intel_ring_offset_valid(ring, ring->tail));
@@ -716,7 +720,7 @@ static int xcs_resume(struct intel_engine_cs *engine)
}
/* Papering over lost _interrupts_ immediately following the restart */
- intel_engine_queue_breadcrumbs(engine);
+ intel_engine_signal_breadcrumbs(engine);
out:
intel_uncore_forcewake_put(engine->uncore, FORCEWAKE_ALL);
@@ -725,10 +729,47 @@ out:
static void reset_prepare(struct intel_engine_cs *engine)
{
- intel_engine_stop_cs(engine);
+ struct intel_uncore *uncore = engine->uncore;
+ const u32 base = engine->mmio_base;
+
+ /*
+	 * We stop the engines, otherwise we might get a failed reset and a
+	 * dead gpu (on elk). Also, a gpu as modern as kbl can suffer
+	 * from a system hang if a batchbuffer is progressing when
+	 * the reset is issued, regardless of the READY_TO_RESET ack.
+	 * Thus we assume it is best to stop the engines on all gens
+ * where we have a gpu reset.
+ *
+ * WaKBLVECSSemaphoreWaitPoll:kbl (on ALL_ENGINES)
+ *
+ * WaMediaResetMainRingCleanup:ctg,elk (presumably)
+ *
+ * FIXME: Wa for more modern gens needs to be validated
+ */
+ ENGINE_TRACE(engine, "\n");
+
+ if (intel_engine_stop_cs(engine))
+ ENGINE_TRACE(engine, "timed out on STOP_RING\n");
+
+ intel_uncore_write_fw(uncore,
+ RING_HEAD(base),
+ intel_uncore_read_fw(uncore, RING_TAIL(base)));
+ intel_uncore_posting_read_fw(uncore, RING_HEAD(base)); /* paranoia */
+
+ intel_uncore_write_fw(uncore, RING_HEAD(base), 0);
+ intel_uncore_write_fw(uncore, RING_TAIL(base), 0);
+ intel_uncore_posting_read_fw(uncore, RING_TAIL(base));
+
+ /* The ring must be empty before it is disabled */
+ intel_uncore_write_fw(uncore, RING_CTL(base), 0);
+
+ /* Check acts as a post */
+ if (intel_uncore_read_fw(uncore, RING_HEAD(base)))
+ ENGINE_TRACE(engine, "ring head [%x] not parked\n",
+ intel_uncore_read_fw(uncore, RING_HEAD(base)));
}
-static void reset_ring(struct intel_engine_cs *engine, bool stalled)
+static void reset_rewind(struct intel_engine_cs *engine, bool stalled)
{
struct i915_request *pos, *rq;
unsigned long flags;
@@ -781,14 +822,14 @@ static void reset_ring(struct intel_engine_cs *engine, bool stalled)
* If the request was innocent, we try to replay the request
* with the restored context.
*/
- i915_reset_request(rq, stalled);
+ __i915_request_reset(rq, stalled);
- GEM_BUG_ON(rq->ring != engine->buffer);
+ GEM_BUG_ON(rq->ring != engine->legacy.ring);
head = rq->head;
} else {
- head = engine->buffer->tail;
+ head = engine->legacy.ring->tail;
}
- engine->buffer->head = intel_ring_wrap(engine->buffer, head);
+ engine->legacy.ring->head = intel_ring_wrap(engine->legacy.ring, head);
spin_unlock_irqrestore(&engine->active.lock, flags);
}
@@ -797,24 +838,10 @@ static void reset_finish(struct intel_engine_cs *engine)
{
}
-static int intel_rcs_ctx_init(struct i915_request *rq)
-{
- int ret;
-
- ret = intel_engine_emit_ctx_wa(rq);
- if (ret != 0)
- return ret;
-
- ret = i915_gem_render_state_emit(rq);
- if (ret)
- return ret;
-
- return 0;
-}
-
static int rcs_resume(struct intel_engine_cs *engine)
{
- struct drm_i915_private *dev_priv = engine->i915;
+ struct drm_i915_private *i915 = engine->i915;
+ struct intel_uncore *uncore = engine->uncore;
/*
* Disable CONSTANT_BUFFER before it is loaded from the context
@@ -826,13 +853,14 @@ static int rcs_resume(struct intel_engine_cs *engine)
* they are already accustomed to from before contexts were
* enabled.
*/
- if (IS_GEN(dev_priv, 4))
- I915_WRITE(ECOSKPD,
+ if (IS_GEN(i915, 4))
+ intel_uncore_write(uncore, ECOSKPD,
_MASKED_BIT_ENABLE(ECO_CONSTANT_BUFFER_SR_DISABLE));
/* WaTimedSingleVertexDispatch:cl,bw,ctg,elk,ilk,snb */
- if (IS_GEN_RANGE(dev_priv, 4, 6))
- I915_WRITE(MI_MODE, _MASKED_BIT_ENABLE(VS_TIMER_DISPATCH));
+ if (IS_GEN_RANGE(i915, 4, 6))
+ intel_uncore_write(uncore, MI_MODE,
+ _MASKED_BIT_ENABLE(VS_TIMER_DISPATCH));
/* We need to disable the AsyncFlip performance optimisations in order
* to use MI_WAIT_FOR_EVENT within the CS. It should already be
@@ -840,38 +868,40 @@ static int rcs_resume(struct intel_engine_cs *engine)
*
* WaDisableAsyncFlipPerfMode:snb,ivb,hsw,vlv
*/
- if (IS_GEN_RANGE(dev_priv, 6, 7))
- I915_WRITE(MI_MODE, _MASKED_BIT_ENABLE(ASYNC_FLIP_PERF_DISABLE));
+ if (IS_GEN_RANGE(i915, 6, 7))
+ intel_uncore_write(uncore, MI_MODE,
+ _MASKED_BIT_ENABLE(ASYNC_FLIP_PERF_DISABLE));
/* Required for the hardware to program scanline values for waiting */
/* WaEnableFlushTlbInvalidationMode:snb */
- if (IS_GEN(dev_priv, 6))
- I915_WRITE(GFX_MODE,
+ if (IS_GEN(i915, 6))
+ intel_uncore_write(uncore, GFX_MODE,
_MASKED_BIT_ENABLE(GFX_TLB_INVALIDATE_EXPLICIT));
/* WaBCSVCSTlbInvalidationMode:ivb,vlv,hsw */
- if (IS_GEN(dev_priv, 7))
- I915_WRITE(GFX_MODE_GEN7,
+ if (IS_GEN(i915, 7))
+ intel_uncore_write(uncore, GFX_MODE_GEN7,
_MASKED_BIT_ENABLE(GFX_TLB_INVALIDATE_EXPLICIT) |
_MASKED_BIT_ENABLE(GFX_REPLAY_MODE));
- if (IS_GEN(dev_priv, 6)) {
+ if (IS_GEN(i915, 6)) {
/* From the Sandybridge PRM, volume 1 part 3, page 24:
* "If this bit is set, STCunit will have LRA as replacement
* policy. [...] This bit must be reset. LRA replacement
* policy is not supported."
*/
- I915_WRITE(CACHE_MODE_0,
+ intel_uncore_write(uncore, CACHE_MODE_0,
_MASKED_BIT_DISABLE(CM0_STC_EVICT_DISABLE_LRA_SNB));
}
- if (IS_GEN_RANGE(dev_priv, 6, 7))
- I915_WRITE(INSTPM, _MASKED_BIT_ENABLE(INSTPM_FORCE_ORDERING));
+ if (IS_GEN_RANGE(i915, 6, 7))
+ intel_uncore_write(uncore, INSTPM,
+ _MASKED_BIT_ENABLE(INSTPM_FORCE_ORDERING));
return xcs_resume(engine);
}
-static void cancel_requests(struct intel_engine_cs *engine)
+static void reset_cancel(struct intel_engine_cs *engine)
{
struct i915_request *request;
unsigned long flags;
@@ -894,6 +924,7 @@ static void cancel_requests(struct intel_engine_cs *engine)
static void i9xx_submit_request(struct i915_request *request)
{
i915_request_submit(request);
+ wmb(); /* paranoid flush writes out of the WCB before mmio */
ENGINE_WRITE(request->engine, RING_TAIL,
intel_ring_set_tail(request->ring, request->tail));
@@ -901,8 +932,8 @@ static void i9xx_submit_request(struct i915_request *request)
static u32 *i9xx_emit_breadcrumb(struct i915_request *rq, u32 *cs)
{
- GEM_BUG_ON(rq->timeline->hwsp_ggtt != rq->engine->status_page.vma);
- GEM_BUG_ON(offset_in_page(rq->timeline->hwsp_offset) != I915_GEM_HWS_SEQNO_ADDR);
+ GEM_BUG_ON(i915_request_active_timeline(rq)->hwsp_ggtt != rq->engine->status_page.vma);
+ GEM_BUG_ON(offset_in_page(i915_request_active_timeline(rq)->hwsp_offset) != I915_GEM_HWS_SEQNO_ADDR);
*cs++ = MI_FLUSH;
@@ -924,8 +955,8 @@ static u32 *gen5_emit_breadcrumb(struct i915_request *rq, u32 *cs)
{
int i;
- GEM_BUG_ON(rq->timeline->hwsp_ggtt != rq->engine->status_page.vma);
- GEM_BUG_ON(offset_in_page(rq->timeline->hwsp_offset) != I915_GEM_HWS_SEQNO_ADDR);
+ GEM_BUG_ON(i915_request_active_timeline(rq)->hwsp_ggtt != rq->engine->status_page.vma);
+ GEM_BUG_ON(offset_in_page(i915_request_active_timeline(rq)->hwsp_offset) != I915_GEM_HWS_SEQNO_ADDR);
*cs++ = MI_FLUSH;
@@ -948,13 +979,13 @@ static u32 *gen5_emit_breadcrumb(struct i915_request *rq, u32 *cs)
static void
gen5_irq_enable(struct intel_engine_cs *engine)
{
- gen5_enable_gt_irq(engine->i915, engine->irq_enable_mask);
+ gen5_gt_enable_irq(engine->gt, engine->irq_enable_mask);
}
static void
gen5_irq_disable(struct intel_engine_cs *engine)
{
- gen5_disable_gt_irq(engine->i915, engine->irq_enable_mask);
+ gen5_gt_disable_irq(engine->gt, engine->irq_enable_mask);
}
static void
@@ -1015,14 +1046,14 @@ gen6_irq_enable(struct intel_engine_cs *engine)
/* Flush/delay to ensure the RING_IMR is active before the GT IMR */
ENGINE_POSTING_READ(engine, RING_IMR);
- gen5_enable_gt_irq(engine->i915, engine->irq_enable_mask);
+ gen5_gt_enable_irq(engine->gt, engine->irq_enable_mask);
}
static void
gen6_irq_disable(struct intel_engine_cs *engine)
{
ENGINE_WRITE(engine, RING_IMR, ~engine->irq_keep_mask);
- gen5_disable_gt_irq(engine->i915, engine->irq_enable_mask);
+ gen5_gt_disable_irq(engine->gt, engine->irq_enable_mask);
}
static void
@@ -1033,14 +1064,14 @@ hsw_vebox_irq_enable(struct intel_engine_cs *engine)
/* Flush/delay to ensure the RING_IMR is active before the GT IMR */
ENGINE_POSTING_READ(engine, RING_IMR);
- gen6_unmask_pm_irq(engine->i915, engine->irq_enable_mask);
+ gen6_gt_pm_unmask_irq(engine->gt, engine->irq_enable_mask);
}
static void
hsw_vebox_irq_disable(struct intel_engine_cs *engine)
{
ENGINE_WRITE(engine, RING_IMR, ~0);
- gen6_mask_pm_irq(engine->i915, engine->irq_enable_mask);
+ gen6_gt_pm_mask_irq(engine->gt, engine->irq_enable_mask);
}
static int
@@ -1071,9 +1102,11 @@ i830_emit_bb_start(struct i915_request *rq,
u64 offset, u32 len,
unsigned int dispatch_flags)
{
- u32 *cs, cs_offset = i915_scratch_offset(rq->i915);
+ u32 *cs, cs_offset =
+ intel_gt_scratch_offset(rq->engine->gt,
+ INTEL_GT_SCRATCH_FIELD_DEFAULT);
- GEM_BUG_ON(rq->i915->gt.scratch->size < I830_WA_SIZE);
+ GEM_BUG_ON(rq->engine->gt->scratch->size < I830_WA_SIZE);
cs = intel_ring_begin(rq, 6);
if (IS_ERR(cs))
@@ -1100,7 +1133,7 @@ i830_emit_bb_start(struct i915_request *rq,
* stable batch scratch bo area (so that the CS never
* stumbles over its tlb invalidation bug) ...
*/
- *cs++ = SRC_COPY_BLT_CMD | BLT_WRITE_RGBA;
+ *cs++ = SRC_COPY_BLT_CMD | BLT_WRITE_RGBA | (6 - 2);
*cs++ = BLT_DEPTH_32 | BLT_ROP_SRC_COPY | 4096;
*cs++ = DIV_ROUND_UP(len, 4096) << 16 | 4096;
*cs++ = cs_offset;
@@ -1146,179 +1179,9 @@ i915_emit_bb_start(struct i915_request *rq,
return 0;
}
-int intel_ring_pin(struct intel_ring *ring)
-{
- struct i915_vma *vma = ring->vma;
- unsigned int flags;
- void *addr;
- int ret;
-
- if (atomic_fetch_inc(&ring->pin_count))
- return 0;
-
- ret = i915_timeline_pin(ring->timeline);
- if (ret)
- goto err_unpin;
-
- flags = PIN_GLOBAL;
-
- /* Ring wraparound at offset 0 sometimes hangs. No idea why. */
- flags |= PIN_OFFSET_BIAS | i915_ggtt_pin_bias(vma);
-
- if (vma->obj->stolen)
- flags |= PIN_MAPPABLE;
- else
- flags |= PIN_HIGH;
-
- ret = i915_vma_pin(vma, 0, 0, flags);
- if (unlikely(ret))
- goto err_timeline;
-
- if (i915_vma_is_map_and_fenceable(vma))
- addr = (void __force *)i915_vma_pin_iomap(vma);
- else
- addr = i915_gem_object_pin_map(vma->obj,
- i915_coherent_map_type(vma->vm->i915));
- if (IS_ERR(addr)) {
- ret = PTR_ERR(addr);
- goto err_ring;
- }
-
- vma->obj->pin_global++;
-
- GEM_BUG_ON(ring->vaddr);
- ring->vaddr = addr;
-
- return 0;
-
-err_ring:
- i915_vma_unpin(vma);
-err_timeline:
- i915_timeline_unpin(ring->timeline);
-err_unpin:
- atomic_dec(&ring->pin_count);
- return ret;
-}
-
-void intel_ring_reset(struct intel_ring *ring, u32 tail)
-{
- GEM_BUG_ON(!intel_ring_offset_valid(ring, tail));
-
- ring->tail = tail;
- ring->head = tail;
- ring->emit = tail;
- intel_ring_update_space(ring);
-}
-
-void intel_ring_unpin(struct intel_ring *ring)
-{
- if (!atomic_dec_and_test(&ring->pin_count))
- return;
-
- /* Discard any unused bytes beyond that submitted to hw. */
- intel_ring_reset(ring, ring->tail);
-
- GEM_BUG_ON(!ring->vma);
- if (i915_vma_is_map_and_fenceable(ring->vma))
- i915_vma_unpin_iomap(ring->vma);
- else
- i915_gem_object_unpin_map(ring->vma->obj);
-
- GEM_BUG_ON(!ring->vaddr);
- ring->vaddr = NULL;
-
- ring->vma->obj->pin_global--;
- i915_vma_unpin(ring->vma);
-
- i915_timeline_unpin(ring->timeline);
-}
-
-static struct i915_vma *
-intel_ring_create_vma(struct drm_i915_private *dev_priv, int size)
-{
- struct i915_address_space *vm = &dev_priv->ggtt.vm;
- struct drm_i915_gem_object *obj;
- struct i915_vma *vma;
-
- obj = i915_gem_object_create_stolen(dev_priv, size);
- if (!obj)
- obj = i915_gem_object_create_internal(dev_priv, size);
- if (IS_ERR(obj))
- return ERR_CAST(obj);
-
- /*
- * Mark ring buffers as read-only from GPU side (so no stray overwrites)
- * if supported by the platform's GGTT.
- */
- if (vm->has_read_only)
- i915_gem_object_set_readonly(obj);
-
- vma = i915_vma_instance(obj, vm, NULL);
- if (IS_ERR(vma))
- goto err;
-
- return vma;
-
-err:
- i915_gem_object_put(obj);
- return vma;
-}
-
-struct intel_ring *
-intel_engine_create_ring(struct intel_engine_cs *engine,
- struct i915_timeline *timeline,
- int size)
-{
- struct intel_ring *ring;
- struct i915_vma *vma;
-
- GEM_BUG_ON(!is_power_of_2(size));
- GEM_BUG_ON(RING_CTL_SIZE(size) & ~RING_NR_PAGES);
-
- ring = kzalloc(sizeof(*ring), GFP_KERNEL);
- if (!ring)
- return ERR_PTR(-ENOMEM);
-
- kref_init(&ring->ref);
- INIT_LIST_HEAD(&ring->request_list);
- ring->timeline = i915_timeline_get(timeline);
-
- ring->size = size;
- /* Workaround an erratum on the i830 which causes a hang if
- * the TAIL pointer points to within the last 2 cachelines
- * of the buffer.
- */
- ring->effective_size = size;
- if (IS_I830(engine->i915) || IS_I845G(engine->i915))
- ring->effective_size -= 2 * CACHELINE_BYTES;
-
- intel_ring_update_space(ring);
-
- vma = intel_ring_create_vma(engine->i915, size);
- if (IS_ERR(vma)) {
- kfree(ring);
- return ERR_CAST(vma);
- }
- ring->vma = vma;
-
- return ring;
-}
-
-void intel_ring_free(struct kref *ref)
-{
- struct intel_ring *ring = container_of(ref, typeof(*ring), ref);
-
- i915_vma_close(ring->vma);
- i915_vma_put(ring->vma);
-
- i915_timeline_put(ring->timeline);
- kfree(ring);
-}
-
static void __ring_context_fini(struct intel_context *ce)
{
- GEM_BUG_ON(i915_gem_object_is_active(ce->state->obj));
- i915_gem_object_put(ce->state->obj);
+ i915_vma_put(ce->state);
}
static void ring_context_destroy(struct kref *ref)
@@ -1330,33 +1193,45 @@ static void ring_context_destroy(struct kref *ref)
if (ce->state)
__ring_context_fini(ce);
+ intel_context_fini(ce);
intel_context_free(ce);
}
-static int __context_pin_ppgtt(struct i915_gem_context *ctx)
+static struct i915_address_space *vm_alias(struct intel_context *ce)
+{
+ struct i915_address_space *vm;
+
+ vm = ce->vm;
+ if (i915_is_ggtt(vm))
+ vm = &i915_vm_to_ggtt(vm)->alias->vm;
+
+ return vm;
+}
+
+static int __context_pin_ppgtt(struct intel_context *ce)
{
struct i915_address_space *vm;
int err = 0;
- vm = ctx->vm ?: &ctx->i915->mm.aliasing_ppgtt->vm;
+ vm = vm_alias(ce);
if (vm)
err = gen6_ppgtt_pin(i915_vm_to_ppgtt((vm)));
return err;
}
-static void __context_unpin_ppgtt(struct i915_gem_context *ctx)
+static void __context_unpin_ppgtt(struct intel_context *ce)
{
struct i915_address_space *vm;
- vm = ctx->vm ?: &ctx->i915->mm.aliasing_ppgtt->vm;
+ vm = vm_alias(ce);
if (vm)
gen6_ppgtt_unpin(i915_vm_to_ppgtt(vm));
}
static void ring_context_unpin(struct intel_context *ce)
{
- __context_unpin_ppgtt(ce->gem_context);
+ __context_unpin_ppgtt(ce);
}
static struct i915_vma *
@@ -1412,7 +1287,7 @@ alloc_context_vma(struct intel_engine_cs *engine)
i915_gem_object_unpin_map(obj);
}
- vma = i915_vma_instance(obj, &i915->ggtt.vm, NULL);
+ vma = i915_vma_instance(obj, &engine->gt->ggtt->vm, NULL);
if (IS_ERR(vma)) {
err = PTR_ERR(vma);
goto err_obj;
@@ -1427,16 +1302,17 @@ err_obj:
return ERR_PTR(err);
}
-static int ring_context_pin(struct intel_context *ce)
+static int ring_context_alloc(struct intel_context *ce)
{
struct intel_engine_cs *engine = ce->engine;
- int err;
/* One ringbuffer to rule them all */
- GEM_BUG_ON(!engine->buffer);
- ce->ring = engine->buffer;
+ GEM_BUG_ON(!engine->legacy.ring);
+ ce->ring = engine->legacy.ring;
+ ce->timeline = intel_timeline_get(engine->legacy.timeline);
- if (!ce->state && engine->context_size) {
+ GEM_BUG_ON(ce->state);
+ if (engine->context_size) {
struct i915_vma *vma;
vma = alloc_context_vma(engine);
@@ -1444,29 +1320,26 @@ static int ring_context_pin(struct intel_context *ce)
return PTR_ERR(vma);
ce->state = vma;
+ if (engine->default_state)
+ __set_bit(CONTEXT_VALID_BIT, &ce->flags);
}
- err = intel_context_active_acquire(ce, PIN_HIGH);
- if (err)
- return err;
-
- err = __context_pin_ppgtt(ce->gem_context);
- if (err)
- goto err_active;
-
return 0;
+}
-err_active:
- intel_context_active_release(ce);
- return err;
+static int ring_context_pin(struct intel_context *ce)
+{
+ return __context_pin_ppgtt(ce);
}
static void ring_context_reset(struct intel_context *ce)
{
- intel_ring_reset(ce->ring, 0);
+ intel_ring_reset(ce->ring, ce->ring->emit);
}
static const struct intel_context_ops ring_context_ops = {
+ .alloc = ring_context_alloc,
+
.pin = ring_context_pin,
.unpin = ring_context_unpin,
@@ -1477,45 +1350,38 @@ static const struct intel_context_ops ring_context_ops = {
.destroy = ring_context_destroy,
};
-static int load_pd_dir(struct i915_request *rq, const struct i915_ppgtt *ppgtt)
+static int load_pd_dir(struct i915_request *rq,
+ const struct i915_ppgtt *ppgtt,
+ u32 valid)
{
const struct intel_engine_cs * const engine = rq->engine;
u32 *cs;
- cs = intel_ring_begin(rq, 6);
+ cs = intel_ring_begin(rq, 12);
if (IS_ERR(cs))
return PTR_ERR(cs);
*cs++ = MI_LOAD_REGISTER_IMM(1);
*cs++ = i915_mmio_reg_offset(RING_PP_DIR_DCLV(engine->mmio_base));
- *cs++ = PP_DIR_DCLV_2G;
+ *cs++ = valid;
*cs++ = MI_LOAD_REGISTER_IMM(1);
*cs++ = i915_mmio_reg_offset(RING_PP_DIR_BASE(engine->mmio_base));
- *cs++ = ppgtt->pd->base.ggtt_offset << 10;
-
- intel_ring_advance(rq, cs);
-
- return 0;
-}
-
-static int flush_pd_dir(struct i915_request *rq)
-{
- const struct intel_engine_cs * const engine = rq->engine;
- u32 *cs;
+ *cs++ = px_base(ppgtt->pd)->ggtt_offset << 10;
- cs = intel_ring_begin(rq, 4);
- if (IS_ERR(cs))
- return PTR_ERR(cs);
-
- /* Stall until the page table load is complete */
+ /* Stall until the page table load is complete? */
*cs++ = MI_STORE_REGISTER_MEM | MI_SRM_LRM_GLOBAL_GTT;
*cs++ = i915_mmio_reg_offset(RING_PP_DIR_BASE(engine->mmio_base));
- *cs++ = i915_scratch_offset(rq->i915);
- *cs++ = MI_NOOP;
+ *cs++ = intel_gt_scratch_offset(engine->gt,
+ INTEL_GT_SCRATCH_FIELD_DEFAULT);
+
+ *cs++ = MI_LOAD_REGISTER_IMM(1);
+ *cs++ = i915_mmio_reg_offset(RING_INSTPM(engine->mmio_base));
+ *cs++ = _MASKED_BIT_ENABLE(INSTPM_TLB_INVALIDATE);
intel_ring_advance(rq, cs);
- return 0;
+
+ return rq->engine->emit_flush(rq, EMIT_FLUSH);
}
static inline int mi_set_context(struct i915_request *rq, u32 flags)
@@ -1524,19 +1390,11 @@ static inline int mi_set_context(struct i915_request *rq, u32 flags)
struct intel_engine_cs *engine = rq->engine;
enum intel_engine_id id;
const int num_engines =
- IS_HSW_GT1(i915) ? RUNTIME_INFO(i915)->num_engines - 1 : 0;
+ IS_HASWELL(i915) ? RUNTIME_INFO(i915)->num_engines - 1 : 0;
bool force_restore = false;
int len;
u32 *cs;
- flags |= MI_MM_SPACE_GTT;
- if (IS_HASWELL(i915))
- /* These flags are for resource streamer on HSW+ */
- flags |= HSW_MI_RS_SAVE_STATE_EN | HSW_MI_RS_RESTORE_STATE_EN;
- else
- /* We need to save the extended state for powersaving modes */
- flags |= MI_SAVE_EXT_STATE_EN | MI_RESTORE_EXT_STATE_EN;
-
len = 4;
if (IS_GEN(i915, 7))
len += 2 + (num_engines ? 4 * num_engines + 6 : 0);
@@ -1560,7 +1418,7 @@ static inline int mi_set_context(struct i915_request *rq, u32 flags)
struct intel_engine_cs *signaller;
*cs++ = MI_LOAD_REGISTER_IMM(num_engines);
- for_each_engine(signaller, i915, id) {
+ for_each_engine(signaller, engine->gt, id) {
if (signaller == engine)
continue;
@@ -1601,7 +1459,7 @@ static inline int mi_set_context(struct i915_request *rq, u32 flags)
*cs++ = MI_NOOP;
*cs++ = MI_SET_CONTEXT;
- *cs++ = i915_ggtt_offset(rq->hw_context->state) | flags;
+ *cs++ = i915_ggtt_offset(rq->context->state) | flags;
/*
* w/a: MI_SET_CONTEXT must always be followed by MI_NOOP
* WaMiSetContext_Hang:snb,ivb,vlv
@@ -1614,7 +1472,7 @@ static inline int mi_set_context(struct i915_request *rq, u32 flags)
i915_reg_t last_reg = {}; /* keep gcc quiet */
*cs++ = MI_LOAD_REGISTER_IMM(num_engines);
- for_each_engine(signaller, i915, id) {
+ for_each_engine(signaller, engine->gt, id) {
if (signaller == engine)
continue;
@@ -1627,7 +1485,8 @@ static inline int mi_set_context(struct i915_request *rq, u32 flags)
/* Insert a delay before the next switch! */
*cs++ = MI_STORE_REGISTER_MEM | MI_SRM_LRM_GLOBAL_GTT;
*cs++ = i915_mmio_reg_offset(last_reg);
- *cs++ = i915_scratch_offset(rq->i915);
+ *cs++ = intel_gt_scratch_offset(engine->gt,
+ INTEL_GT_SCRATCH_FIELD_DEFAULT);
*cs++ = MI_NOOP;
}
*cs++ = MI_ARB_ON_OFF | MI_ARB_ENABLE;
@@ -1640,7 +1499,7 @@ static inline int mi_set_context(struct i915_request *rq, u32 flags)
return 0;
}
-static int remap_l3(struct i915_request *rq, int slice)
+static int remap_l3_slice(struct i915_request *rq, int slice)
{
u32 *cs, *remap_info = rq->i915->l3_parity.remap_info[slice];
int i;
@@ -1668,120 +1527,97 @@ static int remap_l3(struct i915_request *rq, int slice)
return 0;
}
-static int switch_context(struct i915_request *rq)
+static int remap_l3(struct i915_request *rq)
{
- struct intel_engine_cs *engine = rq->engine;
- struct i915_gem_context *ctx = rq->gem_context;
- struct i915_address_space *vm =
- ctx->vm ?: &rq->i915->mm.aliasing_ppgtt->vm;
- unsigned int unwind_mm = 0;
- u32 hw_flags = 0;
- int ret, i;
+ struct i915_gem_context *ctx = i915_request_gem_context(rq);
+ int i, err;
- GEM_BUG_ON(HAS_EXECLISTS(rq->i915));
+ if (!ctx || !ctx->remap_slice)
+ return 0;
- if (vm) {
- struct i915_ppgtt *ppgtt = i915_vm_to_ppgtt(vm);
- int loops;
+ for (i = 0; i < MAX_L3_SLICES; i++) {
+ if (!(ctx->remap_slice & BIT(i)))
+ continue;
- /*
- * Baytail takes a little more convincing that it really needs
- * to reload the PD between contexts. It is not just a little
- * longer, as adding more stalls after the load_pd_dir (i.e.
- * adding a long loop around flush_pd_dir) is not as effective
- * as reloading the PD umpteen times. 32 is derived from
- * experimentation (gem_exec_parallel/fds) and has no good
- * explanation.
- */
- loops = 1;
- if (engine->id == BCS0 && IS_VALLEYVIEW(engine->i915))
- loops = 32;
-
- do {
- ret = load_pd_dir(rq, ppgtt);
- if (ret)
- goto err;
- } while (--loops);
-
- if (ppgtt->pd_dirty_engines & engine->mask) {
- unwind_mm = engine->mask;
- ppgtt->pd_dirty_engines &= ~unwind_mm;
- hw_flags = MI_FORCE_RESTORE;
- }
+ err = remap_l3_slice(rq, i);
+ if (err)
+ return err;
}
- if (rq->hw_context->state) {
- GEM_BUG_ON(engine->id != RCS0);
+ ctx->remap_slice = 0;
+ return 0;
+}
- /*
- * The kernel context(s) is treated as pure scratch and is not
- * expected to retain any state (as we sacrifice it during
- * suspend and on resume it may be corrupted). This is ok,
- * as nothing actually executes using the kernel context; it
- * is purely used for flushing user contexts.
- */
- if (i915_gem_context_is_kernel(ctx))
- hw_flags = MI_RESTORE_INHIBIT;
+static int switch_mm(struct i915_request *rq, struct i915_address_space *vm)
+{
+ int ret;
- ret = mi_set_context(rq, hw_flags);
- if (ret)
- goto err_mm;
- }
+ if (!vm)
+ return 0;
- if (vm) {
- ret = engine->emit_flush(rq, EMIT_INVALIDATE);
- if (ret)
- goto err_mm;
+ ret = rq->engine->emit_flush(rq, EMIT_FLUSH);
+ if (ret)
+ return ret;
- ret = flush_pd_dir(rq);
- if (ret)
- goto err_mm;
+ /*
+ * Not only do we need a full barrier (post-sync write) after
+ * invalidating the TLBs, but we need to wait a little bit
+ * longer. Whether this is merely delaying us, or the
+ * subsequent flush is a key part of serialising with the
+ * post-sync op, this extra pass appears vital before a
+ * mm switch!
+ */
+ ret = load_pd_dir(rq, i915_vm_to_ppgtt(vm), PP_DIR_DCLV_2G);
+ if (ret)
+ return ret;
- /*
- * Not only do we need a full barrier (post-sync write) after
- * invalidating the TLBs, but we need to wait a little bit
- * longer. Whether this is merely delaying us, or the
- * subsequent flush is a key part of serialising with the
- * post-sync op, this extra pass appears vital before a
- * mm switch!
- */
- ret = engine->emit_flush(rq, EMIT_INVALIDATE);
- if (ret)
- goto err_mm;
+ return rq->engine->emit_flush(rq, EMIT_INVALIDATE);
+}
- ret = engine->emit_flush(rq, EMIT_FLUSH);
- if (ret)
- goto err_mm;
- }
+static int switch_context(struct i915_request *rq)
+{
+ struct intel_context *ce = rq->context;
+ int ret;
- if (ctx->remap_slice) {
- for (i = 0; i < MAX_L3_SLICES; i++) {
- if (!(ctx->remap_slice & BIT(i)))
- continue;
+ GEM_BUG_ON(HAS_EXECLISTS(rq->i915));
- ret = remap_l3(rq, i);
- if (ret)
- goto err_mm;
- }
+ ret = switch_mm(rq, vm_alias(ce));
+ if (ret)
+ return ret;
+
+ if (ce->state) {
+ u32 flags;
+
+ GEM_BUG_ON(rq->engine->id != RCS0);
- ctx->remap_slice = 0;
+ /* For resource streamer on HSW+ and power context elsewhere */
+ BUILD_BUG_ON(HSW_MI_RS_SAVE_STATE_EN != MI_SAVE_EXT_STATE_EN);
+ BUILD_BUG_ON(HSW_MI_RS_RESTORE_STATE_EN != MI_RESTORE_EXT_STATE_EN);
+
+ flags = MI_SAVE_EXT_STATE_EN | MI_MM_SPACE_GTT;
+ if (test_bit(CONTEXT_VALID_BIT, &ce->flags))
+ flags |= MI_RESTORE_EXT_STATE_EN;
+ else
+ flags |= MI_RESTORE_INHIBIT;
+
+ ret = mi_set_context(rq, flags);
+ if (ret)
+ return ret;
}
- return 0;
+ ret = remap_l3(rq);
+ if (ret)
+ return ret;
-err_mm:
- if (unwind_mm)
- i915_vm_to_ppgtt(vm)->pd_dirty_engines |= unwind_mm;
-err:
- return ret;
+ return 0;
}
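+/*
+ * Illustrative note (an assumption about the wider series, not stated
+ * in this hunk): CONTEXT_VALID_BIT distinguishes a context image that
+ * has been written back at least once. On its first switch a context
+ * is therefore loaded with MI_RESTORE_INHIBIT so the uninitialised
+ * image is never restored, while every later switch restores it (and,
+ * on HSW, the extended resource-streamer state as well).
+ */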
static int ring_request_alloc(struct i915_request *request)
{
int ret;
- GEM_BUG_ON(!intel_context_is_pinned(request->hw_context));
- GEM_BUG_ON(request->timeline->has_initial_breadcrumb);
+ GEM_BUG_ON(!intel_context_is_pinned(request->context));
+ GEM_BUG_ON(i915_request_timeline(request)->has_initial_breadcrumb);
/*
* Flush enough space to reduce the likelihood of waiting after
@@ -1803,140 +1639,6 @@ static int ring_request_alloc(struct i915_request *request)
return 0;
}
-static noinline int wait_for_space(struct intel_ring *ring, unsigned int bytes)
-{
- struct i915_request *target;
- long timeout;
-
- if (intel_ring_update_space(ring) >= bytes)
- return 0;
-
- GEM_BUG_ON(list_empty(&ring->request_list));
- list_for_each_entry(target, &ring->request_list, ring_link) {
- /* Would completion of this request free enough space? */
- if (bytes <= __intel_ring_space(target->postfix,
- ring->emit, ring->size))
- break;
- }
-
- if (WARN_ON(&target->ring_link == &ring->request_list))
- return -ENOSPC;
-
- timeout = i915_request_wait(target,
- I915_WAIT_INTERRUPTIBLE,
- MAX_SCHEDULE_TIMEOUT);
- if (timeout < 0)
- return timeout;
-
- i915_request_retire_upto(target);
-
- intel_ring_update_space(ring);
- GEM_BUG_ON(ring->space < bytes);
- return 0;
-}
-
-u32 *intel_ring_begin(struct i915_request *rq, unsigned int num_dwords)
-{
- struct intel_ring *ring = rq->ring;
- const unsigned int remain_usable = ring->effective_size - ring->emit;
- const unsigned int bytes = num_dwords * sizeof(u32);
- unsigned int need_wrap = 0;
- unsigned int total_bytes;
- u32 *cs;
-
- /* Packets must be qword aligned. */
- GEM_BUG_ON(num_dwords & 1);
-
- total_bytes = bytes + rq->reserved_space;
- GEM_BUG_ON(total_bytes > ring->effective_size);
-
- if (unlikely(total_bytes > remain_usable)) {
- const int remain_actual = ring->size - ring->emit;
-
- if (bytes > remain_usable) {
- /*
- * Not enough space for the basic request. So need to
- * flush out the remainder and then wait for
- * base + reserved.
- */
- total_bytes += remain_actual;
- need_wrap = remain_actual | 1;
- } else {
- /*
- * The base request will fit but the reserved space
- * falls off the end. So we don't need an immediate
- * wrap and only need to effectively wait for the
- * reserved size from the start of ringbuffer.
- */
- total_bytes = rq->reserved_space + remain_actual;
- }
- }
-
- if (unlikely(total_bytes > ring->space)) {
- int ret;
-
- /*
- * Space is reserved in the ringbuffer for finalising the
- * request, as that cannot be allowed to fail. During request
- * finalisation, reserved_space is set to 0 to stop the
- * overallocation and the assumption is that then we never need
- * to wait (which has the risk of failing with EINTR).
- *
- * See also i915_request_alloc() and i915_request_add().
- */
- GEM_BUG_ON(!rq->reserved_space);
-
- ret = wait_for_space(ring, total_bytes);
- if (unlikely(ret))
- return ERR_PTR(ret);
- }
-
- if (unlikely(need_wrap)) {
- need_wrap &= ~1;
- GEM_BUG_ON(need_wrap > ring->space);
- GEM_BUG_ON(ring->emit + need_wrap > ring->size);
- GEM_BUG_ON(!IS_ALIGNED(need_wrap, sizeof(u64)));
-
- /* Fill the tail with MI_NOOP */
- memset64(ring->vaddr + ring->emit, 0, need_wrap / sizeof(u64));
- ring->space -= need_wrap;
- ring->emit = 0;
- }
-
- GEM_BUG_ON(ring->emit > ring->size - bytes);
- GEM_BUG_ON(ring->space < bytes);
- cs = ring->vaddr + ring->emit;
- GEM_DEBUG_EXEC(memset32(cs, POISON_INUSE, bytes / sizeof(*cs)));
- ring->emit += bytes;
- ring->space -= bytes;
-
- return cs;
-}
-
-/* Align the ring tail to a cacheline boundary */
-int intel_ring_cacheline_align(struct i915_request *rq)
-{
- int num_dwords;
- void *cs;
-
- num_dwords = (rq->ring->emit & (CACHELINE_BYTES - 1)) / sizeof(u32);
- if (num_dwords == 0)
- return 0;
-
- num_dwords = CACHELINE_DWORDS - num_dwords;
- GEM_BUG_ON(num_dwords & 1);
-
- cs = intel_ring_begin(rq, num_dwords);
- if (IS_ERR(cs))
- return PTR_ERR(cs);
-
- memset64(cs, (u64)MI_NOOP << 32 | MI_NOOP, num_dwords / 2);
- intel_ring_advance(rq, cs);
-
- GEM_BUG_ON(rq->ring->emit & (CACHELINE_BYTES - 1));
- return 0;
-}
-
static void gen6_bsd_submit_request(struct i915_request *request)
{
struct intel_uncore *uncore = request->engine->uncore;
@@ -2070,7 +1772,6 @@ static int gen6_ring_flush(struct i915_request *rq, u32 mode)
static void i9xx_set_default_submission(struct intel_engine_cs *engine)
{
engine->submit_request = i9xx_submit_request;
- engine->cancel_requests = cancel_requests;
engine->park = NULL;
engine->unpark = NULL;
@@ -2082,7 +1783,7 @@ static void gen6_bsd_set_default_submission(struct intel_engine_cs *engine)
engine->submit_request = gen6_bsd_submit_request;
}
-static void ring_destroy(struct intel_engine_cs *engine)
+static void ring_release(struct intel_engine_cs *engine)
{
struct drm_i915_private *dev_priv = engine->i915;
@@ -2091,10 +1792,11 @@ static void ring_destroy(struct intel_engine_cs *engine)
intel_engine_cleanup_common(engine);
- intel_ring_unpin(engine->buffer);
- intel_ring_put(engine->buffer);
+ intel_ring_unpin(engine->legacy.ring);
+ intel_ring_put(engine->legacy.ring);
- kfree(engine);
+ intel_timeline_unpin(engine->legacy.timeline);
+ intel_timeline_put(engine->legacy.timeline);
}
static void setup_irq(struct intel_engine_cs *engine)
@@ -2125,11 +1827,10 @@ static void setup_common(struct intel_engine_cs *engine)
setup_irq(engine);
- engine->destroy = ring_destroy;
-
engine->resume = xcs_resume;
engine->reset.prepare = reset_prepare;
- engine->reset.reset = reset_ring;
+ engine->reset.rewind = reset_rewind;
+ engine->reset.cancel = reset_cancel;
engine->reset.finish = reset_finish;
engine->cops = &ring_context_ops;
@@ -2166,11 +1867,9 @@ static void setup_rcs(struct intel_engine_cs *engine)
engine->irq_enable_mask = GT_RENDER_USER_INTERRUPT;
if (INTEL_GEN(i915) >= 7) {
- engine->init_context = intel_rcs_ctx_init;
engine->emit_flush = gen7_render_ring_flush;
engine->emit_fini_breadcrumb = gen7_rcs_emit_breadcrumb;
} else if (IS_GEN(i915, 6)) {
- engine->init_context = intel_rcs_ctx_init;
engine->emit_flush = gen6_render_ring_flush;
engine->emit_fini_breadcrumb = gen6_rcs_emit_breadcrumb;
} else if (IS_GEN(i915, 5)) {
@@ -2242,6 +1941,10 @@ static void setup_vecs(struct intel_engine_cs *engine)
int intel_ring_submission_setup(struct intel_engine_cs *engine)
{
+ struct intel_timeline *timeline;
+ struct intel_ring *ring;
+ int err;
+
setup_common(engine);
switch (engine->class) {
@@ -2262,48 +1965,44 @@ int intel_ring_submission_setup(struct intel_engine_cs *engine)
return -ENODEV;
}
- return 0;
-}
-
-int intel_ring_submission_init(struct intel_engine_cs *engine)
-{
- struct i915_timeline *timeline;
- struct intel_ring *ring;
- int err;
-
- timeline = i915_timeline_create(engine->i915, engine->status_page.vma);
+ timeline = intel_timeline_create(engine->gt, engine->status_page.vma);
if (IS_ERR(timeline)) {
err = PTR_ERR(timeline);
goto err;
}
GEM_BUG_ON(timeline->has_initial_breadcrumb);
- ring = intel_engine_create_ring(engine, timeline, 32 * PAGE_SIZE);
- i915_timeline_put(timeline);
+ err = intel_timeline_pin(timeline);
+ if (err)
+ goto err_timeline;
+
+ ring = intel_engine_create_ring(engine, SZ_16K);
if (IS_ERR(ring)) {
err = PTR_ERR(ring);
- goto err;
+ goto err_timeline_unpin;
}
err = intel_ring_pin(ring);
if (err)
goto err_ring;
- GEM_BUG_ON(engine->buffer);
- engine->buffer = ring;
+ GEM_BUG_ON(engine->legacy.ring);
+ engine->legacy.ring = ring;
+ engine->legacy.timeline = timeline;
- err = intel_engine_init_common(engine);
- if (err)
- goto err_unpin;
+ GEM_BUG_ON(timeline->hwsp_ggtt != engine->status_page.vma);
- GEM_BUG_ON(ring->timeline->hwsp_ggtt != engine->status_page.vma);
+ /* Finally, take ownership and responsibility for cleanup! */
+ engine->release = ring_release;
return 0;
-err_unpin:
- intel_ring_unpin(ring);
err_ring:
intel_ring_put(ring);
+err_timeline_unpin:
+ intel_timeline_unpin(timeline);
+err_timeline:
+ intel_timeline_put(timeline);
err:
intel_engine_cleanup_common(engine);
return err;
diff --git a/drivers/gpu/drm/i915/gt/intel_ring_types.h b/drivers/gpu/drm/i915/gt/intel_ring_types.h
new file mode 100644
index 000000000000..d9f17f38e0cc
--- /dev/null
+++ b/drivers/gpu/drm/i915/gt/intel_ring_types.h
@@ -0,0 +1,51 @@
+/*
+ * SPDX-License-Identifier: MIT
+ *
+ * Copyright © 2019 Intel Corporation
+ */
+
+#ifndef INTEL_RING_TYPES_H
+#define INTEL_RING_TYPES_H
+
+#include <linux/atomic.h>
+#include <linux/kref.h>
+#include <linux/types.h>
+
+/*
+ * Early gen2 devices have a cacheline of just 32 bytes; using 64 is overkill
+ * but keeps the logic simple. Indeed, the whole purpose of this macro is just
+ * to give an indication of where some of the magic values used in the various
+ * workarounds come from!
+ */
+#define CACHELINE_BYTES 64
+#define CACHELINE_DWORDS (CACHELINE_BYTES / sizeof(u32))
+
+struct i915_vma;
+
+struct intel_ring {
+ struct kref ref;
+ struct i915_vma *vma;
+ void *vaddr;
+
+ /*
+ * As we have two types of rings, one global to the engine used
+ * by ringbuffer submission and those that are exclusive to a
+ * context used by execlists, we have to play safe and allow
+ * atomic updates to the pin_count. However, the actual pinning
+ * of the context is either done during initialisation for
+ * ringbuffer submission or serialised as part of the context
+ * pinning for execlists, and so we do not need a mutex ourselves
+ * to serialise intel_ring_pin/intel_ring_unpin.
+ */
+ atomic_t pin_count;
+
+ u32 head;
+ u32 tail;
+ u32 emit;
+
+ u32 space;
+ u32 size;
+ u32 effective_size;
+};
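+
+/*
+ * Worked example (editorial illustration, following the ring-space
+ * helpers elsewhere in the driver): space is the number of free bytes
+ * between head and the emission point, minus one cacheline of slack so
+ * the two never share a cacheline. With size = SZ_16K, head = 0x100
+ * and emit = 0x2000, the free span wraps:
+ * (0x4000 - 0x2000) + 0x100 - CACHELINE_BYTES = 0x20c0 bytes.
+ */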
+
+#endif /* INTEL_RING_TYPES_H */
diff --git a/drivers/gpu/drm/i915/gt/intel_rps.c b/drivers/gpu/drm/i915/gt/intel_rps.c
new file mode 100644
index 000000000000..d2a3d935d186
--- /dev/null
+++ b/drivers/gpu/drm/i915/gt/intel_rps.c
@@ -0,0 +1,1903 @@
+/*
+ * SPDX-License-Identifier: MIT
+ *
+ * Copyright © 2019 Intel Corporation
+ */
+
+#include "i915_drv.h"
+#include "intel_gt.h"
+#include "intel_gt_irq.h"
+#include "intel_gt_pm_irq.h"
+#include "intel_rps.h"
+#include "intel_sideband.h"
+#include "../../../platform/x86/intel_ips.h"
+
+/*
+ * Lock protecting IPS related data structures
+ */
+static DEFINE_SPINLOCK(mchdev_lock);
+
+static struct intel_gt *rps_to_gt(struct intel_rps *rps)
+{
+ return container_of(rps, struct intel_gt, rps);
+}
+
+static struct drm_i915_private *rps_to_i915(struct intel_rps *rps)
+{
+ return rps_to_gt(rps)->i915;
+}
+
+static struct intel_uncore *rps_to_uncore(struct intel_rps *rps)
+{
+ return rps_to_gt(rps)->uncore;
+}
+
+static u32 rps_pm_sanitize_mask(struct intel_rps *rps, u32 mask)
+{
+ return mask & ~rps->pm_intrmsk_mbz;
+}
+
+static inline void set(struct intel_uncore *uncore, i915_reg_t reg, u32 val)
+{
+ intel_uncore_write_fw(uncore, reg, val);
+}
+
+static u32 rps_pm_mask(struct intel_rps *rps, u8 val)
+{
+ u32 mask = 0;
+
+ /* We use UP_EI_EXPIRED interrupts for both up/down in manual mode */
+ if (val > rps->min_freq_softlimit)
+ mask |= (GEN6_PM_RP_UP_EI_EXPIRED |
+ GEN6_PM_RP_DOWN_THRESHOLD |
+ GEN6_PM_RP_DOWN_TIMEOUT);
+
+ if (val < rps->max_freq_softlimit)
+ mask |= GEN6_PM_RP_UP_EI_EXPIRED | GEN6_PM_RP_UP_THRESHOLD;
+
+ mask &= rps->pm_events;
+
+ return rps_pm_sanitize_mask(rps, ~mask);
+}
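+
+/*
+ * Worked example (illustrative): for a val strictly between the two
+ * softlimits, mask gathers both the up and the down events, so the
+ * inverted value written to GEN6_PMINTRMSK unmasks everything in
+ * pm_events. At val == min_freq_softlimit only the up events survive,
+ * i.e. we stop listening for "go slower" interrupts once at the floor.
+ */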
+
+static void rps_reset_ei(struct intel_rps *rps)
+{
+ memset(&rps->ei, 0, sizeof(rps->ei));
+}
+
+static void rps_enable_interrupts(struct intel_rps *rps)
+{
+ struct intel_gt *gt = rps_to_gt(rps);
+
+ rps_reset_ei(rps);
+
+ if (IS_VALLEYVIEW(gt->i915))
+ /* WaGsvRC0ResidencyMethod:vlv */
+ rps->pm_events = GEN6_PM_RP_UP_EI_EXPIRED;
+ else
+ rps->pm_events = (GEN6_PM_RP_UP_THRESHOLD |
+ GEN6_PM_RP_DOWN_THRESHOLD |
+ GEN6_PM_RP_DOWN_TIMEOUT);
+
+ spin_lock_irq(&gt->irq_lock);
+ gen6_gt_pm_enable_irq(gt, rps->pm_events);
+ spin_unlock_irq(&gt->irq_lock);
+
+ set(gt->uncore, GEN6_PMINTRMSK, rps_pm_mask(rps, rps->cur_freq));
+}
+
+static void gen6_rps_reset_interrupts(struct intel_rps *rps)
+{
+ gen6_gt_pm_reset_iir(rps_to_gt(rps), GEN6_PM_RPS_EVENTS);
+}
+
+static void gen11_rps_reset_interrupts(struct intel_rps *rps)
+{
+ while (gen11_gt_reset_one_iir(rps_to_gt(rps), 0, GEN11_GTPM))
+ ;
+}
+
+static void rps_reset_interrupts(struct intel_rps *rps)
+{
+ struct intel_gt *gt = rps_to_gt(rps);
+
+ spin_lock_irq(&gt->irq_lock);
+ if (INTEL_GEN(gt->i915) >= 11)
+ gen11_rps_reset_interrupts(rps);
+ else
+ gen6_rps_reset_interrupts(rps);
+
+ rps->pm_iir = 0;
+ spin_unlock_irq(&gt->irq_lock);
+}
+
+static void rps_disable_interrupts(struct intel_rps *rps)
+{
+ struct intel_gt *gt = rps_to_gt(rps);
+
+ rps->pm_events = 0;
+
+ set(gt->uncore, GEN6_PMINTRMSK, rps_pm_sanitize_mask(rps, ~0u));
+
+ spin_lock_irq(&gt->irq_lock);
+ gen6_gt_pm_disable_irq(gt, GEN6_PM_RPS_EVENTS);
+ spin_unlock_irq(&gt->irq_lock);
+
+ intel_synchronize_irq(gt->i915);
+
+ /*
+ * Now that we will not be generating any more work, flush any
+ * outstanding tasks. As we are called on the RPS idle path,
+ * we will reset the GPU to minimum frequencies, so the current
+ * state of the worker can be discarded.
+ */
+ cancel_work_sync(&rps->work);
+
+ rps_reset_interrupts(rps);
+}
+
+static const struct cparams {
+ u16 i;
+ u16 t;
+ u16 m;
+ u16 c;
+} cparams[] = {
+ { 1, 1333, 301, 28664 },
+ { 1, 1066, 294, 24460 },
+ { 1, 800, 294, 25192 },
+ { 0, 1333, 276, 27605 },
+ { 0, 1066, 276, 27605 },
+ { 0, 800, 231, 23784 },
+};
+
+static void gen5_rps_init(struct intel_rps *rps)
+{
+ struct drm_i915_private *i915 = rps_to_i915(rps);
+ struct intel_uncore *uncore = rps_to_uncore(rps);
+ u8 fmax, fmin, fstart;
+ u32 rgvmodectl;
+ int c_m, i;
+
+ if (i915->fsb_freq <= 3200)
+ c_m = 0;
+ else if (i915->fsb_freq <= 4800)
+ c_m = 1;
+ else
+ c_m = 2;
+
+ for (i = 0; i < ARRAY_SIZE(cparams); i++) {
+ if (cparams[i].i == c_m && cparams[i].t == i915->mem_freq) {
+ rps->ips.m = cparams[i].m;
+ rps->ips.c = cparams[i].c;
+ break;
+ }
+ }
+
+ rgvmodectl = intel_uncore_read(uncore, MEMMODECTL);
+
+ /* Set up min, max, and cur for interrupt handling */
+ fmax = (rgvmodectl & MEMMODE_FMAX_MASK) >> MEMMODE_FMAX_SHIFT;
+ fmin = (rgvmodectl & MEMMODE_FMIN_MASK);
+ fstart = (rgvmodectl & MEMMODE_FSTART_MASK) >>
+ MEMMODE_FSTART_SHIFT;
+ DRM_DEBUG_DRIVER("fmax: %d, fmin: %d, fstart: %d\n",
+ fmax, fmin, fstart);
+
+ rps->min_freq = fmax;
+ rps->max_freq = fmin;
+
+ rps->idle_freq = rps->min_freq;
+ rps->cur_freq = rps->idle_freq;
+}
+
+static unsigned long
+__ips_chipset_val(struct intel_ips *ips)
+{
+ struct intel_uncore *uncore =
+ rps_to_uncore(container_of(ips, struct intel_rps, ips));
+ unsigned long now = jiffies_to_msecs(jiffies), dt;
+ unsigned long result;
+ u64 total, delta;
+
+ lockdep_assert_held(&mchdev_lock);
+
+ /*
+ * Prevent division-by-zero if we are asking too fast.
+ * Also, we don't get interesting results if we are polling
+ * faster than once in 10ms, so just return the saved value
+ * in such cases.
+ */
+ dt = now - ips->last_time1;
+ if (dt <= 10)
+ return ips->chipset_power;
+
+ /* FIXME: handle per-counter overflow */
+ total = intel_uncore_read(uncore, DMIEC);
+ total += intel_uncore_read(uncore, DDREC);
+ total += intel_uncore_read(uncore, CSIEC);
+
+ delta = total - ips->last_count1;
+
+ result = div_u64(div_u64(ips->m * delta, dt) + ips->c, 10);
+
+ ips->last_count1 = total;
+ ips->last_time1 = now;
+
+ ips->chipset_power = result;
+
+ return result;
+}
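+
+/*
+ * Worked example (illustrative; the result is in the driver's opaque
+ * power scale): with the m = 301, c = 28664 row of cparams, a delta of
+ * 100000 energy counts over dt = 100ms gives
+ * div_u64(301 * 100000, 100) = 301000, and
+ * (301000 + 28664) / 10 = 32966 as the reported chipset power.
+ */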
+
+static unsigned long ips_mch_val(struct intel_uncore *uncore)
+{
+ unsigned int m, x, b;
+ u32 tsfs;
+
+ tsfs = intel_uncore_read(uncore, TSFS);
+ x = intel_uncore_read8(uncore, TR1);
+
+ b = tsfs & TSFS_INTR_MASK;
+ m = (tsfs & TSFS_SLOPE_MASK) >> TSFS_SLOPE_SHIFT;
+
+ return m * x / 127 - b;
+}
+
+static int _pxvid_to_vd(u8 pxvid)
+{
+ if (pxvid == 0)
+ return 0;
+
+ if (pxvid >= 8 && pxvid < 31)
+ pxvid = 31;
+
+ return (pxvid + 2) * 125;
+}
+
+static u32 pvid_to_extvid(struct drm_i915_private *i915, u8 pxvid)
+{
+ const int vd = _pxvid_to_vd(pxvid);
+
+ if (INTEL_INFO(i915)->is_mobile)
+ return max(vd - 1125, 0);
+
+ return vd;
+}
+
+static void __gen5_ips_update(struct intel_ips *ips)
+{
+ struct intel_uncore *uncore =
+ rps_to_uncore(container_of(ips, struct intel_rps, ips));
+ u64 now, delta, dt;
+ u32 count;
+
+ lockdep_assert_held(&mchdev_lock);
+
+ now = ktime_get_raw_ns();
+ dt = now - ips->last_time2;
+ do_div(dt, NSEC_PER_MSEC);
+
+ /* Don't divide by 0 */
+ if (dt <= 10)
+ return;
+
+ count = intel_uncore_read(uncore, GFXEC);
+ delta = count - ips->last_count2;
+
+ ips->last_count2 = count;
+ ips->last_time2 = now;
+
+ /* More magic constants... */
+ ips->gfx_power = div_u64(delta * 1181, dt * 10);
+}
+
+static void gen5_rps_update(struct intel_rps *rps)
+{
+ spin_lock_irq(&mchdev_lock);
+ __gen5_ips_update(&rps->ips);
+ spin_unlock_irq(&mchdev_lock);
+}
+
+static bool gen5_rps_set(struct intel_rps *rps, u8 val)
+{
+ struct intel_uncore *uncore = rps_to_uncore(rps);
+ u16 rgvswctl;
+
+ lockdep_assert_held(&mchdev_lock);
+
+ rgvswctl = intel_uncore_read16(uncore, MEMSWCTL);
+ if (rgvswctl & MEMCTL_CMD_STS) {
+ DRM_DEBUG("gpu busy, RCS change rejected\n");
+ return false; /* still busy with another command */
+ }
+
+ /* Invert the frequency bin into an ips delay */
+ val = rps->max_freq - val;
+ val = rps->min_freq + val;
+
+ rgvswctl =
+ (MEMCTL_CMD_CHFREQ << MEMCTL_CMD_SHIFT) |
+ (val << MEMCTL_FREQ_SHIFT) |
+ MEMCTL_SFCAVM;
+ intel_uncore_write16(uncore, MEMSWCTL, rgvswctl);
+ intel_uncore_posting_read16(uncore, MEMSWCTL);
+
+ rgvswctl |= MEMCTL_CMD_STS;
+ intel_uncore_write16(uncore, MEMSWCTL, rgvswctl);
+
+ return true;
+}
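+
+/*
+ * Worked example (illustrative, assuming the delay-bin reading implied
+ * by gen5_rps_init storing min_freq = fmax): with min_freq = 5 and
+ * max_freq = 15, requesting the "fastest" generic value 15 yields
+ * 5 + (15 - 15) = bin 5, the smallest delay, while requesting 5 yields
+ * 5 + (15 - 5) = bin 15, the largest delay.
+ */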
+
+static unsigned long intel_pxfreq(u32 vidfreq)
+{
+ int div = (vidfreq & 0x3f0000) >> 16;
+ int post = (vidfreq & 0x3000) >> 12;
+ int pre = (vidfreq & 0x7);
+
+ if (!pre)
+ return 0;
+
+ return div * 133333 / (pre << post);
+}
+
+static unsigned int init_emon(struct intel_uncore *uncore)
+{
+ u8 pxw[16];
+ int i;
+
+ /* Disable to program */
+ intel_uncore_write(uncore, ECR, 0);
+ intel_uncore_posting_read(uncore, ECR);
+
+ /* Program energy weights for various events */
+ intel_uncore_write(uncore, SDEW, 0x15040d00);
+ intel_uncore_write(uncore, CSIEW0, 0x007f0000);
+ intel_uncore_write(uncore, CSIEW1, 0x1e220004);
+ intel_uncore_write(uncore, CSIEW2, 0x04000004);
+
+ for (i = 0; i < 5; i++)
+ intel_uncore_write(uncore, PEW(i), 0);
+ for (i = 0; i < 3; i++)
+ intel_uncore_write(uncore, DEW(i), 0);
+
+ /* Program P-state weights to account for frequency power adjustment */
+ for (i = 0; i < 16; i++) {
+ u32 pxvidfreq = intel_uncore_read(uncore, PXVFREQ(i));
+ unsigned int freq = intel_pxfreq(pxvidfreq);
+ unsigned int vid =
+ (pxvidfreq & PXVFREQ_PX_MASK) >> PXVFREQ_PX_SHIFT;
+ unsigned int val;
+
+ val = vid * vid * freq / 1000 * 255;
+ val /= 127 * 127 * 900;
+
+ pxw[i] = val;
+ }
+ /* Render standby states get 0 weight */
+ pxw[14] = 0;
+ pxw[15] = 0;
+
+ for (i = 0; i < 4; i++) {
+ intel_uncore_write(uncore, PXW(i),
+ pxw[i * 4 + 0] << 24 |
+ pxw[i * 4 + 1] << 16 |
+ pxw[i * 4 + 2] << 8 |
+ pxw[i * 4 + 3] << 0);
+ }
+
+ /* Adjust magic regs to magic values (more experimental results) */
+ intel_uncore_write(uncore, OGW0, 0);
+ intel_uncore_write(uncore, OGW1, 0);
+ intel_uncore_write(uncore, EG0, 0x00007f00);
+ intel_uncore_write(uncore, EG1, 0x0000000e);
+ intel_uncore_write(uncore, EG2, 0x000e0000);
+ intel_uncore_write(uncore, EG3, 0x68000300);
+ intel_uncore_write(uncore, EG4, 0x42000000);
+ intel_uncore_write(uncore, EG5, 0x00140031);
+ intel_uncore_write(uncore, EG6, 0);
+ intel_uncore_write(uncore, EG7, 0);
+
+ for (i = 0; i < 8; i++)
+ intel_uncore_write(uncore, PXWL(i), 0);
+
+ /* Enable PMON + select events */
+ intel_uncore_write(uncore, ECR, 0x80000019);
+
+ return intel_uncore_read(uncore, LCFUSE02) & LCFUSE_HIV_MASK;
+}
+
+static bool gen5_rps_enable(struct intel_rps *rps)
+{
+ struct intel_uncore *uncore = rps_to_uncore(rps);
+ u8 fstart, vstart;
+ u32 rgvmodectl;
+
+ spin_lock_irq(&mchdev_lock);
+
+ rgvmodectl = intel_uncore_read(uncore, MEMMODECTL);
+
+ /* Enable temp reporting */
+ intel_uncore_write16(uncore, PMMISC,
+ intel_uncore_read16(uncore, PMMISC) | MCPPCE_EN);
+ intel_uncore_write16(uncore, TSC1,
+ intel_uncore_read16(uncore, TSC1) | TSE);
+
+ /* 100ms RC evaluation intervals */
+ intel_uncore_write(uncore, RCUPEI, 100000);
+ intel_uncore_write(uncore, RCDNEI, 100000);
+
+ /* Set max/min thresholds to 90ms and 80ms respectively */
+ intel_uncore_write(uncore, RCBMAXAVG, 90000);
+ intel_uncore_write(uncore, RCBMINAVG, 80000);
+
+ intel_uncore_write(uncore, MEMIHYST, 1);
+
+ /* Set up min, max, and cur for interrupt handling */
+ fstart = (rgvmodectl & MEMMODE_FSTART_MASK) >>
+ MEMMODE_FSTART_SHIFT;
+
+ vstart = (intel_uncore_read(uncore, PXVFREQ(fstart)) &
+ PXVFREQ_PX_MASK) >> PXVFREQ_PX_SHIFT;
+
+ intel_uncore_write(uncore,
+ MEMINTREN,
+ MEMINT_CX_SUPR_EN | MEMINT_EVAL_CHG_EN);
+
+ intel_uncore_write(uncore, VIDSTART, vstart);
+ intel_uncore_posting_read(uncore, VIDSTART);
+
+ rgvmodectl |= MEMMODE_SWMODE_EN;
+ intel_uncore_write(uncore, MEMMODECTL, rgvmodectl);
+
+ if (wait_for_atomic((intel_uncore_read(uncore, MEMSWCTL) &
+ MEMCTL_CMD_STS) == 0, 10))
+ DRM_ERROR("stuck trying to change perf mode\n");
+ mdelay(1);
+
+ gen5_rps_set(rps, rps->cur_freq);
+
+ rps->ips.last_count1 = intel_uncore_read(uncore, DMIEC);
+ rps->ips.last_count1 += intel_uncore_read(uncore, DDREC);
+ rps->ips.last_count1 += intel_uncore_read(uncore, CSIEC);
+ rps->ips.last_time1 = jiffies_to_msecs(jiffies);
+
+ rps->ips.last_count2 = intel_uncore_read(uncore, GFXEC);
+ rps->ips.last_time2 = ktime_get_raw_ns();
+
+ spin_unlock_irq(&mchdev_lock);
+
+ rps->ips.corr = init_emon(uncore);
+
+ return true;
+}
+
+static void gen5_rps_disable(struct intel_rps *rps)
+{
+ struct intel_uncore *uncore = rps_to_uncore(rps);
+ u16 rgvswctl;
+
+ spin_lock_irq(&mchdev_lock);
+
+ rgvswctl = intel_uncore_read16(uncore, MEMSWCTL);
+
+ /* Ack interrupts, disable EFC interrupt */
+ intel_uncore_write(uncore, MEMINTREN,
+ intel_uncore_read(uncore, MEMINTREN) &
+ ~MEMINT_EVAL_CHG_EN);
+ intel_uncore_write(uncore, MEMINTRSTS, MEMINT_EVAL_CHG);
+ intel_uncore_write(uncore, DEIER,
+ intel_uncore_read(uncore, DEIER) & ~DE_PCU_EVENT);
+ intel_uncore_write(uncore, DEIIR, DE_PCU_EVENT);
+ intel_uncore_write(uncore, DEIMR,
+ intel_uncore_read(uncore, DEIMR) | DE_PCU_EVENT);
+
+ /* Go back to the starting frequency */
+ gen5_rps_set(rps, rps->idle_freq);
+ mdelay(1);
+ rgvswctl |= MEMCTL_CMD_STS;
+ intel_uncore_write(uncore, MEMSWCTL, rgvswctl);
+ mdelay(1);
+
+ spin_unlock_irq(&mchdev_lock);
+}
+
+static u32 rps_limits(struct intel_rps *rps, u8 val)
+{
+ u32 limits;
+
+ /*
+ * Only set the down limit when we've reached the lowest level to avoid
+ * getting more interrupts, otherwise leave this clear. This prevents a
+ * race in the hw when coming out of rc6: There's a tiny window where
+ * the hw runs at the minimal clock before selecting the desired
+ * frequency, if the down threshold expires in that window we will not
+ * receive a down interrupt.
+ */
+ if (INTEL_GEN(rps_to_i915(rps)) >= 9) {
+ limits = rps->max_freq_softlimit << 23;
+ if (val <= rps->min_freq_softlimit)
+ limits |= rps->min_freq_softlimit << 14;
+ } else {
+ limits = rps->max_freq_softlimit << 24;
+ if (val <= rps->min_freq_softlimit)
+ limits |= rps->min_freq_softlimit << 16;
+ }
+
+ return limits;
+}
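+
+/*
+ * Worked example (illustrative): on gen9+, with max_freq_softlimit =
+ * 0x38 and min_freq_softlimit = 0x0c, a request at the floor packs to
+ * (0x38 << 23) | (0x0c << 14) = 0x1c030000; any request above the
+ * floor leaves the low field clear, so no down-limit interrupt is
+ * armed.
+ */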
+
+static void rps_set_power(struct intel_rps *rps, int new_power)
+{
+ struct intel_uncore *uncore = rps_to_uncore(rps);
+ struct drm_i915_private *i915 = rps_to_i915(rps);
+ u32 threshold_up = 0, threshold_down = 0; /* in % */
+ u32 ei_up = 0, ei_down = 0;
+
+ lockdep_assert_held(&rps->power.mutex);
+
+ if (new_power == rps->power.mode)
+ return;
+
+ /* Note the units here are not exactly 1us, but 1280ns. */
+ switch (new_power) {
+ case LOW_POWER:
+ /* Upclock if more than 95% busy over 16ms */
+ ei_up = 16000;
+ threshold_up = 95;
+
+ /* Downclock if less than 85% busy over 32ms */
+ ei_down = 32000;
+ threshold_down = 85;
+ break;
+
+ case BETWEEN:
+ /* Upclock if more than 90% busy over 13ms */
+ ei_up = 13000;
+ threshold_up = 90;
+
+ /* Downclock if less than 75% busy over 32ms */
+ ei_down = 32000;
+ threshold_down = 75;
+ break;
+
+ case HIGH_POWER:
+ /* Upclock if more than 85% busy over 10ms */
+ ei_up = 10000;
+ threshold_up = 85;
+
+ /* Downclock if less than 60% busy over 32ms */
+ ei_down = 32000;
+ threshold_down = 60;
+ break;
+ }
+
+ /*
+ * When BYT can survive dynamic sw freq adjustments without hanging
+ * the system, this restriction can be lifted.
+ */
+ if (IS_VALLEYVIEW(i915))
+ goto skip_hw_write;
+
+ set(uncore, GEN6_RP_UP_EI, GT_INTERVAL_FROM_US(i915, ei_up));
+ set(uncore, GEN6_RP_UP_THRESHOLD,
+ GT_INTERVAL_FROM_US(i915, ei_up * threshold_up / 100));
+
+ set(uncore, GEN6_RP_DOWN_EI, GT_INTERVAL_FROM_US(i915, ei_down));
+ set(uncore, GEN6_RP_DOWN_THRESHOLD,
+ GT_INTERVAL_FROM_US(i915, ei_down * threshold_down / 100));
+
+ set(uncore, GEN6_RP_CONTROL,
+ (INTEL_GEN(i915) > 9 ? 0 : GEN6_RP_MEDIA_TURBO) |
+ GEN6_RP_MEDIA_HW_NORMAL_MODE |
+ GEN6_RP_MEDIA_IS_GFX |
+ GEN6_RP_ENABLE |
+ GEN6_RP_UP_BUSY_AVG |
+ GEN6_RP_DOWN_IDLE_AVG);
+
+skip_hw_write:
+ rps->power.mode = new_power;
+ rps->power.up_threshold = threshold_up;
+ rps->power.down_threshold = threshold_down;
+}
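+
+/*
+ * Worked example (illustrative, using the 1280ns tick noted above for
+ * gen6-8): the LOW_POWER up interval is GT_INTERVAL_FROM_US(i915,
+ * 16000) = 16000us / 1.28us = 12500 ticks, and the matching 95%
+ * threshold is 16000 * 95 / 100 = 15200us, i.e. 11875 ticks.
+ */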
+
+static void gen6_rps_set_thresholds(struct intel_rps *rps, u8 val)
+{
+ int new_power;
+
+ new_power = rps->power.mode;
+ switch (rps->power.mode) {
+ case LOW_POWER:
+ if (val > rps->efficient_freq + 1 &&
+ val > rps->cur_freq)
+ new_power = BETWEEN;
+ break;
+
+ case BETWEEN:
+ if (val <= rps->efficient_freq &&
+ val < rps->cur_freq)
+ new_power = LOW_POWER;
+ else if (val >= rps->rp0_freq &&
+ val > rps->cur_freq)
+ new_power = HIGH_POWER;
+ break;
+
+ case HIGH_POWER:
+ if (val < (rps->rp1_freq + rps->rp0_freq) >> 1 &&
+ val < rps->cur_freq)
+ new_power = BETWEEN;
+ break;
+ }
+ /* Max/min bins are special */
+ if (val <= rps->min_freq_softlimit)
+ new_power = LOW_POWER;
+ if (val >= rps->max_freq_softlimit)
+ new_power = HIGH_POWER;
+
+ mutex_lock(&rps->power.mutex);
+ if (rps->power.interactive)
+ new_power = HIGH_POWER;
+ rps_set_power(rps, new_power);
+ mutex_unlock(&rps->power.mutex);
+}
+
+void intel_rps_mark_interactive(struct intel_rps *rps, bool interactive)
+{
+ mutex_lock(&rps->power.mutex);
+ if (interactive) {
+ if (!rps->power.interactive++ && rps->active)
+ rps_set_power(rps, HIGH_POWER);
+ } else {
+ GEM_BUG_ON(!rps->power.interactive);
+ rps->power.interactive--;
+ }
+ mutex_unlock(&rps->power.mutex);
+}
+
+static int gen6_rps_set(struct intel_rps *rps, u8 val)
+{
+ struct intel_uncore *uncore = rps_to_uncore(rps);
+ struct drm_i915_private *i915 = rps_to_i915(rps);
+ u32 swreq;
+
+ if (INTEL_GEN(i915) >= 9)
+ swreq = GEN9_FREQUENCY(val);
+ else if (IS_HASWELL(i915) || IS_BROADWELL(i915))
+ swreq = HSW_FREQUENCY(val);
+ else
+ swreq = (GEN6_FREQUENCY(val) |
+ GEN6_OFFSET(0) |
+ GEN6_AGGRESSIVE_TURBO);
+ set(uncore, GEN6_RPNSWREQ, swreq);
+
+ return 0;
+}
+
+static int vlv_rps_set(struct intel_rps *rps, u8 val)
+{
+ struct drm_i915_private *i915 = rps_to_i915(rps);
+ int err;
+
+ vlv_punit_get(i915);
+ err = vlv_punit_write(i915, PUNIT_REG_GPU_FREQ_REQ, val);
+ vlv_punit_put(i915);
+
+ return err;
+}
+
+static int rps_set(struct intel_rps *rps, u8 val, bool update)
+{
+ struct drm_i915_private *i915 = rps_to_i915(rps);
+ int err;
+
+ if (INTEL_GEN(i915) < 6)
+ return 0;
+
+ if (val == rps->last_freq)
+ return 0;
+
+ if (IS_VALLEYVIEW(i915) || IS_CHERRYVIEW(i915))
+ err = vlv_rps_set(rps, val);
+ else
+ err = gen6_rps_set(rps, val);
+ if (err)
+ return err;
+
+ if (update)
+ gen6_rps_set_thresholds(rps, val);
+ rps->last_freq = val;
+
+ return 0;
+}
+
+void intel_rps_unpark(struct intel_rps *rps)
+{
+ u8 freq;
+
+ if (!rps->enabled)
+ return;
+
+ /*
+ * Use the user's desired frequency as a guide, but for better
+ * performance, jump directly to RPe as our starting frequency.
+ */
+ mutex_lock(&rps->lock);
+ rps->active = true;
+ freq = max(rps->cur_freq, rps->efficient_freq);
+ freq = clamp(freq, rps->min_freq_softlimit, rps->max_freq_softlimit);
+ intel_rps_set(rps, freq);
+ rps->last_adj = 0;
+ mutex_unlock(&rps->lock);
+
+ if (INTEL_GEN(rps_to_i915(rps)) >= 6)
+ rps_enable_interrupts(rps);
+
+ if (IS_GEN(rps_to_i915(rps), 5))
+ gen5_rps_update(rps);
+}
+
+void intel_rps_park(struct intel_rps *rps)
+{
+ struct drm_i915_private *i915 = rps_to_i915(rps);
+
+ if (!rps->enabled)
+ return;
+
+ if (INTEL_GEN(i915) >= 6)
+ rps_disable_interrupts(rps);
+
+ rps->active = false;
+ if (rps->last_freq <= rps->idle_freq)
+ return;
+
+ /*
+ * The punit delays the write of the frequency and voltage until it
+ * determines the GPU is awake. During normal usage we don't want to
+ * waste power changing the frequency if the GPU is sleeping (rc6).
+ * However, the GPU and driver are now idle and we do not want to delay
+ * switching to minimum voltage (reducing power whilst idle) as we do
+ * not expect to be woken in the near future and so must flush the
+ * change by waking the device.
+ *
+ * We choose to take the media powerwell (either would do to trick the
+ * punit into committing the voltage change) as that takes a lot less
+ * power than the render powerwell.
+ */
+ intel_uncore_forcewake_get(rps_to_uncore(rps), FORCEWAKE_MEDIA);
+ rps_set(rps, rps->idle_freq, false);
+ intel_uncore_forcewake_put(rps_to_uncore(rps), FORCEWAKE_MEDIA);
+}
+
+void intel_rps_boost(struct i915_request *rq)
+{
+ struct intel_rps *rps = &rq->engine->gt->rps;
+ unsigned long flags;
+
+ if (i915_request_signaled(rq) || !rps->active)
+ return;
+
+ /* Serializes with i915_request_retire() */
+ spin_lock_irqsave(&rq->lock, flags);
+ if (!i915_request_has_waitboost(rq) &&
+ !dma_fence_is_signaled_locked(&rq->fence)) {
+ set_bit(I915_FENCE_FLAG_BOOST, &rq->fence.flags);
+
+ if (!atomic_fetch_inc(&rps->num_waiters) &&
+ READ_ONCE(rps->cur_freq) < rps->boost_freq)
+ schedule_work(&rps->work);
+
+ atomic_inc(&rps->boosts);
+ }
+ spin_unlock_irqrestore(&rq->lock, flags);
+}
+
+int intel_rps_set(struct intel_rps *rps, u8 val)
+{
+ int err;
+
+ lockdep_assert_held(&rps->lock);
+ GEM_BUG_ON(val > rps->max_freq);
+ GEM_BUG_ON(val < rps->min_freq);
+
+ if (rps->active) {
+ err = rps_set(rps, val, true);
+ if (err)
+ return err;
+
+ /*
+ * Make sure we continue to get interrupts
+ * until we hit the minimum or maximum frequencies.
+ */
+ if (INTEL_GEN(rps_to_i915(rps)) >= 6) {
+ struct intel_uncore *uncore = rps_to_uncore(rps);
+
+ set(uncore,
+ GEN6_RP_INTERRUPT_LIMITS, rps_limits(rps, val));
+
+ set(uncore, GEN6_PMINTRMSK, rps_pm_mask(rps, val));
+ }
+ }
+
+ rps->cur_freq = val;
+ return 0;
+}
+
+static void gen6_rps_init(struct intel_rps *rps)
+{
+ struct drm_i915_private *i915 = rps_to_i915(rps);
+ struct intel_uncore *uncore = rps_to_uncore(rps);
+
+ /* All of these values are in units of 50MHz */
+
+ /* static values from HW: RP0 > RP1 > RPn (min_freq) */
+ if (IS_GEN9_LP(i915)) {
+ u32 rp_state_cap = intel_uncore_read(uncore, BXT_RP_STATE_CAP);
+
+ rps->rp0_freq = (rp_state_cap >> 16) & 0xff;
+ rps->rp1_freq = (rp_state_cap >> 8) & 0xff;
+ rps->min_freq = (rp_state_cap >> 0) & 0xff;
+ } else {
+ u32 rp_state_cap = intel_uncore_read(uncore, GEN6_RP_STATE_CAP);
+
+ rps->rp0_freq = (rp_state_cap >> 0) & 0xff;
+ rps->rp1_freq = (rp_state_cap >> 8) & 0xff;
+ rps->min_freq = (rp_state_cap >> 16) & 0xff;
+ }
+
+ /* hw_max = RP0 until we check for overclocking */
+ rps->max_freq = rps->rp0_freq;
+
+ rps->efficient_freq = rps->rp1_freq;
+ if (IS_HASWELL(i915) || IS_BROADWELL(i915) ||
+ IS_GEN9_BC(i915) || INTEL_GEN(i915) >= 10) {
+ u32 ddcc_status = 0;
+
+ if (sandybridge_pcode_read(i915,
+ HSW_PCODE_DYNAMIC_DUTY_CYCLE_CONTROL,
+ &ddcc_status, NULL) == 0)
+ rps->efficient_freq =
+ clamp_t(u8,
+ (ddcc_status >> 8) & 0xff,
+ rps->min_freq,
+ rps->max_freq);
+ }
+
+ if (IS_GEN9_BC(i915) || INTEL_GEN(i915) >= 10) {
+ /*
+ * Store the frequency values in 16.66 MHz units, which is
+ * the natural hardware unit for SKL.
+ */
+ rps->rp0_freq *= GEN9_FREQ_SCALER;
+ rps->rp1_freq *= GEN9_FREQ_SCALER;
+ rps->min_freq *= GEN9_FREQ_SCALER;
+ rps->max_freq *= GEN9_FREQ_SCALER;
+ rps->efficient_freq *= GEN9_FREQ_SCALER;
+ }
+}
+
+static bool rps_reset(struct intel_rps *rps)
+{
+ /* force a reset */
+ rps->power.mode = -1;
+ rps->last_freq = -1;
+
+ if (rps_set(rps, rps->min_freq, true)) {
+ DRM_ERROR("Failed to reset RPS to initial values\n");
+ return false;
+ }
+
+ rps->cur_freq = rps->min_freq;
+ return true;
+}
+
+/* See the Gen9_GT_PM_Programming_Guide doc for the below */
+static bool gen9_rps_enable(struct intel_rps *rps)
+{
+ struct drm_i915_private *i915 = rps_to_i915(rps);
+ struct intel_uncore *uncore = rps_to_uncore(rps);
+
+ /* Program defaults and thresholds for RPS */
+ if (IS_GEN(i915, 9))
+ intel_uncore_write_fw(uncore, GEN6_RC_VIDEO_FREQ,
+ GEN9_FREQUENCY(rps->rp1_freq));
+
+ /* 1 second timeout */
+ intel_uncore_write_fw(uncore, GEN6_RP_DOWN_TIMEOUT,
+ GT_INTERVAL_FROM_US(i915, 1000000));
+
+ intel_uncore_write_fw(uncore, GEN6_RP_IDLE_HYSTERSIS, 0xa);
+
+ return rps_reset(rps);
+}
+
+static bool gen8_rps_enable(struct intel_rps *rps)
+{
+ struct intel_uncore *uncore = rps_to_uncore(rps);
+
+ intel_uncore_write_fw(uncore, GEN6_RC_VIDEO_FREQ,
+ HSW_FREQUENCY(rps->rp1_freq));
+
+ /* NB: Docs say 1s, and 1000000 - which aren't equivalent */
+ intel_uncore_write_fw(uncore, GEN6_RP_DOWN_TIMEOUT,
+ 100000000 / 128); /* 1 second timeout */
+
+ intel_uncore_write_fw(uncore, GEN6_RP_IDLE_HYSTERSIS, 10);
+
+ return rps_reset(rps);
+}
+
+static bool gen6_rps_enable(struct intel_rps *rps)
+{
+ struct intel_uncore *uncore = rps_to_uncore(rps);
+
+ /* Power down if completely idle for over 50ms */
+ intel_uncore_write_fw(uncore, GEN6_RP_DOWN_TIMEOUT, 50000);
+ intel_uncore_write_fw(uncore, GEN6_RP_IDLE_HYSTERSIS, 10);
+
+ return rps_reset(rps);
+}
+
+static int chv_rps_max_freq(struct intel_rps *rps)
+{
+ struct drm_i915_private *i915 = rps_to_i915(rps);
+ u32 val;
+
+ val = vlv_punit_read(i915, FB_GFX_FMAX_AT_VMAX_FUSE);
+
+ switch (RUNTIME_INFO(i915)->sseu.eu_total) {
+ case 8:
+ /* (2 * 4) config */
+ val >>= FB_GFX_FMAX_AT_VMAX_2SS4EU_FUSE_SHIFT;
+ break;
+ case 12:
+ /* (2 * 6) config */
+ val >>= FB_GFX_FMAX_AT_VMAX_2SS6EU_FUSE_SHIFT;
+ break;
+ case 16:
+ /* (2 * 8) config */
+ default:
+ /* Setting (2 * 8) Min RP0 for any other combination */
+ val >>= FB_GFX_FMAX_AT_VMAX_2SS8EU_FUSE_SHIFT;
+ break;
+ }
+
+ return val & FB_GFX_FREQ_FUSE_MASK;
+}
+
+static int chv_rps_rpe_freq(struct intel_rps *rps)
+{
+ struct drm_i915_private *i915 = rps_to_i915(rps);
+ u32 val;
+
+ val = vlv_punit_read(i915, PUNIT_GPU_DUTYCYCLE_REG);
+ val >>= PUNIT_GPU_DUTYCYCLE_RPE_FREQ_SHIFT;
+
+ return val & PUNIT_GPU_DUTYCYCLE_RPE_FREQ_MASK;
+}
+
+static int chv_rps_guar_freq(struct intel_rps *rps)
+{
+ struct drm_i915_private *i915 = rps_to_i915(rps);
+ u32 val;
+
+ val = vlv_punit_read(i915, FB_GFX_FMAX_AT_VMAX_FUSE);
+
+ return val & FB_GFX_FREQ_FUSE_MASK;
+}
+
+static u32 chv_rps_min_freq(struct intel_rps *rps)
+{
+ struct drm_i915_private *i915 = rps_to_i915(rps);
+ u32 val;
+
+ val = vlv_punit_read(i915, FB_GFX_FMIN_AT_VMIN_FUSE);
+ val >>= FB_GFX_FMIN_AT_VMIN_FUSE_SHIFT;
+
+ return val & FB_GFX_FREQ_FUSE_MASK;
+}
+
+static bool chv_rps_enable(struct intel_rps *rps)
+{
+ struct intel_uncore *uncore = rps_to_uncore(rps);
+ struct drm_i915_private *i915 = rps_to_i915(rps);
+ u32 val;
+
+ /* 1: Program defaults and thresholds for RPS */
+ intel_uncore_write_fw(uncore, GEN6_RP_DOWN_TIMEOUT, 1000000);
+ intel_uncore_write_fw(uncore, GEN6_RP_UP_THRESHOLD, 59400);
+ intel_uncore_write_fw(uncore, GEN6_RP_DOWN_THRESHOLD, 245000);
+ intel_uncore_write_fw(uncore, GEN6_RP_UP_EI, 66000);
+ intel_uncore_write_fw(uncore, GEN6_RP_DOWN_EI, 350000);
+
+ intel_uncore_write_fw(uncore, GEN6_RP_IDLE_HYSTERSIS, 10);
+
+ /* 2: Enable RPS */
+ intel_uncore_write_fw(uncore, GEN6_RP_CONTROL,
+ GEN6_RP_MEDIA_HW_NORMAL_MODE |
+ GEN6_RP_MEDIA_IS_GFX |
+ GEN6_RP_ENABLE |
+ GEN6_RP_UP_BUSY_AVG |
+ GEN6_RP_DOWN_IDLE_AVG);
+
+ /* Setting Fixed Bias */
+ vlv_punit_get(i915);
+
+ val = VLV_OVERRIDE_EN | VLV_SOC_TDP_EN | CHV_BIAS_CPU_50_SOC_50;
+ vlv_punit_write(i915, VLV_TURBO_SOC_OVERRIDE, val);
+
+ val = vlv_punit_read(i915, PUNIT_REG_GPU_FREQ_STS);
+
+ vlv_punit_put(i915);
+
+ /* RPS code assumes GPLL is used */
+ WARN_ONCE((val & GPLLENABLE) == 0, "GPLL not enabled\n");
+
+ DRM_DEBUG_DRIVER("GPLL enabled? %s\n", yesno(val & GPLLENABLE));
+ DRM_DEBUG_DRIVER("GPU status: 0x%08x\n", val);
+
+ return rps_reset(rps);
+}
+
+static int vlv_rps_guar_freq(struct intel_rps *rps)
+{
+ struct drm_i915_private *i915 = rps_to_i915(rps);
+ u32 val, rp1;
+
+ val = vlv_nc_read(i915, IOSF_NC_FB_GFX_FREQ_FUSE);
+
+ rp1 = val & FB_GFX_FGUARANTEED_FREQ_FUSE_MASK;
+ rp1 >>= FB_GFX_FGUARANTEED_FREQ_FUSE_SHIFT;
+
+ return rp1;
+}
+
+static int vlv_rps_max_freq(struct intel_rps *rps)
+{
+ struct drm_i915_private *i915 = rps_to_i915(rps);
+ u32 val, rp0;
+
+ val = vlv_nc_read(i915, IOSF_NC_FB_GFX_FREQ_FUSE);
+
+ rp0 = (val & FB_GFX_MAX_FREQ_FUSE_MASK) >> FB_GFX_MAX_FREQ_FUSE_SHIFT;
+ /* Clamp to max */
+ rp0 = min_t(u32, rp0, 0xea);
+
+ return rp0;
+}
+
+static int vlv_rps_rpe_freq(struct intel_rps *rps)
+{
+ struct drm_i915_private *i915 = rps_to_i915(rps);
+ u32 val, rpe;
+
+ val = vlv_nc_read(i915, IOSF_NC_FB_GFX_FMAX_FUSE_LO);
+ rpe = (val & FB_FMAX_VMIN_FREQ_LO_MASK) >> FB_FMAX_VMIN_FREQ_LO_SHIFT;
+ val = vlv_nc_read(i915, IOSF_NC_FB_GFX_FMAX_FUSE_HI);
+ rpe |= (val & FB_FMAX_VMIN_FREQ_HI_MASK) << 5;
+
+ return rpe;
+}
+
+static int vlv_rps_min_freq(struct intel_rps *rps)
+{
+ struct drm_i915_private *i915 = rps_to_i915(rps);
+ u32 val;
+
+ val = vlv_punit_read(i915, PUNIT_REG_GPU_LFM) & 0xff;
+ /*
+ * According to the BYT Punit GPU turbo HAS 1.1.6.3 the minimum value
+ * for the minimum frequency in GPLL mode is 0xc1. Contrary to this on
+ * a BYT-M B0 the above register contains 0xbf. Moreover, when setting
+ * a frequency, the Punit will not allow values below 0xc0. Clamp it to
+ * 0xc0 to make sure it matches what the Punit accepts.
+ */
+ return max_t(u32, val, 0xc0);
+}
+
+static bool vlv_rps_enable(struct intel_rps *rps)
+{
+ struct intel_uncore *uncore = rps_to_uncore(rps);
+ struct drm_i915_private *i915 = rps_to_i915(rps);
+ u32 val;
+
+ intel_uncore_write_fw(uncore, GEN6_RP_DOWN_TIMEOUT, 1000000);
+ intel_uncore_write_fw(uncore, GEN6_RP_UP_THRESHOLD, 59400);
+ intel_uncore_write_fw(uncore, GEN6_RP_DOWN_THRESHOLD, 245000);
+ intel_uncore_write_fw(uncore, GEN6_RP_UP_EI, 66000);
+ intel_uncore_write_fw(uncore, GEN6_RP_DOWN_EI, 350000);
+
+ intel_uncore_write_fw(uncore, GEN6_RP_IDLE_HYSTERSIS, 10);
+
+ intel_uncore_write_fw(uncore, GEN6_RP_CONTROL,
+ GEN6_RP_MEDIA_TURBO |
+ GEN6_RP_MEDIA_HW_NORMAL_MODE |
+ GEN6_RP_MEDIA_IS_GFX |
+ GEN6_RP_ENABLE |
+ GEN6_RP_UP_BUSY_AVG |
+ GEN6_RP_DOWN_IDLE_CONT);
+
+ vlv_punit_get(i915);
+
+ /* Setting Fixed Bias */
+ val = VLV_OVERRIDE_EN | VLV_SOC_TDP_EN | VLV_BIAS_CPU_125_SOC_875;
+ vlv_punit_write(i915, VLV_TURBO_SOC_OVERRIDE, val);
+
+ val = vlv_punit_read(i915, PUNIT_REG_GPU_FREQ_STS);
+
+ vlv_punit_put(i915);
+
+ /* RPS code assumes GPLL is used */
+ WARN_ONCE((val & GPLLENABLE) == 0, "GPLL not enabled\n");
+
+ DRM_DEBUG_DRIVER("GPLL enabled? %s\n", yesno(val & GPLLENABLE));
+ DRM_DEBUG_DRIVER("GPU status: 0x%08x\n", val);
+
+ return rps_reset(rps);
+}
+
+static unsigned long __ips_gfx_val(struct intel_ips *ips)
+{
+ struct intel_rps *rps = container_of(ips, typeof(*rps), ips);
+ struct intel_uncore *uncore = rps_to_uncore(rps);
+ unsigned long t, corr, state1, corr2, state2;
+ u32 pxvid, ext_v;
+
+ lockdep_assert_held(&mchdev_lock);
+
+ pxvid = intel_uncore_read(uncore, PXVFREQ(rps->cur_freq));
+ pxvid = (pxvid >> 24) & 0x7f;
+ ext_v = pvid_to_extvid(rps_to_i915(rps), pxvid);
+
+ state1 = ext_v;
+
+ /* Revel in the empirically derived constants */
+
+ /* Correction factor in 1/100000 units */
+ t = ips_mch_val(uncore);
+ if (t > 80)
+ corr = t * 2349 + 135940;
+ else if (t >= 50)
+ corr = t * 964 + 29317;
+ else /* < 50 */
+ corr = t * 301 + 1004;
+
+ corr = corr * 150142 * state1 / 10000 - 78642;
+ corr /= 100000;
+ corr2 = corr * ips->corr;
+
+ state2 = corr2 * state1 / 10000;
+ state2 /= 100; /* convert to mW */
+
+ __gen5_ips_update(ips);
+
+ return ips->gfx_power + state2;
+}
+
+void intel_rps_enable(struct intel_rps *rps)
+{
+ struct drm_i915_private *i915 = rps_to_i915(rps);
+ struct intel_uncore *uncore = rps_to_uncore(rps);
+
+ intel_uncore_forcewake_get(uncore, FORCEWAKE_ALL);
+ if (IS_CHERRYVIEW(i915))
+ rps->enabled = chv_rps_enable(rps);
+ else if (IS_VALLEYVIEW(i915))
+ rps->enabled = vlv_rps_enable(rps);
+ else if (INTEL_GEN(i915) >= 9)
+ rps->enabled = gen9_rps_enable(rps);
+ else if (INTEL_GEN(i915) >= 8)
+ rps->enabled = gen8_rps_enable(rps);
+ else if (INTEL_GEN(i915) >= 6)
+ rps->enabled = gen6_rps_enable(rps);
+ else if (IS_IRONLAKE_M(i915))
+ rps->enabled = gen5_rps_enable(rps);
+ intel_uncore_forcewake_put(uncore, FORCEWAKE_ALL);
+ if (!rps->enabled)
+ return;
+
+ WARN_ON(rps->max_freq < rps->min_freq);
+ WARN_ON(rps->idle_freq > rps->max_freq);
+
+ WARN_ON(rps->efficient_freq < rps->min_freq);
+ WARN_ON(rps->efficient_freq > rps->max_freq);
+}
+
+static void gen6_rps_disable(struct intel_rps *rps)
+{
+ set(rps_to_uncore(rps), GEN6_RP_CONTROL, 0);
+}
+
+void intel_rps_disable(struct intel_rps *rps)
+{
+ struct drm_i915_private *i915 = rps_to_i915(rps);
+
+ rps->enabled = false;
+
+ if (INTEL_GEN(i915) >= 6)
+ gen6_rps_disable(rps);
+ else if (IS_IRONLAKE_M(i915))
+ gen5_rps_disable(rps);
+}
+
+static int byt_gpu_freq(struct intel_rps *rps, int val)
+{
+ /*
+ * N = val - 0xb7
+ * Slow = Fast = GPLL ref * N
+ */
+ return DIV_ROUND_CLOSEST(rps->gpll_ref_freq * (val - 0xb7), 1000);
+}
+
+static int byt_freq_opcode(struct intel_rps *rps, int val)
+{
+ return DIV_ROUND_CLOSEST(1000 * val, rps->gpll_ref_freq) + 0xb7;
+}
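+
+/*
+ * Worked round trip (illustrative, with a hypothetical gpll_ref_freq
+ * of 20000 kHz): val = 0xc0 gives N = 0xc0 - 0xb7 = 9, so
+ * byt_gpu_freq() = 20000 * 9 / 1000 = 180 MHz, and
+ * byt_freq_opcode(180) = 1000 * 180 / 20000 + 0xb7 = 0xc0 again.
+ */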
+
+static int chv_gpu_freq(struct intel_rps *rps, int val)
+{
+ /*
+ * N = val / 2
+ * CU (slow) = CU2x (fast) / 2 = GPLL ref * N / 2
+ */
+ return DIV_ROUND_CLOSEST(rps->gpll_ref_freq * val, 2 * 2 * 1000);
+}
+
+static int chv_freq_opcode(struct intel_rps *rps, int val)
+{
+ /* CHV needs even values */
+ return DIV_ROUND_CLOSEST(2 * 1000 * val, rps->gpll_ref_freq) * 2;
+}
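+
+/*
+ * Worked round trip (illustrative, same hypothetical 20000 kHz
+ * reference): val = 128 gives chv_gpu_freq() =
+ * 20000 * 128 / (2 * 2 * 1000) = 640 MHz, and chv_freq_opcode(640) =
+ * DIV_ROUND_CLOSEST(2 * 1000 * 640, 20000) * 2 = 64 * 2 = 128, which
+ * also satisfies the even-value rule above.
+ */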
+
+int intel_gpu_freq(struct intel_rps *rps, int val)
+{
+ struct drm_i915_private *i915 = rps_to_i915(rps);
+
+ if (INTEL_GEN(i915) >= 9)
+ return DIV_ROUND_CLOSEST(val * GT_FREQUENCY_MULTIPLIER,
+ GEN9_FREQ_SCALER);
+ else if (IS_CHERRYVIEW(i915))
+ return chv_gpu_freq(rps, val);
+ else if (IS_VALLEYVIEW(i915))
+ return byt_gpu_freq(rps, val);
+ else
+ return val * GT_FREQUENCY_MULTIPLIER;
+}
+
+int intel_freq_opcode(struct intel_rps *rps, int val)
+{
+ struct drm_i915_private *i915 = rps_to_i915(rps);
+
+ if (INTEL_GEN(i915) >= 9)
+ return DIV_ROUND_CLOSEST(val * GEN9_FREQ_SCALER,
+ GT_FREQUENCY_MULTIPLIER);
+ else if (IS_CHERRYVIEW(i915))
+ return chv_freq_opcode(rps, val);
+ else if (IS_VALLEYVIEW(i915))
+ return byt_freq_opcode(rps, val);
+ else
+ return DIV_ROUND_CLOSEST(val, GT_FREQUENCY_MULTIPLIER);
+}
+
+static void vlv_init_gpll_ref_freq(struct intel_rps *rps)
+{
+ struct drm_i915_private *i915 = rps_to_i915(rps);
+
+ rps->gpll_ref_freq =
+ vlv_get_cck_clock(i915, "GPLL ref",
+ CCK_GPLL_CLOCK_CONTROL,
+ i915->czclk_freq);
+
+ DRM_DEBUG_DRIVER("GPLL reference freq: %d kHz\n", rps->gpll_ref_freq);
+}
+
+static void vlv_rps_init(struct intel_rps *rps)
+{
+ struct drm_i915_private *i915 = rps_to_i915(rps);
+ u32 val;
+
+ vlv_iosf_sb_get(i915,
+ BIT(VLV_IOSF_SB_PUNIT) |
+ BIT(VLV_IOSF_SB_NC) |
+ BIT(VLV_IOSF_SB_CCK));
+
+ vlv_init_gpll_ref_freq(rps);
+
+ val = vlv_punit_read(i915, PUNIT_REG_GPU_FREQ_STS);
+ switch ((val >> 6) & 3) {
+ case 0:
+ case 1:
+ i915->mem_freq = 800;
+ break;
+ case 2:
+ i915->mem_freq = 1066;
+ break;
+ case 3:
+ i915->mem_freq = 1333;
+ break;
+ }
+ DRM_DEBUG_DRIVER("DDR speed: %d MHz\n", i915->mem_freq);
+
+ rps->max_freq = vlv_rps_max_freq(rps);
+ rps->rp0_freq = rps->max_freq;
+ DRM_DEBUG_DRIVER("max GPU freq: %d MHz (%u)\n",
+ intel_gpu_freq(rps, rps->max_freq),
+ rps->max_freq);
+
+ rps->efficient_freq = vlv_rps_rpe_freq(rps);
+ DRM_DEBUG_DRIVER("RPe GPU freq: %d MHz (%u)\n",
+ intel_gpu_freq(rps, rps->efficient_freq),
+ rps->efficient_freq);
+
+ rps->rp1_freq = vlv_rps_guar_freq(rps);
+ DRM_DEBUG_DRIVER("RP1(Guar Freq) GPU freq: %d MHz (%u)\n",
+ intel_gpu_freq(rps, rps->rp1_freq),
+ rps->rp1_freq);
+
+ rps->min_freq = vlv_rps_min_freq(rps);
+ DRM_DEBUG_DRIVER("min GPU freq: %d MHz (%u)\n",
+ intel_gpu_freq(rps, rps->min_freq),
+ rps->min_freq);
+
+ vlv_iosf_sb_put(i915,
+ BIT(VLV_IOSF_SB_PUNIT) |
+ BIT(VLV_IOSF_SB_NC) |
+ BIT(VLV_IOSF_SB_CCK));
+}
+
+static void chv_rps_init(struct intel_rps *rps)
+{
+ struct drm_i915_private *i915 = rps_to_i915(rps);
+ u32 val;
+
+ vlv_iosf_sb_get(i915,
+ BIT(VLV_IOSF_SB_PUNIT) |
+ BIT(VLV_IOSF_SB_NC) |
+ BIT(VLV_IOSF_SB_CCK));
+
+ vlv_init_gpll_ref_freq(rps);
+
+ val = vlv_cck_read(i915, CCK_FUSE_REG);
+
+ switch ((val >> 2) & 0x7) {
+ case 3:
+ i915->mem_freq = 2000;
+ break;
+ default:
+ i915->mem_freq = 1600;
+ break;
+ }
+ DRM_DEBUG_DRIVER("DDR speed: %d MHz\n", i915->mem_freq);
+
+ rps->max_freq = chv_rps_max_freq(rps);
+ rps->rp0_freq = rps->max_freq;
+ DRM_DEBUG_DRIVER("max GPU freq: %d MHz (%u)\n",
+ intel_gpu_freq(rps, rps->max_freq),
+ rps->max_freq);
+
+ rps->efficient_freq = chv_rps_rpe_freq(rps);
+ DRM_DEBUG_DRIVER("RPe GPU freq: %d MHz (%u)\n",
+ intel_gpu_freq(rps, rps->efficient_freq),
+ rps->efficient_freq);
+
+ rps->rp1_freq = chv_rps_guar_freq(rps);
+ DRM_DEBUG_DRIVER("RP1(Guar) GPU freq: %d MHz (%u)\n",
+ intel_gpu_freq(rps, rps->rp1_freq),
+ rps->rp1_freq);
+
+ rps->min_freq = chv_rps_min_freq(rps);
+ DRM_DEBUG_DRIVER("min GPU freq: %d MHz (%u)\n",
+ intel_gpu_freq(rps, rps->min_freq),
+ rps->min_freq);
+
+ vlv_iosf_sb_put(i915,
+ BIT(VLV_IOSF_SB_PUNIT) |
+ BIT(VLV_IOSF_SB_NC) |
+ BIT(VLV_IOSF_SB_CCK));
+
+ WARN_ONCE((rps->max_freq | rps->efficient_freq | rps->rp1_freq |
+ rps->min_freq) & 1,
+ "Odd GPU freq values\n");
+}
+
+static void vlv_c0_read(struct intel_uncore *uncore, struct intel_rps_ei *ei)
+{
+ ei->ktime = ktime_get_raw();
+ ei->render_c0 = intel_uncore_read(uncore, VLV_RENDER_C0_COUNT);
+ ei->media_c0 = intel_uncore_read(uncore, VLV_MEDIA_C0_COUNT);
+}
+
+static u32 vlv_wa_c0_ei(struct intel_rps *rps, u32 pm_iir)
+{
+ struct intel_uncore *uncore = rps_to_uncore(rps);
+ const struct intel_rps_ei *prev = &rps->ei;
+ struct intel_rps_ei now;
+ u32 events = 0;
+
+ if ((pm_iir & GEN6_PM_RP_UP_EI_EXPIRED) == 0)
+ return 0;
+
+ vlv_c0_read(uncore, &now);
+
+ if (prev->ktime) {
+ u64 time, c0;
+ u32 render, media;
+
+ time = ktime_us_delta(now.ktime, prev->ktime);
+
+ time *= rps_to_i915(rps)->czclk_freq;
+
+ /*
+ * Workload can be split between render + media,
+ * e.g. SwapBuffers being blitted in X after being rendered in
+ * mesa. To account for this we need to combine both engines
+ * into our activity counter.
+ */
+ render = now.render_c0 - prev->render_c0;
+ media = now.media_c0 - prev->media_c0;
+ c0 = max(render, media);
+ c0 *= 1000 * 100 << 8; /* to usecs and scale to threshold% */
+
+ if (c0 > time * rps->power.up_threshold)
+ events = GEN6_PM_RP_UP_THRESHOLD;
+ else if (c0 < time * rps->power.down_threshold)
+ events = GEN6_PM_RP_DOWN_THRESHOLD;
+ }
+
+ rps->ei = now;
+ return events;
+}
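+
+/*
+ * Worked example (illustrative, on the assumption that the C0 counters
+ * tick once per 256 czclk cycles, hence the << 8): with czclk_freq =
+ * 200000 kHz over a 10000us window, time scales to 10000 * 200000 =
+ * 2e9; a counter delta of 7422 scales to 7422 * 1000 * 100 * 256,
+ * roughly 1.9e11, which just crosses time * up_threshold at the
+ * 95% mark (2e9 * 95).
+ */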
+
+static void rps_work(struct work_struct *work)
+{
+ struct intel_rps *rps = container_of(work, typeof(*rps), work);
+ struct intel_gt *gt = rps_to_gt(rps);
+ bool client_boost = false;
+ int new_freq, adj, min, max;
+ u32 pm_iir = 0;
+
+ spin_lock_irq(&gt->irq_lock);
+ pm_iir = fetch_and_zero(&rps->pm_iir);
+ client_boost = atomic_read(&rps->num_waiters);
+ spin_unlock_irq(&gt->irq_lock);
+
+ /* Make sure we didn't queue anything we're not going to process. */
+ if ((pm_iir & rps->pm_events) == 0 && !client_boost)
+ goto out;
+
+ mutex_lock(&rps->lock);
+
+ pm_iir |= vlv_wa_c0_ei(rps, pm_iir);
+
+ adj = rps->last_adj;
+ new_freq = rps->cur_freq;
+ min = rps->min_freq_softlimit;
+ max = rps->max_freq_softlimit;
+ if (client_boost)
+ max = rps->max_freq;
+ if (client_boost && new_freq < rps->boost_freq) {
+ new_freq = rps->boost_freq;
+ adj = 0;
+ } else if (pm_iir & GEN6_PM_RP_UP_THRESHOLD) {
+ if (adj > 0)
+ adj *= 2;
+ else /* CHV needs even encode values */
+ adj = IS_CHERRYVIEW(gt->i915) ? 2 : 1;
+
+ if (new_freq >= rps->max_freq_softlimit)
+ adj = 0;
+ } else if (client_boost) {
+ adj = 0;
+ } else if (pm_iir & GEN6_PM_RP_DOWN_TIMEOUT) {
+ if (rps->cur_freq > rps->efficient_freq)
+ new_freq = rps->efficient_freq;
+ else if (rps->cur_freq > rps->min_freq_softlimit)
+ new_freq = rps->min_freq_softlimit;
+ adj = 0;
+ } else if (pm_iir & GEN6_PM_RP_DOWN_THRESHOLD) {
+ if (adj < 0)
+ adj *= 2;
+ else /* CHV needs even encode values */
+ adj = IS_CHERRYVIEW(gt->i915) ? -2 : -1;
+
+ if (new_freq <= rps->min_freq_softlimit)
+ adj = 0;
+ } else { /* unknown event */
+ adj = 0;
+ }
+
+ rps->last_adj = adj;
+
+ /*
+ * Limit deboosting and boosting to keep ourselves at the extremes
+ * when in the respective power modes (i.e. slowly decrease frequencies
+ * while in the HIGH_POWER zone and slowly increase frequencies while
+ * in the LOW_POWER zone). On idle, we will hit the timeout and drop
+ * to the next level quickly, and conversely if busy we expect to
+ * hit a waitboost and rapidly switch into max power.
+ */
+ if ((adj < 0 && rps->power.mode == HIGH_POWER) ||
+ (adj > 0 && rps->power.mode == LOW_POWER))
+ rps->last_adj = 0;
+
+ /*
+ * sysfs frequency interfaces may have snuck in while servicing the
+ * interrupt
+ */
+ new_freq += adj;
+ new_freq = clamp_t(int, new_freq, min, max);
+
+ if (intel_rps_set(rps, new_freq)) {
+ DRM_DEBUG_DRIVER("Failed to set new GPU frequency\n");
+ rps->last_adj = 0;
+ }
+
+ mutex_unlock(&rps->lock);
+
+out:
+ spin_lock_irq(&gt->irq_lock);
+ gen6_gt_pm_unmask_irq(gt, rps->pm_events);
+ spin_unlock_irq(&gt->irq_lock);
+}
+
+void gen11_rps_irq_handler(struct intel_rps *rps, u32 pm_iir)
+{
+ struct intel_gt *gt = rps_to_gt(rps);
+ const u32 events = rps->pm_events & pm_iir;
+
+ lockdep_assert_held(&gt->irq_lock);
+
+ if (unlikely(!events))
+ return;
+
+ gen6_gt_pm_mask_irq(gt, events);
+
+ rps->pm_iir |= events;
+ schedule_work(&rps->work);
+}
+
+void gen6_rps_irq_handler(struct intel_rps *rps, u32 pm_iir)
+{
+ struct intel_gt *gt = rps_to_gt(rps);
+
+ if (pm_iir & rps->pm_events) {
+ spin_lock(&gt->irq_lock);
+ gen6_gt_pm_mask_irq(gt, pm_iir & rps->pm_events);
+ rps->pm_iir |= pm_iir & rps->pm_events;
+ schedule_work(&rps->work);
+ spin_unlock(&gt->irq_lock);
+ }
+
+ if (INTEL_GEN(gt->i915) >= 8)
+ return;
+
+ if (pm_iir & PM_VEBOX_USER_INTERRUPT)
+ intel_engine_signal_breadcrumbs(gt->engine[VECS0]);
+
+ if (pm_iir & PM_VEBOX_CS_ERROR_INTERRUPT)
+ DRM_DEBUG("Command parser error, pm_iir 0x%08x\n", pm_iir);
+}
+
+void gen5_rps_irq_handler(struct intel_rps *rps)
+{
+ struct intel_uncore *uncore = rps_to_uncore(rps);
+ u32 busy_up, busy_down, max_avg, min_avg;
+ u8 new_freq;
+
+ spin_lock(&mchdev_lock);
+
+ intel_uncore_write16(uncore,
+ MEMINTRSTS,
+ intel_uncore_read(uncore, MEMINTRSTS));
+
+ intel_uncore_write16(uncore, MEMINTRSTS, MEMINT_EVAL_CHG);
+ busy_up = intel_uncore_read(uncore, RCPREVBSYTUPAVG);
+ busy_down = intel_uncore_read(uncore, RCPREVBSYTDNAVG);
+ max_avg = intel_uncore_read(uncore, RCBMAXAVG);
+ min_avg = intel_uncore_read(uncore, RCBMINAVG);
+
+ /* Handle RCS change request from hw */
+ new_freq = rps->cur_freq;
+ if (busy_up > max_avg)
+ new_freq++;
+ else if (busy_down < min_avg)
+ new_freq--;
+ new_freq = clamp(new_freq,
+ rps->min_freq_softlimit,
+ rps->max_freq_softlimit);
+
+ if (new_freq != rps->cur_freq && gen5_rps_set(rps, new_freq))
+ rps->cur_freq = new_freq;
+
+ spin_unlock(&mchdev_lock);
+}
+
+void intel_rps_init_early(struct intel_rps *rps)
+{
+ mutex_init(&rps->lock);
+ mutex_init(&rps->power.mutex);
+
+ INIT_WORK(&rps->work, rps_work);
+
+ atomic_set(&rps->num_waiters, 0);
+}
+
+void intel_rps_init(struct intel_rps *rps)
+{
+ struct drm_i915_private *i915 = rps_to_i915(rps);
+
+ if (IS_CHERRYVIEW(i915))
+ chv_rps_init(rps);
+ else if (IS_VALLEYVIEW(i915))
+ vlv_rps_init(rps);
+ else if (INTEL_GEN(i915) >= 6)
+ gen6_rps_init(rps);
+ else if (IS_IRONLAKE_M(i915))
+ gen5_rps_init(rps);
+
+ /* Derive initial user preferences/limits from the hardware limits */
+ rps->max_freq_softlimit = rps->max_freq;
+ rps->min_freq_softlimit = rps->min_freq;
+
+ /* After setting max-softlimit, find the overclock max freq */
+ if (IS_GEN(i915, 6) || IS_IVYBRIDGE(i915) || IS_HASWELL(i915)) {
+ u32 params = 0;
+
+ sandybridge_pcode_read(i915, GEN6_READ_OC_PARAMS,
+ &params, NULL);
+ if (params & BIT(31)) { /* OC supported */
+ DRM_DEBUG_DRIVER("Overclocking supported, max: %dMHz, overclock: %dMHz\n",
+ (rps->max_freq & 0xff) * 50,
+ (params & 0xff) * 50);
+ rps->max_freq = params & 0xff;
+ }
+ }
+
+ /* Finally allow us to boost to max by default */
+ rps->boost_freq = rps->max_freq;
+ rps->idle_freq = rps->min_freq;
+ rps->cur_freq = rps->idle_freq;
+
+ rps->pm_intrmsk_mbz = 0;
+
+ /*
+ * SNB, IVB and HSW will (and VLV, CHV may) hard hang on a looping
+ * batchbuffer if GEN6_PM_RP_UP_EI_EXPIRED is masked.
+ *
+ * TODO: verify if this can be reproduced on VLV,CHV.
+ */
+ if (INTEL_GEN(i915) <= 7)
+ rps->pm_intrmsk_mbz |= GEN6_PM_RP_UP_EI_EXPIRED;
+
+ if (INTEL_GEN(i915) >= 8 && INTEL_GEN(i915) < 11)
+ rps->pm_intrmsk_mbz |= GEN8_PMINTR_DISABLE_REDIRECT_TO_GUC;
+}
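+
+/*
+ * Worked example (illustrative): on gen8, pm_intrmsk_mbz ends up as
+ * GEN8_PMINTR_DISABLE_REDIRECT_TO_GUC, so rps_pm_sanitize_mask(rps,
+ * ~0u) strips that must-be-zero bit before the value reaches
+ * GEN6_PMINTRMSK; on gen7 it is GEN6_PM_RP_UP_EI_EXPIRED that is
+ * stripped instead.
+ */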
+
+u32 intel_rps_get_cagf(struct intel_rps *rps, u32 rpstat)
+{
+ struct drm_i915_private *i915 = rps_to_i915(rps);
+ u32 cagf;
+
+ if (IS_VALLEYVIEW(i915) || IS_CHERRYVIEW(i915))
+ cagf = (rpstat >> 8) & 0xff;
+ else if (INTEL_GEN(i915) >= 9)
+ cagf = (rpstat & GEN9_CAGF_MASK) >> GEN9_CAGF_SHIFT;
+ else if (IS_HASWELL(i915) || IS_BROADWELL(i915))
+ cagf = (rpstat & HSW_CAGF_MASK) >> HSW_CAGF_SHIFT;
+ else
+ cagf = (rpstat & GEN6_CAGF_MASK) >> GEN6_CAGF_SHIFT;
+
+ return cagf;
+}
+
+static u32 read_cagf(struct intel_rps *rps)
+{
+ struct drm_i915_private *i915 = rps_to_i915(rps);
+ u32 freq;
+
+ if (IS_VALLEYVIEW(i915) || IS_CHERRYVIEW(i915)) {
+ vlv_punit_get(i915);
+ freq = vlv_punit_read(i915, PUNIT_REG_GPU_FREQ_STS);
+ vlv_punit_put(i915);
+ } else {
+ freq = intel_uncore_read(rps_to_gt(rps)->uncore, GEN6_RPSTAT1);
+ }
+
+ return intel_rps_get_cagf(rps, freq);
+}
+
+u32 intel_rps_read_actual_frequency(struct intel_rps *rps)
+{
+ struct intel_runtime_pm *rpm = rps_to_gt(rps)->uncore->rpm;
+ intel_wakeref_t wakeref;
+ u32 freq = 0;
+
+ with_intel_runtime_pm_if_in_use(rpm, wakeref)
+ freq = intel_gpu_freq(rps, read_cagf(rps));
+
+ return freq;
+}
+
+/* External interface for intel_ips.ko */
+
+static struct drm_i915_private __rcu *ips_mchdev;
+
+/**
+ * ips_ping_for_i915_load - link with intel_ips if it loaded first
+ *
+ * Tells the intel_ips driver that the i915 driver is now loaded, if
+ * IPS got loaded first.
+ *
+ * This awkward dance is so that neither module has to depend on the
+ * other in order for IPS to do the appropriate communication of
+ * GPU turbo limits to i915.
+ */
+static void
+ips_ping_for_i915_load(void)
+{
+ void (*link)(void);
+
+ link = symbol_get(ips_link_to_i915_driver);
+ if (link) {
+ link();
+ symbol_put(ips_link_to_i915_driver);
+ }
+}
+
+void intel_rps_driver_register(struct intel_rps *rps)
+{
+ struct intel_gt *gt = rps_to_gt(rps);
+
+ /*
+ * We only register the i915 ips part with intel-ips once everything is
+ * set up, to avoid intel-ips sneaking in and reading bogus values.
+ */
+ if (IS_GEN(gt->i915, 5)) {
+ GEM_BUG_ON(ips_mchdev);
+ rcu_assign_pointer(ips_mchdev, gt->i915);
+ ips_ping_for_i915_load();
+ }
+}
+
+void intel_rps_driver_unregister(struct intel_rps *rps)
+{
+ if (rcu_access_pointer(ips_mchdev) == rps_to_i915(rps))
+ rcu_assign_pointer(ips_mchdev, NULL);
+}
+
+static struct drm_i915_private *mchdev_get(void)
+{
+ struct drm_i915_private *i915;
+
+ rcu_read_lock();
+ i915 = rcu_dereference(ips_mchdev);
+ if (!i915 || !kref_get_unless_zero(&i915->drm.ref))
+ i915 = NULL;
+ rcu_read_unlock();
+
+ return i915;
+}
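+
+/*
+ * Each IPS hook below follows the same pattern (a sketch, not a hook):
+ *
+ *	i915 = mchdev_get();
+ *	if (!i915)
+ *		return false;
+ *	... query or adjust rps, typically under mchdev_lock ...
+ *	drm_dev_put(&i915->drm);
+ */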
+
+/**
+ * i915_read_mch_val - return value for IPS use
+ *
+ * Calculate and return a value for the IPS driver to use when deciding whether
+ * we have thermal and power headroom to increase CPU or GPU power budget.
+ */
+unsigned long i915_read_mch_val(void)
+{
+ struct drm_i915_private *i915;
+ unsigned long chipset_val = 0;
+ unsigned long graphics_val = 0;
+ intel_wakeref_t wakeref;
+
+ i915 = mchdev_get();
+ if (!i915)
+ return 0;
+
+ with_intel_runtime_pm(&i915->runtime_pm, wakeref) {
+ struct intel_ips *ips = &i915->gt.rps.ips;
+
+ spin_lock_irq(&mchdev_lock);
+ chipset_val = __ips_chipset_val(ips);
+ graphics_val = __ips_gfx_val(ips);
+ spin_unlock_irq(&mchdev_lock);
+ }
+
+ drm_dev_put(&i915->drm);
+ return chipset_val + graphics_val;
+}
+EXPORT_SYMBOL_GPL(i915_read_mch_val);
+
+/**
+ * i915_gpu_raise - raise GPU frequency limit
+ *
+ * Raise the limit; IPS indicates we have thermal headroom.
+ */
+bool i915_gpu_raise(void)
+{
+ struct drm_i915_private *i915;
+ struct intel_rps *rps;
+
+ i915 = mchdev_get();
+ if (!i915)
+ return false;
+
+ rps = &i915->gt.rps;
+
+ spin_lock_irq(&mchdev_lock);
+ if (rps->max_freq_softlimit < rps->max_freq)
+ rps->max_freq_softlimit++;
+ spin_unlock_irq(&mchdev_lock);
+
+ drm_dev_put(&i915->drm);
+ return true;
+}
+EXPORT_SYMBOL_GPL(i915_gpu_raise);
+
+/**
+ * i915_gpu_lower - lower GPU frequency limit
+ *
+ * IPS indicates we're close to a thermal limit, so throttle back the GPU
+ * frequency maximum.
+ */
+bool i915_gpu_lower(void)
+{
+ struct drm_i915_private *i915;
+ struct intel_rps *rps;
+
+ i915 = mchdev_get();
+ if (!i915)
+ return false;
+
+ rps = &i915->gt.rps;
+
+ spin_lock_irq(&mchdev_lock);
+ if (rps->max_freq_softlimit > rps->min_freq)
+ rps->max_freq_softlimit--;
+ spin_unlock_irq(&mchdev_lock);
+
+ drm_dev_put(&i915->drm);
+ return true;
+}
+EXPORT_SYMBOL_GPL(i915_gpu_lower);
+
+/**
+ * i915_gpu_busy - indicate GPU busyness to IPS
+ *
+ * Tell the IPS driver whether or not the GPU is busy.
+ */
+bool i915_gpu_busy(void)
+{
+ struct drm_i915_private *i915;
+ bool ret;
+
+ i915 = mchdev_get();
+ if (!i915)
+ return false;
+
+ ret = i915->gt.awake;
+
+ drm_dev_put(&i915->drm);
+ return ret;
+}
+EXPORT_SYMBOL_GPL(i915_gpu_busy);
+
+/**
+ * i915_gpu_turbo_disable - disable graphics turbo
+ *
+ * Disable graphics turbo by clamping the maximum frequency down to the
+ * minimum and setting the current frequency to match.
+ */
+bool i915_gpu_turbo_disable(void)
+{
+ struct drm_i915_private *i915;
+ struct intel_rps *rps;
+ bool ret;
+
+ i915 = mchdev_get();
+ if (!i915)
+ return false;
+
+ rps = &i915->gt.rps;
+
+ spin_lock_irq(&mchdev_lock);
+ rps->max_freq_softlimit = rps->min_freq;
+ ret = gen5_rps_set(&i915->gt.rps, rps->min_freq);
+ spin_unlock_irq(&mchdev_lock);
+
+ drm_dev_put(&i915->drm);
+ return ret;
+}
+EXPORT_SYMBOL_GPL(i915_gpu_turbo_disable);
diff --git a/drivers/gpu/drm/i915/gt/intel_rps.h b/drivers/gpu/drm/i915/gt/intel_rps.h
new file mode 100644
index 000000000000..dfa98194f3b2
--- /dev/null
+++ b/drivers/gpu/drm/i915/gt/intel_rps.h
@@ -0,0 +1,39 @@
+/*
+ * SPDX-License-Identifier: MIT
+ *
+ * Copyright © 2019 Intel Corporation
+ */
+
+#ifndef INTEL_RPS_H
+#define INTEL_RPS_H
+
+#include "intel_rps_types.h"
+
+struct i915_request;
+
+void intel_rps_init_early(struct intel_rps *rps);
+void intel_rps_init(struct intel_rps *rps);
+
+void intel_rps_driver_register(struct intel_rps *rps);
+void intel_rps_driver_unregister(struct intel_rps *rps);
+
+void intel_rps_enable(struct intel_rps *rps);
+void intel_rps_disable(struct intel_rps *rps);
+
+void intel_rps_park(struct intel_rps *rps);
+void intel_rps_unpark(struct intel_rps *rps);
+void intel_rps_boost(struct i915_request *rq);
+
+int intel_rps_set(struct intel_rps *rps, u8 val);
+void intel_rps_mark_interactive(struct intel_rps *rps, bool interactive);
+
+int intel_gpu_freq(struct intel_rps *rps, int val);
+int intel_freq_opcode(struct intel_rps *rps, int val);
+u32 intel_rps_get_cagf(struct intel_rps *rps, u32 rpstat1);
+u32 intel_rps_read_actual_frequency(struct intel_rps *rps);
+
+void gen5_rps_irq_handler(struct intel_rps *rps);
+void gen6_rps_irq_handler(struct intel_rps *rps, u32 pm_iir);
+void gen11_rps_irq_handler(struct intel_rps *rps, u32 pm_iir);
+
+#endif /* INTEL_RPS_H */
diff --git a/drivers/gpu/drm/i915/gt/intel_rps_types.h b/drivers/gpu/drm/i915/gt/intel_rps_types.h
new file mode 100644
index 000000000000..c2e279154bd5
--- /dev/null
+++ b/drivers/gpu/drm/i915/gt/intel_rps_types.h
@@ -0,0 +1,93 @@
+/*
+ * SPDX-License-Identifier: MIT
+ *
+ * Copyright © 2019 Intel Corporation
+ */
+
+#ifndef INTEL_RPS_TYPES_H
+#define INTEL_RPS_TYPES_H
+
+#include <linux/atomic.h>
+#include <linux/ktime.h>
+#include <linux/mutex.h>
+#include <linux/types.h>
+#include <linux/workqueue.h>
+
+struct intel_ips {
+ u64 last_count1;
+ unsigned long last_time1;
+ unsigned long chipset_power;
+ u64 last_count2;
+ u64 last_time2;
+ unsigned long gfx_power;
+ u8 corr;
+
+ int c, m;
+};
+
+struct intel_rps_ei {
+ ktime_t ktime;
+ u32 render_c0;
+ u32 media_c0;
+};
+
+struct intel_rps {
+ struct mutex lock; /* protects enabling and the worker */
+
+ /*
+ * work, interrupts_enabled and pm_iir are protected by
+ * dev_priv->irq_lock
+ */
+ struct work_struct work;
+ bool enabled;
+ bool active;
+ u32 pm_iir;
+
+ /* PM interrupt bits that should never be masked */
+ u32 pm_intrmsk_mbz;
+ u32 pm_events;
+
+ /*
+ * Frequencies are stored in potentially platform-dependent multiples.
+ * In other words, *_freq needs to be multiplied by X to yield the real
+ * frequency. Soft limits are those which are used for the dynamic
+ * reclocking done by the driver (raise frequencies under heavy loads,
+ * and lower for lighter loads). Hard limits are those imposed by the
+ * hardware.
+ *
+ * A distinction is made for overclocking, which is never enabled by
+ * default, and is considered to be above the hard limit if it's
+ * possible at all.
+ */
+ u8 cur_freq; /* Current frequency (cached, may not == HW) */
+ u8 last_freq; /* Last SWREQ frequency */
+ u8 min_freq_softlimit; /* Minimum frequency permitted by the driver */
+ u8 max_freq_softlimit; /* Max frequency permitted by the driver */
+ u8 max_freq; /* Maximum frequency, RP0 if not overclocking */
+ u8 min_freq; /* AKA RPn. Minimum frequency */
+ u8 boost_freq; /* Frequency to request when wait boosting */
+ u8 idle_freq; /* Frequency to request when we are idle */
+ u8 efficient_freq; /* AKA RPe. Pre-determined balanced frequency */
+ u8 rp1_freq; /* "less than" RP0 power/frequency */
+ u8 rp0_freq; /* Non-overclocked max frequency. */
+ u16 gpll_ref_freq; /* vlv/chv GPLL reference frequency */
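+ /*
+ * Illustrative example: on snb/ivb/hsw one unit is 50MHz (cf. the
+ * "* 50" scaling in intel_rps_init()), so cur_freq == 20 reads back
+ * as intel_gpu_freq(rps, 20) == 1000MHz; vlv/chv instead derive the
+ * conversion from gpll_ref_freq.
+ */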
+
+ int last_adj;
+
+ struct {
+ struct mutex mutex;
+
+ enum { LOW_POWER, BETWEEN, HIGH_POWER } mode;
+ unsigned int interactive;
+
+ u8 up_threshold; /* Current %busy required to upclock */
+ u8 down_threshold; /* Current %busy required to downclock */
+ } power;
+
+ atomic_t num_waiters;
+ atomic_t boosts;
+
+ /* manual wa residency calculations */
+ struct intel_rps_ei ei;
+ struct intel_ips ips;
+};
+
+#endif /* INTEL_RPS_TYPES_H */
diff --git a/drivers/gpu/drm/i915/gt/intel_sseu.c b/drivers/gpu/drm/i915/gt/intel_sseu.c
index a0756f006f5f..74f793423231 100644
--- a/drivers/gpu/drm/i915/gt/intel_sseu.c
+++ b/drivers/gpu/drm/i915/gt/intel_sseu.c
@@ -8,6 +8,19 @@
#include "intel_lrc_reg.h"
#include "intel_sseu.h"
+void intel_sseu_set_info(struct sseu_dev_info *sseu, u8 max_slices,
+ u8 max_subslices, u8 max_eus_per_subslice)
+{
+ sseu->max_slices = max_slices;
+ sseu->max_subslices = max_subslices;
+ sseu->max_eus_per_subslice = max_eus_per_subslice;
+
+ sseu->ss_stride = GEN_SSEU_STRIDE(sseu->max_subslices);
+ GEM_BUG_ON(sseu->ss_stride > GEN_MAX_SUBSLICE_STRIDE);
+ sseu->eu_stride = GEN_SSEU_STRIDE(sseu->max_eus_per_subslice);
+ GEM_BUG_ON(sseu->eu_stride > GEN_MAX_EU_STRIDE);
+}
+
unsigned int
intel_sseu_subslice_total(const struct sseu_dev_info *sseu)
{
@@ -19,10 +32,32 @@ intel_sseu_subslice_total(const struct sseu_dev_info *sseu)
return total;
}
+u32 intel_sseu_get_subslices(const struct sseu_dev_info *sseu, u8 slice)
+{
+ int i, offset = slice * sseu->ss_stride;
+ u32 mask = 0;
+
+ GEM_BUG_ON(slice >= sseu->max_slices);
+
+ for (i = 0; i < sseu->ss_stride; i++)
+ mask |= (u32)sseu->subslice_mask[offset + i] <<
+ i * BITS_PER_BYTE;
+
+ return mask;
+}
+
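+/*
+ * Example (hypothetical values): with ss_stride == 1 and
+ * subslice_mask[slice] == 0x7, the returned mask is 0x7 and
+ * intel_sseu_subslices_per_slice() reports 3 enabled subslices.
+ */
+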
+void intel_sseu_set_subslices(struct sseu_dev_info *sseu, int slice,
+ u32 ss_mask)
+{
+ int offset = slice * sseu->ss_stride;
+
+ memcpy(&sseu->subslice_mask[offset], &ss_mask, sseu->ss_stride);
+}
+
unsigned int
intel_sseu_subslices_per_slice(const struct sseu_dev_info *sseu, u8 slice)
{
- return hweight8(sseu->subslice_mask[slice]);
+ return hweight32(intel_sseu_get_subslices(sseu, slice));
}
u32 intel_sseu_make_rpcs(struct drm_i915_private *i915,
@@ -49,7 +84,7 @@ u32 intel_sseu_make_rpcs(struct drm_i915_private *i915,
* cases which disable slices for functional, apart for performance
* reasons. So in this case we select a known stable subset.
*/
- if (!i915->perf.oa.exclusive_stream) {
+ if (!i915->perf.exclusive_stream) {
ctx_sseu = *req_sseu;
} else {
ctx_sseu = intel_sseu_from_device_info(sseu);
diff --git a/drivers/gpu/drm/i915/gt/intel_sseu.h b/drivers/gpu/drm/i915/gt/intel_sseu.h
index b50d0401a4e2..d1d225204f09 100644
--- a/drivers/gpu/drm/i915/gt/intel_sseu.h
+++ b/drivers/gpu/drm/i915/gt/intel_sseu.h
@@ -10,15 +10,21 @@
#include <linux/types.h>
#include <linux/kernel.h>
+#include "i915_gem.h"
+
struct drm_i915_private;
#define GEN_MAX_SLICES (6) /* CNL upper bound */
#define GEN_MAX_SUBSLICES (8) /* ICL upper bound */
#define GEN_SSEU_STRIDE(max_entries) DIV_ROUND_UP(max_entries, BITS_PER_BYTE)
+#define GEN_MAX_SUBSLICE_STRIDE GEN_SSEU_STRIDE(GEN_MAX_SUBSLICES)
+#define GEN_MAX_EUS (16) /* TGL upper bound */
+#define GEN_MAX_EU_STRIDE GEN_SSEU_STRIDE(GEN_MAX_EUS)
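+/* e.g. GEN_SSEU_STRIDE(8) == 1 byte of mask, GEN_SSEU_STRIDE(16) == 2 */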
struct sseu_dev_info {
u8 slice_mask;
- u8 subslice_mask[GEN_MAX_SLICES];
+ u8 subslice_mask[GEN_MAX_SLICES * GEN_MAX_SUBSLICE_STRIDE];
+ u8 eu_mask[GEN_MAX_SLICES * GEN_MAX_SUBSLICES * GEN_MAX_EU_STRIDE];
u16 eu_total;
u8 eu_per_subslice;
u8 min_eu_in_pool;
@@ -33,11 +39,8 @@ struct sseu_dev_info {
u8 max_subslices;
u8 max_eus_per_subslice;
- /* We don't have more than 8 eus per subslice at the moment and as we
- * store eus enabled using bits, no need to multiply by eus per
- * subslice.
- */
- u8 eu_mask[GEN_MAX_SLICES * GEN_MAX_SUBSLICES];
+ u8 ss_stride;
+ u8 eu_stride;
};
/*
@@ -63,12 +66,34 @@ intel_sseu_from_device_info(const struct sseu_dev_info *sseu)
return value;
}
+static inline bool
+intel_sseu_has_subslice(const struct sseu_dev_info *sseu, int slice,
+ int subslice)
+{
+ u8 mask;
+ int ss_idx = subslice / BITS_PER_BYTE;
+
+ GEM_BUG_ON(ss_idx >= sseu->ss_stride);
+
+ mask = sseu->subslice_mask[slice * sseu->ss_stride + ss_idx];
+
+ return mask & BIT(subslice % BITS_PER_BYTE);
+}
+
+void intel_sseu_set_info(struct sseu_dev_info *sseu, u8 max_slices,
+ u8 max_subslices, u8 max_eus_per_subslice);
+
unsigned int
intel_sseu_subslice_total(const struct sseu_dev_info *sseu);
unsigned int
intel_sseu_subslices_per_slice(const struct sseu_dev_info *sseu, u8 slice);
+u32 intel_sseu_get_subslices(const struct sseu_dev_info *sseu, u8 slice);
+
+void intel_sseu_set_subslices(struct sseu_dev_info *sseu, int slice,
+ u32 ss_mask);
+
u32 intel_sseu_make_rpcs(struct drm_i915_private *i915,
const struct intel_sseu *req_sseu);
diff --git a/drivers/gpu/drm/i915/gt/intel_timeline.c b/drivers/gpu/drm/i915/gt/intel_timeline.c
new file mode 100644
index 000000000000..87716529cd2f
--- /dev/null
+++ b/drivers/gpu/drm/i915/gt/intel_timeline.c
@@ -0,0 +1,573 @@
+/*
+ * SPDX-License-Identifier: MIT
+ *
+ * Copyright © 2016-2018 Intel Corporation
+ */
+
+#include "i915_drv.h"
+
+#include "i915_active.h"
+#include "i915_syncmap.h"
+#include "intel_gt.h"
+#include "intel_ring.h"
+#include "intel_timeline.h"
+
+#define ptr_set_bit(ptr, bit) ((typeof(ptr))((unsigned long)(ptr) | BIT(bit)))
+#define ptr_test_bit(ptr, bit) ((unsigned long)(ptr) & BIT(bit))
+
+#define CACHELINE_BITS 6
+#define CACHELINE_FREE CACHELINE_BITS
+
+struct intel_timeline_hwsp {
+ struct intel_gt *gt;
+ struct intel_gt_timelines *gt_timelines;
+ struct list_head free_link;
+ struct i915_vma *vma;
+ u64 free_bitmap;
+};
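+
+/*
+ * Sketch of the carving scheme: the backing page is divided into
+ * CACHELINE_BYTES slots, one bit each in free_bitmap, e.g. 4096 / 64 ==
+ * 64 slots covered by a single u64 (the BUILD_BUG_ON below enforces the
+ * fit); __ffs64() picks the lowest free slot on allocation.
+ */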
+
+static struct i915_vma *__hwsp_alloc(struct intel_gt *gt)
+{
+ struct drm_i915_private *i915 = gt->i915;
+ struct drm_i915_gem_object *obj;
+ struct i915_vma *vma;
+
+ obj = i915_gem_object_create_internal(i915, PAGE_SIZE);
+ if (IS_ERR(obj))
+ return ERR_CAST(obj);
+
+ i915_gem_object_set_cache_coherency(obj, I915_CACHE_LLC);
+
+ vma = i915_vma_instance(obj, &gt->ggtt->vm, NULL);
+ if (IS_ERR(vma))
+ i915_gem_object_put(obj);
+
+ return vma;
+}
+
+static struct i915_vma *
+hwsp_alloc(struct intel_timeline *timeline, unsigned int *cacheline)
+{
+ struct intel_gt_timelines *gt = &timeline->gt->timelines;
+ struct intel_timeline_hwsp *hwsp;
+
+ BUILD_BUG_ON(BITS_PER_TYPE(u64) * CACHELINE_BYTES > PAGE_SIZE);
+
+ spin_lock_irq(&gt->hwsp_lock);
+
+ /* hwsp_free_list only contains HWSP that have available cachelines */
+ hwsp = list_first_entry_or_null(&gt->hwsp_free_list,
+ typeof(*hwsp), free_link);
+ if (!hwsp) {
+ struct i915_vma *vma;
+
+ spin_unlock_irq(&gt->hwsp_lock);
+
+ hwsp = kmalloc(sizeof(*hwsp), GFP_KERNEL);
+ if (!hwsp)
+ return ERR_PTR(-ENOMEM);
+
+ vma = __hwsp_alloc(timeline->gt);
+ if (IS_ERR(vma)) {
+ kfree(hwsp);
+ return vma;
+ }
+
+ vma->private = hwsp;
+ hwsp->gt = timeline->gt;
+ hwsp->vma = vma;
+ hwsp->free_bitmap = ~0ull;
+ hwsp->gt_timelines = gt;
+
+ spin_lock_irq(&gt->hwsp_lock);
+ list_add(&hwsp->free_link, &gt->hwsp_free_list);
+ }
+
+ GEM_BUG_ON(!hwsp->free_bitmap);
+ *cacheline = __ffs64(hwsp->free_bitmap);
+ hwsp->free_bitmap &= ~BIT_ULL(*cacheline);
+ if (!hwsp->free_bitmap)
+ list_del(&hwsp->free_link);
+
+ spin_unlock_irq(&gt->hwsp_lock);
+
+ GEM_BUG_ON(hwsp->vma->private != hwsp);
+ return hwsp->vma;
+}
+
+static void __idle_hwsp_free(struct intel_timeline_hwsp *hwsp, int cacheline)
+{
+ struct intel_gt_timelines *gt = hwsp->gt_timelines;
+ unsigned long flags;
+
+ spin_lock_irqsave(&gt->hwsp_lock, flags);
+
+ /* As a cacheline becomes available, publish the HWSP on the freelist */
+ if (!hwsp->free_bitmap)
+ list_add_tail(&hwsp->free_link, &gt->hwsp_free_list);
+
+ GEM_BUG_ON(cacheline >= BITS_PER_TYPE(hwsp->free_bitmap));
+ hwsp->free_bitmap |= BIT_ULL(cacheline);
+
+ /* And if no one is left using it, give the page back to the system */
+ if (hwsp->free_bitmap == ~0ull) {
+ i915_vma_put(hwsp->vma);
+ list_del(&hwsp->free_link);
+ kfree(hwsp);
+ }
+
+ spin_unlock_irqrestore(&gt->hwsp_lock, flags);
+}
+
+static void __idle_cacheline_free(struct intel_timeline_cacheline *cl)
+{
+ GEM_BUG_ON(!i915_active_is_idle(&cl->active));
+
+ i915_gem_object_unpin_map(cl->hwsp->vma->obj);
+ i915_vma_put(cl->hwsp->vma);
+ __idle_hwsp_free(cl->hwsp, ptr_unmask_bits(cl->vaddr, CACHELINE_BITS));
+
+ i915_active_fini(&cl->active);
+ kfree_rcu(cl, rcu);
+}
+
+__i915_active_call
+static void __cacheline_retire(struct i915_active *active)
+{
+ struct intel_timeline_cacheline *cl =
+ container_of(active, typeof(*cl), active);
+
+ i915_vma_unpin(cl->hwsp->vma);
+ if (ptr_test_bit(cl->vaddr, CACHELINE_FREE))
+ __idle_cacheline_free(cl);
+}
+
+static int __cacheline_active(struct i915_active *active)
+{
+ struct intel_timeline_cacheline *cl =
+ container_of(active, typeof(*cl), active);
+
+ __i915_vma_pin(cl->hwsp->vma);
+ return 0;
+}
+
+static struct intel_timeline_cacheline *
+cacheline_alloc(struct intel_timeline_hwsp *hwsp, unsigned int cacheline)
+{
+ struct intel_timeline_cacheline *cl;
+ void *vaddr;
+
+ GEM_BUG_ON(cacheline >= BIT(CACHELINE_BITS));
+
+ cl = kmalloc(sizeof(*cl), GFP_KERNEL);
+ if (!cl)
+ return ERR_PTR(-ENOMEM);
+
+ vaddr = i915_gem_object_pin_map(hwsp->vma->obj, I915_MAP_WB);
+ if (IS_ERR(vaddr)) {
+ kfree(cl);
+ return ERR_CAST(vaddr);
+ }
+
+ i915_vma_get(hwsp->vma);
+ cl->hwsp = hwsp;
+ cl->vaddr = page_pack_bits(vaddr, cacheline);
+
+ i915_active_init(&cl->active, __cacheline_active, __cacheline_retire);
+
+ return cl;
+}
+
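+/*
+ * cl->vaddr packs the cacheline index (and later the CACHELINE_FREE
+ * flag) into the low bits of the page-aligned mapping, e.g. index 5 in
+ * a mapping at 0x...f000 is stored as 0x...f005 (illustrative values);
+ * page_mask_bits() and ptr_unmask_bits() recover the two halves.
+ */
+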
+static void cacheline_acquire(struct intel_timeline_cacheline *cl)
+{
+ if (cl)
+ i915_active_acquire(&cl->active);
+}
+
+static void cacheline_release(struct intel_timeline_cacheline *cl)
+{
+ if (cl)
+ i915_active_release(&cl->active);
+}
+
+static void cacheline_free(struct intel_timeline_cacheline *cl)
+{
+ GEM_BUG_ON(ptr_test_bit(cl->vaddr, CACHELINE_FREE));
+ cl->vaddr = ptr_set_bit(cl->vaddr, CACHELINE_FREE);
+
+ if (i915_active_is_idle(&cl->active))
+ __idle_cacheline_free(cl);
+}
+
+int intel_timeline_init(struct intel_timeline *timeline,
+ struct intel_gt *gt,
+ struct i915_vma *hwsp)
+{
+ void *vaddr;
+
+ kref_init(&timeline->kref);
+ atomic_set(&timeline->pin_count, 0);
+
+ timeline->gt = gt;
+
+ timeline->has_initial_breadcrumb = !hwsp;
+ timeline->hwsp_cacheline = NULL;
+
+ if (!hwsp) {
+ struct intel_timeline_cacheline *cl;
+ unsigned int cacheline;
+
+ hwsp = hwsp_alloc(timeline, &cacheline);
+ if (IS_ERR(hwsp))
+ return PTR_ERR(hwsp);
+
+ cl = cacheline_alloc(hwsp->private, cacheline);
+ if (IS_ERR(cl)) {
+ __idle_hwsp_free(hwsp->private, cacheline);
+ return PTR_ERR(cl);
+ }
+
+ timeline->hwsp_cacheline = cl;
+ timeline->hwsp_offset = cacheline * CACHELINE_BYTES;
+
+ vaddr = page_mask_bits(cl->vaddr);
+ } else {
+ timeline->hwsp_offset = I915_GEM_HWS_SEQNO_ADDR;
+
+ vaddr = i915_gem_object_pin_map(hwsp->obj, I915_MAP_WB);
+ if (IS_ERR(vaddr))
+ return PTR_ERR(vaddr);
+ }
+
+ timeline->hwsp_seqno =
+ memset(vaddr + timeline->hwsp_offset, 0, CACHELINE_BYTES);
+
+ timeline->hwsp_ggtt = i915_vma_get(hwsp);
+ GEM_BUG_ON(timeline->hwsp_offset >= hwsp->size);
+
+ timeline->fence_context = dma_fence_context_alloc(1);
+
+ mutex_init(&timeline->mutex);
+
+ INIT_ACTIVE_FENCE(&timeline->last_request);
+ INIT_LIST_HEAD(&timeline->requests);
+
+ i915_syncmap_init(&timeline->sync);
+
+ return 0;
+}
+
+void intel_gt_init_timelines(struct intel_gt *gt)
+{
+ struct intel_gt_timelines *timelines = &gt->timelines;
+
+ spin_lock_init(&timelines->lock);
+ INIT_LIST_HEAD(&timelines->active_list);
+
+ spin_lock_init(&timelines->hwsp_lock);
+ INIT_LIST_HEAD(&timelines->hwsp_free_list);
+}
+
+void intel_timeline_fini(struct intel_timeline *timeline)
+{
+ GEM_BUG_ON(atomic_read(&timeline->pin_count));
+ GEM_BUG_ON(!list_empty(&timeline->requests));
+ GEM_BUG_ON(timeline->retire);
+
+ if (timeline->hwsp_cacheline)
+ cacheline_free(timeline->hwsp_cacheline);
+ else
+ i915_gem_object_unpin_map(timeline->hwsp_ggtt->obj);
+
+ i915_vma_put(timeline->hwsp_ggtt);
+}
+
+struct intel_timeline *
+intel_timeline_create(struct intel_gt *gt, struct i915_vma *global_hwsp)
+{
+ struct intel_timeline *timeline;
+ int err;
+
+ timeline = kzalloc(sizeof(*timeline), GFP_KERNEL);
+ if (!timeline)
+ return ERR_PTR(-ENOMEM);
+
+ err = intel_timeline_init(timeline, gt, global_hwsp);
+ if (err) {
+ kfree(timeline);
+ return ERR_PTR(err);
+ }
+
+ return timeline;
+}
+
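+/*
+ * Pin the timeline's HWSP into the GGTT. The first atomic_add_unless()
+ * is the fast path for an already pinned timeline; if two callers race
+ * past it, the loser detects the concurrent pin via atomic_fetch_inc()
+ * and backs out its duplicate acquire/pin.
+ */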
+int intel_timeline_pin(struct intel_timeline *tl)
+{
+ int err;
+
+ if (atomic_add_unless(&tl->pin_count, 1, 0))
+ return 0;
+
+ err = i915_vma_pin(tl->hwsp_ggtt, 0, 0, PIN_GLOBAL | PIN_HIGH);
+ if (err)
+ return err;
+
+ tl->hwsp_offset =
+ i915_ggtt_offset(tl->hwsp_ggtt) +
+ offset_in_page(tl->hwsp_offset);
+
+ cacheline_acquire(tl->hwsp_cacheline);
+ if (atomic_fetch_inc(&tl->pin_count)) {
+ cacheline_release(tl->hwsp_cacheline);
+ __i915_vma_unpin(tl->hwsp_ggtt);
+ }
+
+ return 0;
+}
+
+void intel_timeline_enter(struct intel_timeline *tl)
+{
+ struct intel_gt_timelines *timelines = &tl->gt->timelines;
+
+ /*
+ * Pretend we are serialised by the timeline->mutex.
+ *
+ * While generally true, there are a few exceptions to the rule
+ * for the engine->kernel_context being used to manage power
+ * transitions. As the engine_park may be called from under any
+ * timeline, it uses the power mutex as a global serialisation
+ * lock to prevent any other request entering its timeline.
+ *
+ * The rule is generally tl->mutex, otherwise engine->wakeref.mutex.
+ *
+ * However, intel_gt_retire_requests() does not know along which
+ * engine it is retiring and so cannot partake in the engine-pm
+ * barrier, and there we use the tl->active_count as a means to
+ * pin the timeline in the active_list while the locks are dropped.
+ * Ergo, as that is outside of the engine-pm barrier, we need to
+ * use atomics to manipulate tl->active_count.
+ */
+ lockdep_assert_held(&tl->mutex);
+
+ if (atomic_add_unless(&tl->active_count, 1, 0))
+ return;
+
+ spin_lock(&timelines->lock);
+ if (!atomic_fetch_inc(&tl->active_count))
+ list_add_tail(&tl->link, &timelines->active_list);
+ spin_unlock(&timelines->lock);
+}
+
+void intel_timeline_exit(struct intel_timeline *tl)
+{
+ struct intel_gt_timelines *timelines = &tl->gt->timelines;
+
+ /* See intel_timeline_enter() */
+ lockdep_assert_held(&tl->mutex);
+
+ GEM_BUG_ON(!atomic_read(&tl->active_count));
+ if (atomic_add_unless(&tl->active_count, -1, 1))
+ return;
+
+ spin_lock(&timelines->lock);
+ if (atomic_dec_and_test(&tl->active_count))
+ list_del(&tl->link);
+ spin_unlock(&timelines->lock);
+
+ /*
+ * Since this timeline is idle, all barriers upon which we were waiting
+ * must also be complete and so we can discard the last used barriers
+ * without loss of information.
+ */
+ i915_syncmap_free(&tl->sync);
+}
+
+static u32 timeline_advance(struct intel_timeline *tl)
+{
+ GEM_BUG_ON(!atomic_read(&tl->pin_count));
+ GEM_BUG_ON(tl->seqno & tl->has_initial_breadcrumb);
+
+ return tl->seqno += 1 + tl->has_initial_breadcrumb;
+}
+
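+/*
+ * Illustrative: with an initial breadcrumb each request consumes two
+ * seqno values, so successive calls return 2, 4, 6, ...; without one
+ * they return 1, 2, 3, ... The GEM_BUG_ON above enforces the parity.
+ */
+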
+static void timeline_rollback(struct intel_timeline *tl)
+{
+ tl->seqno -= 1 + tl->has_initial_breadcrumb;
+}
+
+static noinline int
+__intel_timeline_get_seqno(struct intel_timeline *tl,
+ struct i915_request *rq,
+ u32 *seqno)
+{
+ struct intel_timeline_cacheline *cl;
+ unsigned int cacheline;
+ struct i915_vma *vma;
+ void *vaddr;
+ int err;
+
+ /*
+ * If there is an outstanding GPU reference to this cacheline,
+ * such as it being sampled by a HW semaphore on another timeline,
+ * we cannot wraparound our seqno value (the HW semaphore does
+ * a strict greater-than-or-equals compare, not i915_seqno_passed).
+ * So if the cacheline is still busy, we must detach ourselves
+ * from it and leave it inflight alongside its users.
+ *
+ * However, if nobody is watching and we can guarantee that nobody
+ * will, we could simply reuse the same cacheline.
+ *
+ * if (i915_active_request_is_signaled(&tl->last_request) &&
+ * i915_active_is_signaled(&tl->hwsp_cacheline->active))
+ * return 0;
+ *
+ * That seems unlikely for a busy timeline that needed to wrap in
+ * the first place, so just replace the cacheline.
+ */
+
+ vma = hwsp_alloc(tl, &cacheline);
+ if (IS_ERR(vma)) {
+ err = PTR_ERR(vma);
+ goto err_rollback;
+ }
+
+ err = i915_vma_pin(vma, 0, 0, PIN_GLOBAL | PIN_HIGH);
+ if (err) {
+ __idle_hwsp_free(vma->private, cacheline);
+ goto err_rollback;
+ }
+
+ cl = cacheline_alloc(vma->private, cacheline);
+ if (IS_ERR(cl)) {
+ err = PTR_ERR(cl);
+ __idle_hwsp_free(vma->private, cacheline);
+ goto err_unpin;
+ }
+ GEM_BUG_ON(cl->hwsp->vma != vma);
+
+ /*
+ * Attach the old cacheline to the current request, so that we only
+ * free it after the current request is retired, which ensures that
+ * all writes into the cacheline from previous requests are complete.
+ */
+ err = i915_active_ref(&tl->hwsp_cacheline->active, tl, &rq->fence);
+ if (err)
+ goto err_cacheline;
+
+ cacheline_release(tl->hwsp_cacheline); /* ownership now xfered to rq */
+ cacheline_free(tl->hwsp_cacheline);
+
+ i915_vma_unpin(tl->hwsp_ggtt); /* binding kept alive by old cacheline */
+ i915_vma_put(tl->hwsp_ggtt);
+
+ tl->hwsp_ggtt = i915_vma_get(vma);
+
+ vaddr = page_mask_bits(cl->vaddr);
+ tl->hwsp_offset = cacheline * CACHELINE_BYTES;
+ tl->hwsp_seqno =
+ memset(vaddr + tl->hwsp_offset, 0, CACHELINE_BYTES);
+
+ tl->hwsp_offset += i915_ggtt_offset(vma);
+
+ cacheline_acquire(cl);
+ tl->hwsp_cacheline = cl;
+
+ *seqno = timeline_advance(tl);
+ GEM_BUG_ON(i915_seqno_passed(*tl->hwsp_seqno, *seqno));
+ return 0;
+
+err_cacheline:
+ cacheline_free(cl);
+err_unpin:
+ i915_vma_unpin(vma);
+err_rollback:
+ timeline_rollback(tl);
+ return err;
+}
+
+int intel_timeline_get_seqno(struct intel_timeline *tl,
+ struct i915_request *rq,
+ u32 *seqno)
+{
+ *seqno = timeline_advance(tl);
+
+ /* Replace the HWSP on wraparound for HW semaphores */
+ if (unlikely(!*seqno && tl->hwsp_cacheline))
+ return __intel_timeline_get_seqno(tl, rq, seqno);
+
+ return 0;
+}
+
+static int cacheline_ref(struct intel_timeline_cacheline *cl,
+ struct i915_request *rq)
+{
+ return i915_active_add_request(&cl->active, rq);
+}
+
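+/*
+ * Report the GGTT offset of the HWSP slot holding @from's seqno so that
+ * @to may await it. Returns 0 on success with *hwsp set, a positive
+ * value if @from has already completed (no await needed), or a negative
+ * errno.
+ */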
+int intel_timeline_read_hwsp(struct i915_request *from,
+ struct i915_request *to,
+ u32 *hwsp)
+{
+ struct intel_timeline_cacheline *cl;
+ int err;
+
+ GEM_BUG_ON(!rcu_access_pointer(from->hwsp_cacheline));
+
+ rcu_read_lock();
+ cl = rcu_dereference(from->hwsp_cacheline);
+ if (unlikely(!i915_active_acquire_if_busy(&cl->active)))
+ goto unlock; /* seqno wrapped and completed! */
+ if (unlikely(i915_request_completed(from)))
+ goto release;
+ rcu_read_unlock();
+
+ err = cacheline_ref(cl, to);
+ if (err)
+ goto out;
+
+ *hwsp = i915_ggtt_offset(cl->hwsp->vma) +
+ ptr_unmask_bits(cl->vaddr, CACHELINE_BITS) * CACHELINE_BYTES;
+
+out:
+ i915_active_release(&cl->active);
+ return err;
+
+release:
+ i915_active_release(&cl->active);
+unlock:
+ rcu_read_unlock();
+ return 1;
+}
+
+void intel_timeline_unpin(struct intel_timeline *tl)
+{
+ GEM_BUG_ON(!atomic_read(&tl->pin_count));
+ if (!atomic_dec_and_test(&tl->pin_count))
+ return;
+
+ cacheline_release(tl->hwsp_cacheline);
+
+ __i915_vma_unpin(tl->hwsp_ggtt);
+}
+
+void __intel_timeline_free(struct kref *kref)
+{
+ struct intel_timeline *timeline =
+ container_of(kref, typeof(*timeline), kref);
+
+ intel_timeline_fini(timeline);
+ kfree_rcu(timeline, rcu);
+}
+
+void intel_gt_fini_timelines(struct intel_gt *gt)
+{
+ struct intel_gt_timelines *timelines = &gt->timelines;
+
+ GEM_BUG_ON(!list_empty(&timelines->active_list));
+ GEM_BUG_ON(!list_empty(&timelines->hwsp_free_list));
+}
+
+#if IS_ENABLED(CONFIG_DRM_I915_SELFTEST)
+#include "gt/selftests/mock_timeline.c"
+#include "gt/selftest_timeline.c"
+#endif
diff --git a/drivers/gpu/drm/i915/gt/intel_timeline.h b/drivers/gpu/drm/i915/gt/intel_timeline.h
new file mode 100644
index 000000000000..f5b7eade3809
--- /dev/null
+++ b/drivers/gpu/drm/i915/gt/intel_timeline.h
@@ -0,0 +1,94 @@
+/*
+ * Copyright © 2016 Intel Corporation
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
+ * IN THE SOFTWARE.
+ *
+ */
+
+#ifndef I915_TIMELINE_H
+#define I915_TIMELINE_H
+
+#include <linux/lockdep.h>
+
+#include "i915_active.h"
+#include "i915_syncmap.h"
+#include "gt/intel_timeline_types.h"
+
+int intel_timeline_init(struct intel_timeline *tl,
+ struct intel_gt *gt,
+ struct i915_vma *hwsp);
+void intel_timeline_fini(struct intel_timeline *tl);
+
+struct intel_timeline *
+intel_timeline_create(struct intel_gt *gt, struct i915_vma *global_hwsp);
+
+static inline struct intel_timeline *
+intel_timeline_get(struct intel_timeline *timeline)
+{
+ kref_get(&timeline->kref);
+ return timeline;
+}
+
+void __intel_timeline_free(struct kref *kref);
+static inline void intel_timeline_put(struct intel_timeline *timeline)
+{
+ kref_put(&timeline->kref, __intel_timeline_free);
+}
+
+static inline int __intel_timeline_sync_set(struct intel_timeline *tl,
+ u64 context, u32 seqno)
+{
+ return i915_syncmap_set(&tl->sync, context, seqno);
+}
+
+static inline int intel_timeline_sync_set(struct intel_timeline *tl,
+ const struct dma_fence *fence)
+{
+ return __intel_timeline_sync_set(tl, fence->context, fence->seqno);
+}
+
+static inline bool __intel_timeline_sync_is_later(struct intel_timeline *tl,
+ u64 context, u32 seqno)
+{
+ return i915_syncmap_is_later(&tl->sync, context, seqno);
+}
+
+static inline bool intel_timeline_sync_is_later(struct intel_timeline *tl,
+ const struct dma_fence *fence)
+{
+ return __intel_timeline_sync_is_later(tl, fence->context, fence->seqno);
+}
+
+int intel_timeline_pin(struct intel_timeline *tl);
+void intel_timeline_enter(struct intel_timeline *tl);
+int intel_timeline_get_seqno(struct intel_timeline *tl,
+ struct i915_request *rq,
+ u32 *seqno);
+void intel_timeline_exit(struct intel_timeline *tl);
+void intel_timeline_unpin(struct intel_timeline *tl);
+
+int intel_timeline_read_hwsp(struct i915_request *from,
+ struct i915_request *until,
+ u32 *hwsp_offset);
+
+void intel_gt_init_timelines(struct intel_gt *gt);
+void intel_gt_fini_timelines(struct intel_gt *gt);
+
+#endif /* I915_TIMELINE_H */
diff --git a/drivers/gpu/drm/i915/gt/intel_timeline_types.h b/drivers/gpu/drm/i915/gt/intel_timeline_types.h
new file mode 100644
index 000000000000..02181c5020db
--- /dev/null
+++ b/drivers/gpu/drm/i915/gt/intel_timeline_types.h
@@ -0,0 +1,100 @@
+/*
+ * SPDX-License-Identifier: MIT
+ *
+ * Copyright © 2016 Intel Corporation
+ */
+
+#ifndef __I915_TIMELINE_TYPES_H__
+#define __I915_TIMELINE_TYPES_H__
+
+#include <linux/list.h>
+#include <linux/kref.h>
+#include <linux/mutex.h>
+#include <linux/rcupdate.h>
+#include <linux/types.h>
+
+#include "i915_active_types.h"
+
+struct i915_vma;
+struct i915_syncmap;
+struct intel_gt;
+struct intel_timeline_hwsp;
+
+struct intel_timeline {
+ u64 fence_context;
+ u32 seqno;
+
+ struct mutex mutex; /* protects the flow of requests */
+
+ /*
+ * pin_count and active_count track essentially the same thing:
+ * How many requests are in flight or may be under construction.
+ *
+ * We need two distinct counters so that we can assign different
+ * lifetimes to the events for different use-cases. For example,
+ * we want to permanently keep the timeline pinned for the kernel
+ * context so that we can issue requests at any time without having
+ * to acquire space in the GGTT. However, we want to keep tracking
+ * the activity (to be able to detect when we become idle) along that
+ * permanently pinned timeline and so end up requiring two counters.
+ *
+ * Note that the active_count is protected by the intel_timeline.mutex,
+ * but the pin_count is protected by a combination of serialisation
+ * from the intel_context caller plus internal atomicity.
+ */
+ atomic_t pin_count;
+ atomic_t active_count;
+
+ const u32 *hwsp_seqno;
+ struct i915_vma *hwsp_ggtt;
+ u32 hwsp_offset;
+
+ struct intel_timeline_cacheline *hwsp_cacheline;
+
+ bool has_initial_breadcrumb;
+
+ /**
+ * List of breadcrumbs associated with GPU requests currently
+ * outstanding.
+ */
+ struct list_head requests;
+
+ /*
+ * Contains an RCU guarded pointer to the last request. No reference is
+ * held to the request, users must carefully acquire a reference to
+ * the request using i915_active_fence_get(), or manage the RCU
+ * protection themselves (cf the i915_active_fence API).
+ */
+ struct i915_active_fence last_request;
+
+ /** A chain of completed timelines ready for early retirement. */
+ struct intel_timeline *retire;
+
+ /**
+ * We track the most recent seqno that we wait on in every context so
+ * that we only have to emit a new await and dependency on a more
+ * recent sync point. As the contexts may be executed out-of-order, we
+ * have to track each individually and can not rely on an absolute
+ * global_seqno. When we know that all tracked fences are completed
+ * (i.e. when the driver is idle), we know that the syncmap is
+ * redundant and we can discard it without loss of generality.
+ */
+ struct i915_syncmap *sync;
+
+ struct list_head link;
+ struct intel_gt *gt;
+
+ struct kref kref;
+ struct rcu_head rcu;
+};
+
+struct intel_timeline_cacheline {
+ struct i915_active active;
+
+ struct intel_timeline_hwsp *hwsp;
+ void *vaddr;
+
+ struct rcu_head rcu;
+};
+
+#endif /* __I915_TIMELINE_TYPES_H__ */
diff --git a/drivers/gpu/drm/i915/gt/intel_workarounds.c b/drivers/gpu/drm/i915/gt/intel_workarounds.c
index 98dfb086320f..4e292d4bf7b9 100644
--- a/drivers/gpu/drm/i915/gt/intel_workarounds.c
+++ b/drivers/gpu/drm/i915/gt/intel_workarounds.c
@@ -6,6 +6,9 @@
#include "i915_drv.h"
#include "intel_context.h"
+#include "intel_engine_pm.h"
+#include "intel_gt.h"
+#include "intel_ring.h"
#include "intel_workarounds.h"
/**
@@ -49,9 +52,10 @@
* - Public functions to init or apply the given workaround type.
*/
-static void wa_init_start(struct i915_wa_list *wal, const char *name)
+static void wa_init_start(struct i915_wa_list *wal, const char *name, const char *engine_name)
{
wal->name = name;
+ wal->engine_name = engine_name;
}
#define WA_LIST_CHUNK (1 << 4)
@@ -73,8 +77,8 @@ static void wa_init_finish(struct i915_wa_list *wal)
if (!wal->count)
return;
- DRM_DEBUG_DRIVER("Initialized %u %s workarounds\n",
- wal->wa_count, wal->name);
+ DRM_DEBUG_DRIVER("Initialized %u %s workarounds on %s\n",
+ wal->wa_count, wal->name, wal->engine_name);
}
static void _wa_add(struct i915_wa_list *wal, const struct i915_wa *wa)
@@ -143,21 +147,27 @@ static void _wa_add(struct i915_wa_list *wal, const struct i915_wa *wa)
}
}
-static void
-wa_write_masked_or(struct i915_wa_list *wal, i915_reg_t reg, u32 mask,
- u32 val)
+static void wa_add(struct i915_wa_list *wal, i915_reg_t reg, u32 mask,
+ u32 val, u32 read_mask)
{
struct i915_wa wa = {
.reg = reg,
.mask = mask,
.val = val,
- .read = mask,
+ .read = read_mask,
};
_wa_add(wal, &wa);
}
static void
+wa_write_masked_or(struct i915_wa_list *wal, i915_reg_t reg, u32 mask,
+ u32 val)
+{
+ wa_add(wal, reg, mask, val, mask);
+}
+
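+/*
+ * A note on wa_add() semantics as used above: 'mask' selects the bits
+ * to update, 'val' supplies their new values and 'read_mask' selects
+ * which bits are later verified; a read_mask of 0 (as tgl's FF_MODE2
+ * entry below passes on A0) skips verification entirely.
+ */
+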
+static void
wa_masked_en(struct i915_wa_list *wal, i915_reg_t reg, u32 val)
{
wa_write_masked_or(wal, reg, val, _MASKED_BIT_ENABLE(val));
@@ -175,19 +185,6 @@ wa_write_or(struct i915_wa_list *wal, i915_reg_t reg, u32 val)
wa_write_masked_or(wal, reg, val, val);
}
-static void
-ignore_wa_write_or(struct i915_wa_list *wal, i915_reg_t reg, u32 mask, u32 val)
-{
- struct i915_wa wa = {
- .reg = reg,
- .mask = mask,
- .val = val,
- /* Bonkers HW, skip verifying */
- };
-
- _wa_add(wal, &wa);
-}
-
#define WA_SET_BIT_MASKED(addr, mask) \
wa_write_masked_or(wal, (addr), (mask), _MASKED_BIT_ENABLE(mask))
@@ -257,7 +254,7 @@ static void bdw_ctx_workarounds_init(struct intel_engine_cs *engine,
/* WaDisableDopClockGating:bdw
*
- * Also see the related UCGTCL1 write in broadwell_init_clock_gating()
+ * Also see the related UCGTCL1 write in bdw_init_clock_gating()
* to disable EUTC clock gating.
*/
WA_SET_BIT_MASKED(GEN7_ROW_CHICKEN2,
@@ -308,11 +305,6 @@ static void gen9_ctx_workarounds_init(struct intel_engine_cs *engine,
FLOW_CONTROL_ENABLE |
PARTIAL_INSTRUCTION_SHOOTDOWN_DISABLE);
- /* Syncing dependencies between camera and graphics:skl,bxt,kbl */
- if (!IS_COFFEELAKE(i915))
- WA_SET_BIT_MASKED(HALF_SLICE_CHICKEN3,
- GEN9_DISABLE_OCL_OOB_SUPPRESS_LOGIC);
-
/* WaEnableYV12BugFixInHalfSliceChicken7:skl,bxt,kbl,glk,cfl */
/* WaEnableSamplerGPGPUPreemptionSupport:skl,bxt,kbl,cfl */
WA_SET_BIT_MASKED(GEN9_HALF_SLICE_CHICKEN7,
@@ -536,12 +528,6 @@ static void icl_ctx_workarounds_init(struct intel_engine_cs *engine,
intel_uncore_read(engine->uncore, GEN8_L3CNTLREG) |
GEN8_ERRDETBCTRL);
- /* WaDisableBankHangMode:icl */
- wa_write(wal,
- GEN8_L3CNTLREG,
- intel_uncore_read(engine->uncore, GEN8_L3CNTLREG) |
- GEN8_ERRDETBCTRL);
-
/* Wa_1604370585:icl (pre-prod)
* Formerly known as WaPushConstantDereferenceHoldDisable
*/
@@ -586,6 +572,29 @@ static void icl_ctx_workarounds_init(struct intel_engine_cs *engine,
GEN11_SAMPLER_ENABLE_HEADLESS_MSG);
}
+static void tgl_ctx_workarounds_init(struct intel_engine_cs *engine,
+ struct i915_wa_list *wal)
+{
+ u32 val;
+
+ /* Wa_1409142259:tgl */
+ WA_SET_BIT_MASKED(GEN11_COMMON_SLICE_CHICKEN3,
+ GEN12_DISABLE_CPS_AWARE_COLOR_PIPE);
+
+ /* Wa_1604555607:tgl */
+ val = intel_uncore_read(engine->uncore, FF_MODE2);
+ val &= ~FF_MODE2_TDS_TIMER_MASK;
+ val |= FF_MODE2_TDS_TIMER_128;
+ /*
+ * FIXME: the FF_MODE2 register is not readable until TGL B0, so
+ * verification of this WA can only be enabled from the later
+ * steppings, where reading FF_MODE2 works.
+ */
+ wa_add(wal, FF_MODE2, FF_MODE2_TDS_TIMER_MASK, val,
+ IS_TGL_REVID(engine->i915, TGL_REVID_A0, TGL_REVID_A0) ? 0 :
+ FF_MODE2_TDS_TIMER_MASK);
+}
+
static void
__intel_engine_init_ctx_wa(struct intel_engine_cs *engine,
struct i915_wa_list *wal,
@@ -596,9 +605,11 @@ __intel_engine_init_ctx_wa(struct intel_engine_cs *engine,
if (engine->class != RENDER_CLASS)
return;
- wa_init_start(wal, name);
+ wa_init_start(wal, name, engine->name);
- if (IS_GEN(i915, 11))
+ if (IS_GEN(i915, 12))
+ tgl_ctx_workarounds_init(engine, wal);
+ else if (IS_GEN(i915, 11))
icl_ctx_workarounds_init(engine, wal);
else if (IS_CANNONLAKE(i915))
cnl_ctx_workarounds_init(engine, wal);
@@ -766,7 +777,10 @@ static void
wa_init_mcr(struct drm_i915_private *i915, struct i915_wa_list *wal)
{
const struct sseu_dev_info *sseu = &RUNTIME_INFO(i915)->sseu;
- u32 mcr_slice_subslice_mask;
+ unsigned int slice, subslice;
+ u32 l3_en, mcr, mcr_mask;
+
+ GEM_BUG_ON(INTEL_GEN(i915) < 10);
/*
* WaProgramMgsrForL3BankSpecificMmioReads: cnl,icl
@@ -774,42 +788,7 @@ wa_init_mcr(struct drm_i915_private *i915, struct i915_wa_list *wal)
* the case, we might need to program MCR select to a valid L3Bank
* by default, to make sure we correctly read certain registers
* later on (in the range 0xB100 - 0xB3FF).
- * This might be incompatible with
- * WaProgramMgsrForCorrectSliceSpecificMmioReads.
- * Fortunately, this should not happen in production hardware, so
- * we only assert that this is the case (instead of implementing
- * something more complex that requires checking the range of every
- * MMIO read).
- */
- if (INTEL_GEN(i915) >= 10 &&
- is_power_of_2(sseu->slice_mask)) {
- /*
- * read FUSE3 for enabled L3 Bank IDs, if L3 Bank matches
- * enabled subslice, no need to redirect MCR packet
- */
- u32 slice = fls(sseu->slice_mask);
- u32 fuse3 =
- intel_uncore_read(&i915->uncore, GEN10_MIRROR_FUSE3);
- u8 ss_mask = sseu->subslice_mask[slice];
-
- u8 enabled_mask = (ss_mask | ss_mask >>
- GEN10_L3BANK_PAIR_COUNT) & GEN10_L3BANK_MASK;
- u8 disabled_mask = fuse3 & GEN10_L3BANK_MASK;
-
- /*
- * Production silicon should have matched L3Bank and
- * subslice enabled
- */
- WARN_ON((enabled_mask & disabled_mask) != enabled_mask);
- }
-
- if (INTEL_GEN(i915) >= 11)
- mcr_slice_subslice_mask = GEN11_MCR_SLICE_MASK |
- GEN11_MCR_SUBSLICE_MASK;
- else
- mcr_slice_subslice_mask = GEN8_MCR_SLICE_MASK |
- GEN8_MCR_SUBSLICE_MASK;
- /*
+ *
* WaProgramMgsrForCorrectSliceSpecificMmioReads:cnl,icl
* Before any MMIO read into slice/subslice specific registers, MCR
* packet control register needs to be programmed to point to any
@@ -819,11 +798,50 @@ wa_init_mcr(struct drm_i915_private *i915, struct i915_wa_list *wal)
* are consistent across s/ss in almost all cases. In the rare
* occasions, such as INSTDONE, where this value is dependent
* on s/ss combo, the read should be done with read_subslice_reg.
+ *
+ * Since GEN8_MCR_SELECTOR contains dual-purpose bits which select both
+ * to which subslice, or to which L3 bank, the respective mmio reads
+ * will go, we have to find a common index which works for both
+ * accesses.
+ *
+ * The case where we cannot find a common index fortunately should not
+ * happen on production hardware, so we only emit a warning instead of
+ * implementing something more complex that would require checking the
+ * range of every MMIO read.
*/
- wa_write_masked_or(wal,
- GEN8_MCR_SELECTOR,
- mcr_slice_subslice_mask,
- intel_calculate_mcr_s_ss_select(i915));
+
+ if (INTEL_GEN(i915) >= 10 && is_power_of_2(sseu->slice_mask)) {
+ u32 l3_fuse =
+ intel_uncore_read(&i915->uncore, GEN10_MIRROR_FUSE3) &
+ GEN10_L3BANK_MASK;
+
+ DRM_DEBUG_DRIVER("L3 fuse = %x\n", l3_fuse);
+ l3_en = ~(l3_fuse << GEN10_L3BANK_PAIR_COUNT | l3_fuse);
+ } else {
+ l3_en = ~0;
+ }
+
+ slice = fls(sseu->slice_mask) - 1;
+ subslice = fls(l3_en & intel_sseu_get_subslices(sseu, slice));
+ if (!subslice) {
+ DRM_WARN("No common index found between subslice mask %x and L3 bank mask %x!\n",
+ intel_sseu_get_subslices(sseu, slice), l3_en);
+ subslice = fls(l3_en);
+ WARN_ON(!subslice);
+ }
+ subslice--;
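+
+ /*
+ * E.g. (hypothetical fuses): slice_mask 0x1 with all L3 banks and
+ * subslices enabled (masks 0xff) selects slice 0 and, via fls() - 1,
+ * subslice 7.
+ */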
+
+ if (INTEL_GEN(i915) >= 11) {
+ mcr = GEN11_MCR_SLICE(slice) | GEN11_MCR_SUBSLICE(subslice);
+ mcr_mask = GEN11_MCR_SLICE_MASK | GEN11_MCR_SUBSLICE_MASK;
+ } else {
+ mcr = GEN8_MCR_SLICE(slice) | GEN8_MCR_SUBSLICE(subslice);
+ mcr_mask = GEN8_MCR_SLICE_MASK | GEN8_MCR_SUBSLICE_MASK;
+ }
+
+ DRM_DEBUG_DRIVER("MCR slice/subslice = %x\n", mcr);
+
+ wa_write_masked_or(wal, GEN8_MCR_SELECTOR, mcr_mask, mcr);
}
static void
@@ -897,12 +915,35 @@ icl_gt_workarounds_init(struct drm_i915_private *i915, struct i915_wa_list *wal)
wa_write_or(wal,
GAMT_CHKN_BIT_REG,
GAMT_CHKN_DISABLE_L3_COH_PIPE);
+
+ /* Wa_1607087056:icl */
+ wa_write_or(wal,
+ SLICE_UNIT_LEVEL_CLKGATE,
+ L3_CLKGATE_DIS | L3_CR2X_CLKGATE_DIS);
+}
+
+static void
+tgl_gt_workarounds_init(struct drm_i915_private *i915, struct i915_wa_list *wal)
+{
+ /* Wa_1409420604:tgl */
+ if (IS_TGL_REVID(i915, TGL_REVID_A0, TGL_REVID_A0))
+ wa_write_or(wal,
+ SUBSLICE_UNIT_LEVEL_CLKGATE2,
+ CPSSUNIT_CLKGATE_DIS);
+
+ /* Wa_1409180338:tgl */
+ if (IS_TGL_REVID(i915, TGL_REVID_A0, TGL_REVID_A0))
+ wa_write_or(wal,
+ SLICE_UNIT_LEVEL_CLKGATE,
+ L3_CLKGATE_DIS | L3_CR2X_CLKGATE_DIS);
}
static void
gt_init_workarounds(struct drm_i915_private *i915, struct i915_wa_list *wal)
{
- if (IS_GEN(i915, 11))
+ if (IS_GEN(i915, 12))
+ tgl_gt_workarounds_init(i915, wal);
+ else if (IS_GEN(i915, 11))
icl_gt_workarounds_init(i915, wal);
else if (IS_CANNONLAKE(i915))
cnl_gt_workarounds_init(i915, wal);
@@ -926,7 +967,7 @@ void intel_gt_init_workarounds(struct drm_i915_private *i915)
{
struct i915_wa_list *wal = &i915->gt_wa_list;
- wa_init_start(wal, "GT");
+ wa_init_start(wal, "GT", "global");
gt_init_workarounds(i915, wal);
wa_init_finish(wal);
}
@@ -990,9 +1031,9 @@ wa_list_apply(struct intel_uncore *uncore, const struct i915_wa_list *wal)
spin_unlock_irqrestore(&uncore->lock, flags);
}
-void intel_gt_apply_workarounds(struct drm_i915_private *i915)
+void intel_gt_apply_workarounds(struct intel_gt *gt)
{
- wa_list_apply(&i915->uncore, &i915->gt_wa_list);
+ wa_list_apply(gt->uncore, &gt->i915->gt_wa_list);
}
static bool wa_list_verify(struct intel_uncore *uncore,
@@ -1011,10 +1052,23 @@ static bool wa_list_verify(struct intel_uncore *uncore,
return ok;
}
-bool intel_gt_verify_workarounds(struct drm_i915_private *i915,
- const char *from)
+bool intel_gt_verify_workarounds(struct intel_gt *gt, const char *from)
{
- return wa_list_verify(&i915->uncore, &i915->gt_wa_list, from);
+ return wa_list_verify(gt->uncore, &gt->i915->gt_wa_list, from);
+}
+
+static inline bool is_nonpriv_flags_valid(u32 flags)
+{
+ /* Check only valid flag bits are set */
+ if (flags & ~RING_FORCE_TO_NONPRIV_MASK_VALID)
+ return false;
+
+ /* NB: Only 3 out of 4 enum values are valid for the access field */
+ if ((flags & RING_FORCE_TO_NONPRIV_ACCESS_MASK) ==
+ RING_FORCE_TO_NONPRIV_ACCESS_INVALID)
+ return false;
+
+ return true;
}
static void
@@ -1027,6 +1081,9 @@ whitelist_reg_ext(struct i915_wa_list *wal, i915_reg_t reg, u32 flags)
if (GEM_DEBUG_WARN_ON(wal->count >= RING_MAX_NONPRIV_SLOTS))
return;
+ if (GEM_DEBUG_WARN_ON(!is_nonpriv_flags_valid(flags)))
+ return;
+
wa.reg.reg |= flags;
_wa_add(wal, &wa);
}
@@ -1034,7 +1091,7 @@ whitelist_reg_ext(struct i915_wa_list *wal, i915_reg_t reg, u32 flags)
static void
whitelist_reg(struct i915_wa_list *wal, i915_reg_t reg)
{
- whitelist_reg_ext(wal, reg, RING_FORCE_TO_NONPRIV_RW);
+ whitelist_reg_ext(wal, reg, RING_FORCE_TO_NONPRIV_ACCESS_RW);
}
static void gen9_whitelist_build(struct i915_wa_list *w)
@@ -1047,6 +1104,9 @@ static void gen9_whitelist_build(struct i915_wa_list *w)
/* WaAllowUMDToModifyHDCChicken1:skl,bxt,kbl,glk,cfl */
whitelist_reg(w, GEN8_HDC_CHICKEN1);
+
+ /* WaSendPushConstantsFromMMIO:skl,bxt */
+ whitelist_reg(w, COMMON_SLICE_CHICKEN2);
}
static void skl_whitelist_build(struct intel_engine_cs *engine)
@@ -1115,7 +1175,7 @@ static void cfl_whitelist_build(struct intel_engine_cs *engine)
* - PS_DEPTH_COUNT_UDW
*/
whitelist_reg_ext(w, PS_INVOCATION_COUNT,
- RING_FORCE_TO_NONPRIV_RD |
+ RING_FORCE_TO_NONPRIV_ACCESS_RD |
RING_FORCE_TO_NONPRIV_RANGE_4);
}
@@ -1155,22 +1215,46 @@ static void icl_whitelist_build(struct intel_engine_cs *engine)
* - PS_DEPTH_COUNT_UDW
*/
whitelist_reg_ext(w, PS_INVOCATION_COUNT,
- RING_FORCE_TO_NONPRIV_RD |
+ RING_FORCE_TO_NONPRIV_ACCESS_RD |
RING_FORCE_TO_NONPRIV_RANGE_4);
break;
case VIDEO_DECODE_CLASS:
/* hucStatusRegOffset */
whitelist_reg_ext(w, _MMIO(0x2000 + engine->mmio_base),
- RING_FORCE_TO_NONPRIV_RD);
+ RING_FORCE_TO_NONPRIV_ACCESS_RD);
/* hucUKernelHdrInfoRegOffset */
whitelist_reg_ext(w, _MMIO(0x2014 + engine->mmio_base),
- RING_FORCE_TO_NONPRIV_RD);
+ RING_FORCE_TO_NONPRIV_ACCESS_RD);
/* hucStatus2RegOffset */
whitelist_reg_ext(w, _MMIO(0x23B0 + engine->mmio_base),
- RING_FORCE_TO_NONPRIV_RD);
+ RING_FORCE_TO_NONPRIV_ACCESS_RD);
+ break;
+
+ default:
break;
+ }
+}
+
+static void tgl_whitelist_build(struct intel_engine_cs *engine)
+{
+ struct i915_wa_list *w = &engine->whitelist;
+
+ switch (engine->class) {
+ case RENDER_CLASS:
+ /*
+ * WaAllowPMDepthAndInvocationCountAccessFromUMD:tgl
+ *
+ * This covers 4 registers which are next to one another :
+ * - PS_INVOCATION_COUNT
+ * - PS_INVOCATION_COUNT_UDW
+ * - PS_DEPTH_COUNT
+ * - PS_DEPTH_COUNT_UDW
+ */
+ whitelist_reg_ext(w, PS_INVOCATION_COUNT,
+ RING_FORCE_TO_NONPRIV_ACCESS_RD |
+ RING_FORCE_TO_NONPRIV_RANGE_4);
+ break;
default:
break;
}
@@ -1181,9 +1265,11 @@ void intel_engine_init_whitelist(struct intel_engine_cs *engine)
struct drm_i915_private *i915 = engine->i915;
struct i915_wa_list *w = &engine->whitelist;
- wa_init_start(w, "whitelist");
+ wa_init_start(w, "whitelist", engine->name);
- if (IS_GEN(i915, 11))
+ if (IS_GEN(i915, 12))
+ tgl_whitelist_build(engine);
+ else if (IS_GEN(i915, 11))
icl_whitelist_build(engine);
else if (IS_CANNONLAKE(i915))
cnl_whitelist_build(engine);
@@ -1233,6 +1319,34 @@ rcs_engine_wa_init(struct intel_engine_cs *engine, struct i915_wa_list *wal)
{
struct drm_i915_private *i915 = engine->i915;
+ if (IS_TGL_REVID(i915, TGL_REVID_A0, TGL_REVID_A0)) {
+ /* Wa_1606700617:tgl */
+ wa_masked_en(wal,
+ GEN9_CS_DEBUG_MODE1,
+ FF_DOP_CLOCK_GATE_DISABLE);
+
+ /* Wa_1607138336:tgl */
+ wa_write_or(wal,
+ GEN9_CTX_PREEMPT_REG,
+ GEN12_DISABLE_POSH_BUSY_FF_DOP_CG);
+
+ /* Wa_1607030317:tgl */
+ /* Wa_1607186500:tgl */
+ /* Wa_1607297627:tgl */
+ wa_masked_en(wal,
+ GEN6_RC_SLEEP_PSMI_CONTROL,
+ GEN12_WAIT_FOR_EVENT_POWER_DOWN_DISABLE |
+ GEN8_RC_SEMA_IDLE_MSG_DISABLE);
+
+ /*
+ * Wa_1606679103:tgl
+ * (see also Wa_1606682166:icl)
+ */
+ wa_write_or(wal,
+ GEN7_SARCHKMD,
+ GEN7_DISABLE_SAMPLER_PREFETCH);
+ }
+
if (IS_GEN(i915, 11)) {
/* This is not a Wa. Enable it for better image quality */
wa_masked_en(wal,
@@ -1240,10 +1354,9 @@ rcs_engine_wa_init(struct intel_engine_cs *engine, struct i915_wa_list *wal)
_3D_CHICKEN3_AA_LINE_QUALITY_FIX_ENABLE);
/* WaPipelineFlushCoherentLines:icl */
- ignore_wa_write_or(wal,
- GEN8_L3SQCREG4,
- GEN8_LQSC_FLUSH_COHERENT_LINES,
- GEN8_LQSC_FLUSH_COHERENT_LINES);
+ wa_write_or(wal,
+ GEN8_L3SQCREG4,
+ GEN8_LQSC_FLUSH_COHERENT_LINES);
/*
* Wa_1405543622:icl
@@ -1270,10 +1383,9 @@ rcs_engine_wa_init(struct intel_engine_cs *engine, struct i915_wa_list *wal)
* Wa_1405733216:icl
* Formerly known as WaDisableCleanEvicts
*/
- ignore_wa_write_or(wal,
- GEN8_L3SQCREG4,
- GEN11_LQSC_CLEAN_EVICT_DISABLE,
- GEN11_LQSC_CLEAN_EVICT_DISABLE);
+ wa_write_or(wal,
+ GEN8_L3SQCREG4,
+ GEN11_LQSC_CLEAN_EVICT_DISABLE);
/* WaForwardProgressSoftReset:icl */
wa_write_or(wal,
@@ -1292,6 +1404,12 @@ rcs_engine_wa_init(struct intel_engine_cs *engine, struct i915_wa_list *wal)
wa_write_or(wal,
GEN7_SARCHKMD,
GEN7_DISABLE_SAMPLER_PREFETCH);
+
+ /* Wa_1409178092:icl */
+ wa_write_masked_or(wal,
+ GEN11_SCRATCH2,
+ GEN11_COHERENT_PARTIAL_WRITE_MERGE_ENABLE,
+ 0);
}
if (IS_GEN_RANGE(i915, 9, 11)) {
@@ -1360,7 +1478,7 @@ engine_init_workarounds(struct intel_engine_cs *engine, struct i915_wa_list *wal
if (I915_SELFTEST_ONLY(INTEL_GEN(engine->i915) < 8))
return;
- if (engine->id == RCS0)
+ if (engine->class == RENDER_CLASS)
rcs_engine_wa_init(engine, wal);
else
xcs_engine_wa_init(engine, wal);
@@ -1370,10 +1488,10 @@ void intel_engine_init_workarounds(struct intel_engine_cs *engine)
{
struct i915_wa_list *wal = &engine->wa_list;
- if (GEM_WARN_ON(INTEL_GEN(engine->i915) < 8))
+ if (INTEL_GEN(engine->i915) < 8)
return;
- wa_init_start(wal, engine->name);
+ wa_init_start(wal, "engine", engine->name);
engine_init_workarounds(engine, wal);
wa_init_finish(wal);
}
@@ -1416,26 +1534,50 @@ err_obj:
return ERR_PTR(err);
}
+static bool mcr_range(struct drm_i915_private *i915, u32 offset)
+{
+ /*
+ * Registers in this range are affected by the MCR selector
+ * which only controls CPU initiated MMIO. Routing does not
+ * work for CS access so we cannot verify them on this path.
+ */
+ if (INTEL_GEN(i915) >= 8 && (offset >= 0xb000 && offset <= 0xb4ff))
+ return true;
+
+ return false;
+}
+
static int
wa_list_srm(struct i915_request *rq,
const struct i915_wa_list *wal,
struct i915_vma *vma)
{
+ struct drm_i915_private *i915 = rq->i915;
+ unsigned int i, count = 0;
const struct i915_wa *wa;
- unsigned int i;
u32 srm, *cs;
srm = MI_STORE_REGISTER_MEM | MI_SRM_LRM_GLOBAL_GTT;
- if (INTEL_GEN(rq->i915) >= 8)
+ if (INTEL_GEN(i915) >= 8)
srm++;
- cs = intel_ring_begin(rq, 4 * wal->count);
+ for (i = 0, wa = wal->list; i < wal->count; i++, wa++) {
+ if (!mcr_range(i915, i915_mmio_reg_offset(wa->reg)))
+ count++;
+ }
+
+ cs = intel_ring_begin(rq, 4 * count);
if (IS_ERR(cs))
return PTR_ERR(cs);
for (i = 0, wa = wal->list; i < wal->count; i++, wa++) {
+ u32 offset = i915_mmio_reg_offset(wa->reg);
+
+ if (mcr_range(i915, offset))
+ continue;
+
*cs++ = srm;
- *cs++ = i915_mmio_reg_offset(wa->reg);
+ *cs++ = offset;
*cs++ = i915_ggtt_offset(vma) + sizeof(u32) * i;
*cs++ = 0;
}
@@ -1458,11 +1600,13 @@ static int engine_wa_list_verify(struct intel_context *ce,
if (!wal->count)
return 0;
- vma = create_scratch(&ce->engine->i915->ggtt.vm, wal->count);
+ vma = create_scratch(&ce->engine->gt->ggtt->vm, wal->count);
if (IS_ERR(vma))
return PTR_ERR(vma);
+ intel_engine_pm_get(ce->engine);
rq = intel_context_create_request(ce);
+ intel_engine_pm_put(ce->engine);
if (IS_ERR(rq)) {
err = PTR_ERR(rq);
goto err_vma;
@@ -1472,25 +1616,32 @@ static int engine_wa_list_verify(struct intel_context *ce,
if (err)
goto err_vma;
+ i915_request_get(rq);
i915_request_add(rq);
if (i915_request_wait(rq, 0, HZ / 5) < 0) {
err = -ETIME;
- goto err_vma;
+ goto err_rq;
}
results = i915_gem_object_pin_map(vma->obj, I915_MAP_WB);
if (IS_ERR(results)) {
err = PTR_ERR(results);
- goto err_vma;
+ goto err_rq;
}
err = 0;
- for (i = 0, wa = wal->list; i < wal->count; i++, wa++)
+ for (i = 0, wa = wal->list; i < wal->count; i++, wa++) {
+ if (mcr_range(rq->i915, i915_mmio_reg_offset(wa->reg)))
+ continue;
+
if (!wa_verify(wa, results[i], wal->name, from))
err = -ENXIO;
+ }
i915_gem_object_unpin_map(vma->obj);
+err_rq:
+ i915_request_put(rq);
err_vma:
i915_vma_unpin(vma);
i915_vma_put(vma);
diff --git a/drivers/gpu/drm/i915/gt/intel_workarounds.h b/drivers/gpu/drm/i915/gt/intel_workarounds.h
index 3761a6ee58bb..8c9c769c2204 100644
--- a/drivers/gpu/drm/i915/gt/intel_workarounds.h
+++ b/drivers/gpu/drm/i915/gt/intel_workarounds.h
@@ -14,6 +14,7 @@
struct drm_i915_private;
struct i915_request;
struct intel_engine_cs;
+struct intel_gt;
static inline void intel_wa_list_free(struct i915_wa_list *wal)
{
@@ -25,9 +26,8 @@ void intel_engine_init_ctx_wa(struct intel_engine_cs *engine);
int intel_engine_emit_ctx_wa(struct i915_request *rq);
void intel_gt_init_workarounds(struct drm_i915_private *i915);
-void intel_gt_apply_workarounds(struct drm_i915_private *i915);
-bool intel_gt_verify_workarounds(struct drm_i915_private *i915,
- const char *from);
+void intel_gt_apply_workarounds(struct intel_gt *gt);
+bool intel_gt_verify_workarounds(struct intel_gt *gt, const char *from);
void intel_engine_init_whitelist(struct intel_engine_cs *engine);
void intel_engine_apply_whitelist(struct intel_engine_cs *engine);
diff --git a/drivers/gpu/drm/i915/gt/intel_workarounds_types.h b/drivers/gpu/drm/i915/gt/intel_workarounds_types.h
index 42ac1fb99572..e27ab1b710b3 100644
--- a/drivers/gpu/drm/i915/gt/intel_workarounds_types.h
+++ b/drivers/gpu/drm/i915/gt/intel_workarounds_types.h
@@ -20,6 +20,7 @@ struct i915_wa {
struct i915_wa_list {
const char *name;
+ const char *engine_name;
struct i915_wa *list;
unsigned int count;
unsigned int wa_count;
diff --git a/drivers/gpu/drm/i915/gt/mock_engine.c b/drivers/gpu/drm/i915/gt/mock_engine.c
index 486c6953dcb1..f2806381733f 100644
--- a/drivers/gpu/drm/i915/gt/mock_engine.c
+++ b/drivers/gpu/drm/i915/gt/mock_engine.c
@@ -23,62 +23,59 @@
*/
#include "gem/i915_gem_context.h"
+#include "gt/intel_ring.h"
#include "i915_drv.h"
#include "intel_context.h"
#include "intel_engine_pm.h"
+#include "intel_engine_pool.h"
#include "mock_engine.h"
#include "selftests/mock_request.h"
-struct mock_ring {
- struct intel_ring base;
- struct i915_timeline timeline;
-};
-
-static void mock_timeline_pin(struct i915_timeline *tl)
+static void mock_timeline_pin(struct intel_timeline *tl)
{
- tl->pin_count++;
+ atomic_inc(&tl->pin_count);
}
-static void mock_timeline_unpin(struct i915_timeline *tl)
+static void mock_timeline_unpin(struct intel_timeline *tl)
{
- GEM_BUG_ON(!tl->pin_count);
- tl->pin_count--;
+ GEM_BUG_ON(!atomic_read(&tl->pin_count));
+ atomic_dec(&tl->pin_count);
}
static struct intel_ring *mock_ring(struct intel_engine_cs *engine)
{
const unsigned long sz = PAGE_SIZE / 2;
- struct mock_ring *ring;
+ struct intel_ring *ring;
ring = kzalloc(sizeof(*ring) + sz, GFP_KERNEL);
if (!ring)
return NULL;
- if (i915_timeline_init(engine->i915, &ring->timeline, NULL)) {
+ kref_init(&ring->ref);
+ ring->size = sz;
+ ring->effective_size = sz;
+ ring->vaddr = (void *)(ring + 1);
+ atomic_set(&ring->pin_count, 1);
+
+ ring->vma = i915_vma_alloc();
+ if (!ring->vma) {
kfree(ring);
return NULL;
}
+ i915_active_init(&ring->vma->active, NULL, NULL);
- kref_init(&ring->base.ref);
- ring->base.size = sz;
- ring->base.effective_size = sz;
- ring->base.vaddr = (void *)(ring + 1);
- ring->base.timeline = &ring->timeline;
- atomic_set(&ring->base.pin_count, 1);
-
- INIT_LIST_HEAD(&ring->base.request_list);
- intel_ring_update_space(&ring->base);
+ intel_ring_update_space(ring);
- return &ring->base;
+ return ring;
}
-static void mock_ring_free(struct intel_ring *base)
+static void mock_ring_free(struct intel_ring *ring)
{
- struct mock_ring *ring = container_of(base, typeof(*ring), base);
+ i915_active_fini(&ring->vma->active);
+ i915_vma_free(ring->vma);
- i915_timeline_fini(&ring->timeline);
kfree(ring);
}
@@ -95,7 +92,7 @@ static void advance(struct i915_request *request)
i915_request_mark_complete(request);
GEM_BUG_ON(!i915_request_completed(request));
- intel_engine_queue_breadcrumbs(request->engine);
+ intel_engine_signal_breadcrumbs(request->engine);
}
static void hw_delay_complete(struct timer_list *t)
@@ -130,7 +127,6 @@ static void hw_delay_complete(struct timer_list *t)
static void mock_context_unpin(struct intel_context *ce)
{
- mock_timeline_unpin(ce->ring->timeline);
}
static void mock_context_destroy(struct kref *ref)
@@ -139,37 +135,52 @@ static void mock_context_destroy(struct kref *ref)
GEM_BUG_ON(intel_context_is_pinned(ce));
- if (ce->ring)
+ if (test_bit(CONTEXT_ALLOC_BIT, &ce->flags)) {
mock_ring_free(ce->ring);
+ mock_timeline_unpin(ce->timeline);
+ }
+ intel_context_fini(ce);
intel_context_free(ce);
}
-static int mock_context_pin(struct intel_context *ce)
+static int mock_context_alloc(struct intel_context *ce)
{
- int ret;
-
- if (!ce->ring) {
- ce->ring = mock_ring(ce->engine);
- if (!ce->ring)
- return -ENOMEM;
+ ce->ring = mock_ring(ce->engine);
+ if (!ce->ring)
+ return -ENOMEM;
+
+ GEM_BUG_ON(ce->timeline);
+ ce->timeline = intel_timeline_create(ce->engine->gt, NULL);
+ if (IS_ERR(ce->timeline)) {
+ mock_ring_free(ce->ring); /* free the ring we just created, not the engine */
+ return PTR_ERR(ce->timeline);
}
- ret = intel_context_active_acquire(ce, PIN_HIGH);
- if (ret)
- return ret;
+ mock_timeline_pin(ce->timeline);
- mock_timeline_pin(ce->ring->timeline);
return 0;
}
+static int mock_context_pin(struct intel_context *ce)
+{
+ return 0;
+}
+
+static void mock_context_reset(struct intel_context *ce)
+{
+}
+
static const struct intel_context_ops mock_context_ops = {
+ .alloc = mock_context_alloc,
+
.pin = mock_context_pin,
.unpin = mock_context_unpin,
.enter = intel_context_enter_engine,
.exit = intel_context_exit_engine,
+ .reset = mock_context_reset,
.destroy = mock_context_destroy,
};
@@ -216,16 +227,12 @@ static void mock_reset_prepare(struct intel_engine_cs *engine)
{
}
-static void mock_reset(struct intel_engine_cs *engine, bool stalled)
+static void mock_reset_rewind(struct intel_engine_cs *engine, bool stalled)
{
GEM_BUG_ON(stalled);
}
-static void mock_reset_finish(struct intel_engine_cs *engine)
-{
-}
-
-static void mock_cancel_requests(struct intel_engine_cs *engine)
+static void mock_reset_cancel(struct intel_engine_cs *engine)
{
struct i915_request *request;
unsigned long flags;
@@ -243,6 +250,24 @@ static void mock_cancel_requests(struct intel_engine_cs *engine)
spin_unlock_irqrestore(&engine->active.lock, flags);
}
+static void mock_reset_finish(struct intel_engine_cs *engine)
+{
+}
+
+static void mock_engine_release(struct intel_engine_cs *engine)
+{
+ struct mock_engine *mock =
+ container_of(engine, typeof(*mock), base);
+
+ GEM_BUG_ON(timer_pending(&mock->hw_delay));
+
+ intel_context_unpin(engine->kernel_context);
+ intel_context_put(engine->kernel_context);
+
+ intel_engine_fini_retire(engine);
+ intel_engine_fini_breadcrumbs(engine);
+}
+
struct intel_engine_cs *mock_engine(struct drm_i915_private *i915,
const char *name,
int id)
@@ -250,6 +275,7 @@ struct intel_engine_cs *mock_engine(struct drm_i915_private *i915,
struct mock_engine *engine;
GEM_BUG_ON(id >= I915_NUM_ENGINES);
+ GEM_BUG_ON(!i915->gt.uncore);
engine = kzalloc(sizeof(*engine) + PAGE_SIZE, GFP_KERNEL);
if (!engine)
@@ -257,9 +283,13 @@ struct intel_engine_cs *mock_engine(struct drm_i915_private *i915,
/* minimal engine setup for requests */
engine->base.i915 = i915;
+ engine->base.gt = &i915->gt;
+ engine->base.uncore = i915->gt.uncore;
snprintf(engine->base.name, sizeof(engine->base.name), "%s", name);
engine->base.id = id;
engine->base.mask = BIT(id);
+ engine->base.legacy_idx = INVALID_ENGINE;
+ engine->base.instance = id;
engine->base.status_page.addr = (void *)(engine + 1);
engine->base.cops = &mock_context_ops;
@@ -269,38 +299,41 @@ struct intel_engine_cs *mock_engine(struct drm_i915_private *i915,
engine->base.submit_request = mock_submit_request;
engine->base.reset.prepare = mock_reset_prepare;
- engine->base.reset.reset = mock_reset;
+ engine->base.reset.rewind = mock_reset_rewind;
+ engine->base.reset.cancel = mock_reset_cancel;
engine->base.reset.finish = mock_reset_finish;
- engine->base.cancel_requests = mock_cancel_requests;
+
+ engine->base.release = mock_engine_release;
+
+ i915->gt.engine[id] = &engine->base;
+ i915->gt.engine_class[0][id] = &engine->base;
/* fake hw queue */
spin_lock_init(&engine->hw_lock);
timer_setup(&engine->hw_delay, hw_delay_complete, 0);
INIT_LIST_HEAD(&engine->hw_queue);
+ intel_engine_add_user(&engine->base);
+
return &engine->base;
}
int mock_engine_init(struct intel_engine_cs *engine)
{
- struct drm_i915_private *i915 = engine->i915;
- int err;
+ struct intel_context *ce;
intel_engine_init_active(engine, ENGINE_MOCK);
intel_engine_init_breadcrumbs(engine);
intel_engine_init_execlists(engine);
intel_engine_init__pm(engine);
+ intel_engine_init_retire(engine);
+ intel_engine_pool_init(&engine->pool);
- engine->kernel_context =
- i915_gem_context_get_engine(i915->kernel_context, engine->id);
- if (IS_ERR(engine->kernel_context))
- goto err_breadcrumbs;
-
- err = intel_context_pin(engine->kernel_context);
- intel_context_put(engine->kernel_context);
- if (err)
+ ce = create_kernel_context(engine);
+ if (IS_ERR(ce))
goto err_breadcrumbs;
+ engine->kernel_context = ce;
return 0;
err_breadcrumbs:
@@ -325,17 +358,3 @@ void mock_engine_flush(struct intel_engine_cs *engine)
void mock_engine_reset(struct intel_engine_cs *engine)
{
}
-
-void mock_engine_free(struct intel_engine_cs *engine)
-{
- struct mock_engine *mock =
- container_of(engine, typeof(*mock), base);
-
- GEM_BUG_ON(timer_pending(&mock->hw_delay));
-
- intel_context_unpin(engine->kernel_context);
-
- intel_engine_fini_breadcrumbs(engine);
-
- kfree(engine);
-}
diff --git a/drivers/gpu/drm/i915/gt/selftest_context.c b/drivers/gpu/drm/i915/gt/selftest_context.c
new file mode 100644
index 000000000000..e874dfaa5316
--- /dev/null
+++ b/drivers/gpu/drm/i915/gt/selftest_context.c
@@ -0,0 +1,443 @@
+/*
+ * SPDX-License-Identifier: GPL-2.0
+ *
+ * Copyright © 2019 Intel Corporation
+ */
+
+#include "i915_selftest.h"
+#include "intel_engine_heartbeat.h"
+#include "intel_engine_pm.h"
+#include "intel_gt.h"
+
+#include "gem/selftests/mock_context.h"
+#include "selftests/igt_flush_test.h"
+#include "selftests/mock_drm.h"
+
+static int request_sync(struct i915_request *rq)
+{
+ struct intel_timeline *tl = i915_request_timeline(rq);
+ long timeout;
+ int err = 0;
+
+ intel_timeline_get(tl);
+ i915_request_get(rq);
+
+ /* Opencode i915_request_add() so we can keep the timeline locked. */
+ __i915_request_commit(rq);
+ __i915_request_queue(rq, NULL);
+
+ timeout = i915_request_wait(rq, 0, HZ / 10);
+ if (timeout < 0)
+ err = timeout;
+ else
+ i915_request_retire_upto(rq);
+
+ lockdep_unpin_lock(&tl->mutex, rq->cookie);
+ mutex_unlock(&tl->mutex);
+
+ i915_request_put(rq);
+ intel_timeline_put(tl);
+
+ return err;
+}
+
+static int context_sync(struct intel_context *ce)
+{
+ struct intel_timeline *tl = ce->timeline;
+ int err = 0;
+
+ mutex_lock(&tl->mutex);
+ do {
+ struct i915_request *rq;
+ long timeout;
+
+ if (list_empty(&tl->requests))
+ break;
+
+ rq = list_last_entry(&tl->requests, typeof(*rq), link);
+ i915_request_get(rq);
+
+ timeout = i915_request_wait(rq, 0, HZ / 10);
+ if (timeout < 0)
+ err = timeout;
+ else
+ i915_request_retire_upto(rq);
+
+ i915_request_put(rq);
+ } while (!err);
+ mutex_unlock(&tl->mutex);
+
+ return err;
+}
+
+static int __live_context_size(struct intel_engine_cs *engine)
+{
+ struct intel_context *ce;
+ struct i915_request *rq;
+ void *vaddr;
+ int err;
+
+ ce = intel_context_create(engine);
+ if (IS_ERR(ce))
+ return PTR_ERR(ce);
+
+ err = intel_context_pin(ce);
+ if (err)
+ goto err;
+
+ vaddr = i915_gem_object_pin_map(ce->state->obj,
+ i915_coherent_map_type(engine->i915));
+ if (IS_ERR(vaddr)) {
+ err = PTR_ERR(vaddr);
+ intel_context_unpin(ce);
+ goto err;
+ }
+
+ /*
+ * Note that execlists also applies a redzone which it checks on
+ * context unpin when debugging. We are using the same location
+ * and same poison value so that our checks overlap. Despite the
+ * redundancy, we want to keep this little selftest so that we
+ * get coverage of any and all submission backends, and we can
+ * always extend this test to ensure we trick the HW into a
+ * compromising position wrt the various sections that need
+ * to be written into the context state.
+ *
+ * TL;DR: this overlaps with the execlists redzone.
+ */
+ vaddr += engine->context_size - I915_GTT_PAGE_SIZE;
+ memset(vaddr, POISON_INUSE, I915_GTT_PAGE_SIZE);
+
+ rq = intel_context_create_request(ce);
+ intel_context_unpin(ce);
+ if (IS_ERR(rq)) {
+ err = PTR_ERR(rq);
+ goto err_unpin;
+ }
+
+ err = request_sync(rq);
+ if (err)
+ goto err_unpin;
+
+ /* Force the context switch */
+ rq = intel_engine_create_kernel_request(engine);
+ if (IS_ERR(rq)) {
+ err = PTR_ERR(rq);
+ goto err_unpin;
+ }
+ err = request_sync(rq);
+ if (err)
+ goto err_unpin;
+
+ if (memchr_inv(vaddr, POISON_INUSE, I915_GTT_PAGE_SIZE)) {
+ pr_err("%s context overwrote trailing red-zone!", engine->name);
+ err = -EINVAL;
+ }
+
+err_unpin:
+ i915_gem_object_unpin_map(ce->state->obj);
+err:
+ intel_context_put(ce);
+ return err;
+}
+
+static int live_context_size(void *arg)
+{
+ struct intel_gt *gt = arg;
+ struct intel_engine_cs *engine;
+ enum intel_engine_id id;
+ int err = 0;
+
+ /*
+ * Check that our context sizes are correct by seeing if the
+ * HW tries to write past the end of one.
+ */
+
+ for_each_engine(engine, gt, id) {
+ struct {
+ struct drm_i915_gem_object *state;
+ void *pinned;
+ } saved;
+
+ if (!engine->context_size)
+ continue;
+
+ intel_engine_pm_get(engine);
+
+ /*
+ * Hide the old default state -- we lie about the context size
+ * and get confused when the default state is smaller than
+ * expected. For our do-nothing request, inheriting the
+ * active state is sufficient, we are only checking that we
+ * don't use more than we planned.
+ */
+ saved.state = fetch_and_zero(&engine->default_state);
+ saved.pinned = fetch_and_zero(&engine->pinned_default_state);
+
+ /* Overlaps with the execlists redzone */
+ engine->context_size += I915_GTT_PAGE_SIZE;
+
+ err = __live_context_size(engine);
+
+ engine->context_size -= I915_GTT_PAGE_SIZE;
+
+ engine->pinned_default_state = saved.pinned;
+ engine->default_state = saved.state;
+
+ intel_engine_pm_put(engine);
+
+ if (err)
+ break;
+ }
+
+ return err;
+}
+
+static int __live_active_context(struct intel_engine_cs *engine)
+{
+ unsigned long saved_heartbeat;
+ struct intel_context *ce;
+ int pass;
+ int err;
+
+ /*
+ * We keep active contexts alive until after a subsequent context
+ * switch as the final write from the context-save will be after
+ * we retire the final request. We track when we unpin the context,
+ * under the presumption that the final pin is from the last request,
+ * and instead of immediately unpinning the context, we add a task
+ * to unpin the context from the next idle-barrier.
+ *
+ * This test makes sure that the context is kept alive until a
+ * subsequent idle-barrier (emitted when the engine wakeref hits 0
+ * with no more outstanding requests).
+ */
+
+ if (intel_engine_pm_is_awake(engine)) {
+ pr_err("%s is awake before starting %s!\n",
+ engine->name, __func__);
+ return -EINVAL;
+ }
+
+ ce = intel_context_create(engine);
+ if (IS_ERR(ce))
+ return PTR_ERR(ce);
+
+ saved_heartbeat = engine->props.heartbeat_interval_ms;
+ engine->props.heartbeat_interval_ms = 0;
+
+ for (pass = 0; pass <= 2; pass++) {
+ struct i915_request *rq;
+
+ intel_engine_pm_get(engine);
+
+ rq = intel_context_create_request(ce);
+ if (IS_ERR(rq)) {
+ err = PTR_ERR(rq);
+ goto out_engine;
+ }
+
+ err = request_sync(rq);
+ if (err)
+ goto out_engine;
+
+ /* Context will be kept active until after an idle-barrier. */
+ if (i915_active_is_idle(&ce->active)) {
+ pr_err("context is not active; expected idle-barrier (%s pass %d)\n",
+ engine->name, pass);
+ err = -EINVAL;
+ goto out_engine;
+ }
+
+ if (!intel_engine_pm_is_awake(engine)) {
+ pr_err("%s is asleep before idle-barrier\n",
+ engine->name);
+ err = -EINVAL;
+ goto out_engine;
+ }
+
+out_engine:
+ intel_engine_pm_put(engine);
+ if (err)
+ goto err;
+ }
+
+ /* Now make sure our idle-barriers are flushed */
+ err = intel_engine_flush_barriers(engine);
+ if (err)
+ goto err;
+
+ /* Wait for the barrier and in the process wait for engine to park */
+ err = context_sync(engine->kernel_context);
+ if (err)
+ goto err;
+
+ if (!i915_active_is_idle(&ce->active)) {
+ pr_err("context is still active!");
+ err = -EINVAL;
+ }
+
+ intel_engine_pm_flush(engine);
+
+ if (intel_engine_pm_is_awake(engine)) {
+ struct drm_printer p = drm_debug_printer(__func__);
+
+ intel_engine_dump(engine, &p,
+ "%s is still awake:%d after idle-barriers\n",
+ engine->name,
+ atomic_read(&engine->wakeref.count));
+ GEM_TRACE_DUMP();
+
+ err = -EINVAL;
+ goto err;
+ }
+
+err:
+ engine->props.heartbeat_interval_ms = saved_heartbeat;
+ intel_context_put(ce);
+ return err;
+}
+
+static int live_active_context(void *arg)
+{
+ struct intel_gt *gt = arg;
+ struct intel_engine_cs *engine;
+ enum intel_engine_id id;
+ int err = 0;
+
+ for_each_engine(engine, gt, id) {
+ err = __live_active_context(engine);
+ if (err)
+ break;
+
+ err = igt_flush_test(gt->i915);
+ if (err)
+ break;
+ }
+
+ return err;
+}
+
+static int __remote_sync(struct intel_context *ce, struct intel_context *remote)
+{
+ struct i915_request *rq;
+ int err;
+
+ err = intel_context_pin(remote);
+ if (err)
+ return err;
+
+ rq = intel_context_create_request(ce);
+ if (IS_ERR(rq)) {
+ err = PTR_ERR(rq);
+ goto unpin;
+ }
+
+ err = intel_context_prepare_remote_request(remote, rq);
+ if (err) {
+ i915_request_add(rq);
+ goto unpin;
+ }
+
+ err = request_sync(rq);
+
+unpin:
+ intel_context_unpin(remote);
+ return err;
+}
+
+static int __live_remote_context(struct intel_engine_cs *engine)
+{
+ struct intel_context *local, *remote;
+ unsigned long saved_heartbeat;
+ int pass;
+ int err;
+
+ /*
+ * Check that our idle barriers do not interfere with normal
+ * activity tracking. In particular, check that operating
+ * on the context image remotely (intel_context_prepare_remote_request),
+ * which inserts foreign fences into intel_context.active, does not
+ * clobber the idle-barrier.
+ */
+
+ if (intel_engine_pm_is_awake(engine)) {
+ pr_err("%s is awake before starting %s!\n",
+ engine->name, __func__);
+ return -EINVAL;
+ }
+
+ remote = intel_context_create(engine);
+ if (IS_ERR(remote))
+ return PTR_ERR(remote);
+
+ local = intel_context_create(engine);
+ if (IS_ERR(local)) {
+ err = PTR_ERR(local);
+ goto err_remote;
+ }
+
+ saved_heartbeat = engine->props.heartbeat_interval_ms;
+ engine->props.heartbeat_interval_ms = 0;
+ intel_engine_pm_get(engine);
+
+ for (pass = 0; pass <= 2; pass++) {
+ err = __remote_sync(local, remote);
+ if (err)
+ break;
+
+ err = __remote_sync(engine->kernel_context, remote);
+ if (err)
+ break;
+
+ if (i915_active_is_idle(&remote->active)) {
+ pr_err("remote context is not active; expected idle-barrier (%s pass %d)\n",
+ engine->name, pass);
+ err = -EINVAL;
+ break;
+ }
+ }
+
+ intel_engine_pm_put(engine);
+ engine->props.heartbeat_interval_ms = saved_heartbeat;
+
+ intel_context_put(local);
+err_remote:
+ intel_context_put(remote);
+ return err;
+}
+
+static int live_remote_context(void *arg)
+{
+ struct intel_gt *gt = arg;
+ struct intel_engine_cs *engine;
+ enum intel_engine_id id;
+ int err = 0;
+
+ for_each_engine(engine, gt, id) {
+ err = __live_remote_context(engine);
+ if (err)
+ break;
+
+ err = igt_flush_test(gt->i915);
+ if (err)
+ break;
+ }
+
+ return err;
+}
+
+int intel_context_live_selftests(struct drm_i915_private *i915)
+{
+ static const struct i915_subtest tests[] = {
+ SUBTEST(live_context_size),
+ SUBTEST(live_active_context),
+ SUBTEST(live_remote_context),
+ };
+ struct intel_gt *gt = &i915->gt;
+
+ if (intel_gt_is_wedged(gt))
+ return 0;
+
+ return intel_gt_live_subtests(tests, gt);
+}
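
An aside on the technique above: __live_context_size() is a classic red-zone check -- poison the bytes just past the advertised context size, let the hardware save its state, then verify the poison survived. A minimal user-space sketch of the same idea, with POISON and the fake save routine standing in for POISON_INUSE and the GPU (nothing here is i915 API):

#include <stdio.h>
#include <stdlib.h>
#include <string.h>

#define POISON 0x5a /* stand-in for POISON_INUSE */

/* Pretend "hardware" that writes into its state buffer. */
static void fake_context_save(unsigned char *state, size_t sz)
{
        memset(state, 0, sz); /* stays within the advertised size */
}

int main(void)
{
        const size_t size = 4096, redzone = 256;
        unsigned char *buf = malloc(size + redzone);
        size_t i;

        if (!buf)
                return 1;

        /* Poison the trailing red-zone, as the selftest poisons the last page. */
        memset(buf + size, POISON, redzone);

        fake_context_save(buf, size);

        /* memchr_inv() in the kernel; open-coded scan to keep the sketch self-contained. */
        for (i = 0; i < redzone; i++) {
                if (buf[size + i] != POISON) {
                        fprintf(stderr, "red-zone overwritten at +%zu!\n", i);
                        free(buf);
                        return 1;
                }
        }

        puts("red-zone intact");
        free(buf);
        return 0;
}
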
diff --git a/drivers/gpu/drm/i915/gt/selftest_engine.c b/drivers/gpu/drm/i915/gt/selftest_engine.c
new file mode 100644
index 000000000000..f65b118e261d
--- /dev/null
+++ b/drivers/gpu/drm/i915/gt/selftest_engine.c
@@ -0,0 +1,28 @@
+/*
+ * SPDX-License-Identifier: GPL-2.0
+ *
+ * Copyright © 2018 Intel Corporation
+ */
+
+#include "i915_selftest.h"
+#include "selftest_engine.h"
+
+int intel_engine_live_selftests(struct drm_i915_private *i915)
+{
+ static int (* const tests[])(struct intel_gt *) = {
+ live_engine_pm_selftests,
+ NULL,
+ };
+ struct intel_gt *gt = &i915->gt;
+ typeof(*tests) *fn;
+
+ for (fn = tests; *fn; fn++) {
+ int err;
+
+ err = (*fn)(gt);
+ if (err)
+ return err;
+ }
+
+ return 0;
+}
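
intel_engine_live_selftests() dispatches through a NULL-terminated table of function pointers instead of the SUBTEST() array used by the other selftest entry points, so adding a suite is a one-line edit. A standalone sketch of that table-driven idiom, with placeholder tests and an opaque struct gt (illustrative only):

#include <stdio.h>

struct gt; /* opaque stand-in for struct intel_gt */

static int test_a(struct gt *gt) { (void)gt; puts("test_a"); return 0; }
static int test_b(struct gt *gt) { (void)gt; puts("test_b"); return 0; }

int main(void)
{
        /* NULL-terminated table: extending the run is a one-line change. */
        static int (* const tests[])(struct gt *) = {
                test_a,
                test_b,
                NULL,
        };
        int (* const *fn)(struct gt *);

        for (fn = tests; *fn; fn++) {
                int err = (*fn)(NULL);

                if (err) /* first failure aborts the run, as in the driver */
                        return err;
        }
        return 0;
}
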
diff --git a/drivers/gpu/drm/i915/gt/selftest_engine.h b/drivers/gpu/drm/i915/gt/selftest_engine.h
new file mode 100644
index 000000000000..ab32d09ec5a1
--- /dev/null
+++ b/drivers/gpu/drm/i915/gt/selftest_engine.h
@@ -0,0 +1,14 @@
+/*
+ * SPDX-License-Identifier: GPL-2.0
+ *
+ * Copyright © 2019 Intel Corporation
+ */
+
+#ifndef SELFTEST_ENGINE_H
+#define SELFTEST_ENGINE_H
+
+struct intel_gt;
+
+int live_engine_pm_selftests(struct intel_gt *gt);
+
+#endif
diff --git a/drivers/gpu/drm/i915/gt/selftest_engine_cs.c b/drivers/gpu/drm/i915/gt/selftest_engine_cs.c
index cfaa6b296835..f88e445a1cae 100644
--- a/drivers/gpu/drm/i915/gt/selftest_engine_cs.c
+++ b/drivers/gpu/drm/i915/gt/selftest_engine_cs.c
@@ -4,7 +4,365 @@
* Copyright © 2018 Intel Corporation
*/
-#include "../i915_selftest.h"
+#include <linux/sort.h>
+
+#include "intel_gt_pm.h"
+#include "intel_rps.h"
+
+#include "i915_selftest.h"
+#include "selftests/igt_flush_test.h"
+
+#define COUNT 5
+
+static int cmp_u32(const void *A, const void *B)
+{
+ const u32 *a = A, *b = B;
+
+ return *a - *b;
+}
+
+static void perf_begin(struct intel_gt *gt)
+{
+ intel_gt_pm_get(gt);
+
+ /* Boost gpufreq to max [waitboost] and keep it fixed */
+ atomic_inc(&gt->rps.num_waiters);
+ schedule_work(&gt->rps.work);
+ flush_work(&gt->rps.work);
+}
+
+static int perf_end(struct intel_gt *gt)
+{
+ atomic_dec(&gt->rps.num_waiters);
+ intel_gt_pm_put(gt);
+
+ return igt_flush_test(gt->i915);
+}
+
+static int write_timestamp(struct i915_request *rq, int slot)
+{
+ u32 cmd;
+ u32 *cs;
+
+ cs = intel_ring_begin(rq, 4);
+ if (IS_ERR(cs))
+ return PTR_ERR(cs);
+
+ cmd = MI_STORE_REGISTER_MEM | MI_USE_GGTT;
+ if (INTEL_GEN(rq->i915) >= 8)
+ cmd++;
+ *cs++ = cmd;
+ *cs++ = i915_mmio_reg_offset(RING_TIMESTAMP(rq->engine->mmio_base));
+ *cs++ = i915_request_timeline(rq)->hwsp_offset + slot * sizeof(u32);
+ *cs++ = 0;
+
+ intel_ring_advance(rq, cs);
+
+ return 0;
+}
+
+static struct i915_vma *create_empty_batch(struct intel_context *ce)
+{
+ struct drm_i915_gem_object *obj;
+ struct i915_vma *vma;
+ u32 *cs;
+ int err;
+
+ obj = i915_gem_object_create_internal(ce->engine->i915, PAGE_SIZE);
+ if (IS_ERR(obj))
+ return ERR_CAST(obj);
+
+ cs = i915_gem_object_pin_map(obj, I915_MAP_WB);
+ if (IS_ERR(cs)) {
+ err = PTR_ERR(cs);
+ goto err_put;
+ }
+
+ cs[0] = MI_BATCH_BUFFER_END;
+
+ i915_gem_object_flush_map(obj);
+
+ vma = i915_vma_instance(obj, ce->vm, NULL);
+ if (IS_ERR(vma)) {
+ err = PTR_ERR(vma);
+ goto err_unpin;
+ }
+
+ err = i915_vma_pin(vma, 0, 0, PIN_USER);
+ if (err)
+ goto err_unpin;
+
+ i915_gem_object_unpin_map(obj);
+ return vma;
+
+err_unpin:
+ i915_gem_object_unpin_map(obj);
+err_put:
+ i915_gem_object_put(obj);
+ return ERR_PTR(err);
+}
+
+static u32 trifilter(u32 *a)
+{
+ u64 sum;
+
+ sort(a, COUNT, sizeof(*a), cmp_u32, NULL);
+
+ sum = mul_u32_u32(a[2], 2);
+ sum += a[1];
+ sum += a[3];
+
+ return sum >> 2;
+}
+
+static int perf_mi_bb_start(void *arg)
+{
+ struct intel_gt *gt = arg;
+ struct intel_engine_cs *engine;
+ enum intel_engine_id id;
+ int err = 0;
+
+ if (INTEL_GEN(gt->i915) < 7) /* for per-engine CS_TIMESTAMP */
+ return 0;
+
+ perf_begin(gt);
+ for_each_engine(engine, gt, id) {
+ struct intel_context *ce = engine->kernel_context;
+ struct i915_vma *batch;
+ u32 cycles[COUNT];
+ int i;
+
+ intel_engine_pm_get(engine);
+
+ batch = create_empty_batch(ce);
+ if (IS_ERR(batch)) {
+ err = PTR_ERR(batch);
+ intel_engine_pm_put(engine);
+ break;
+ }
+
+ err = i915_vma_sync(batch);
+ if (err) {
+ intel_engine_pm_put(engine);
+ i915_vma_put(batch);
+ break;
+ }
+
+ for (i = 0; i < ARRAY_SIZE(cycles); i++) {
+ struct i915_request *rq;
+
+ rq = i915_request_create(ce);
+ if (IS_ERR(rq)) {
+ err = PTR_ERR(rq);
+ break;
+ }
+
+ err = write_timestamp(rq, 2);
+ if (err)
+ goto out;
+
+ err = rq->engine->emit_bb_start(rq,
+ batch->node.start, 8,
+ 0);
+ if (err)
+ goto out;
+
+ err = write_timestamp(rq, 3);
+ if (err)
+ goto out;
+
+out:
+ i915_request_get(rq);
+ i915_request_add(rq);
+
+ if (i915_request_wait(rq, 0, HZ / 5) < 0)
+ err = -EIO;
+ if (!err) /* read the timestamps before dropping our reference */
+ cycles[i] = rq->hwsp_seqno[3] - rq->hwsp_seqno[2];
+ i915_request_put(rq);
+ if (err)
+ break;
+ }
+ i915_vma_put(batch);
+ intel_engine_pm_put(engine);
+ if (err)
+ break;
+
+ pr_info("%s: MI_BB_START cycles: %u\n",
+ engine->name, trifilter(cycles));
+ }
+ if (perf_end(gt))
+ err = -EIO;
+
+ return err;
+}
+
+static struct i915_vma *create_nop_batch(struct intel_context *ce)
+{
+ struct drm_i915_gem_object *obj;
+ struct i915_vma *vma;
+ u32 *cs;
+ int err;
+
+ obj = i915_gem_object_create_internal(ce->engine->i915, SZ_64K);
+ if (IS_ERR(obj))
+ return ERR_CAST(obj);
+
+ cs = i915_gem_object_pin_map(obj, I915_MAP_WB);
+ if (IS_ERR(cs)) {
+ err = PTR_ERR(cs);
+ goto err_put;
+ }
+
+ memset(cs, 0, SZ_64K);
+ cs[SZ_64K / sizeof(*cs) - 1] = MI_BATCH_BUFFER_END;
+
+ i915_gem_object_flush_map(obj);
+
+ vma = i915_vma_instance(obj, ce->vm, NULL);
+ if (IS_ERR(vma)) {
+ err = PTR_ERR(vma);
+ goto err_unpin;
+ }
+
+ err = i915_vma_pin(vma, 0, 0, PIN_USER);
+ if (err)
+ goto err_unpin;
+
+ i915_gem_object_unpin_map(obj);
+ return vma;
+
+err_unpin:
+ i915_gem_object_unpin_map(obj);
+err_put:
+ i915_gem_object_put(obj);
+ return ERR_PTR(err);
+}
+
+static int perf_mi_noop(void *arg)
+{
+ struct intel_gt *gt = arg;
+ struct intel_engine_cs *engine;
+ enum intel_engine_id id;
+ int err = 0;
+
+ if (INTEL_GEN(gt->i915) < 7) /* for per-engine CS_TIMESTAMP */
+ return 0;
+
+ perf_begin(gt);
+ for_each_engine(engine, gt, id) {
+ struct intel_context *ce = engine->kernel_context;
+ struct i915_vma *base, *nop;
+ u32 cycles[COUNT];
+ int i;
+
+ intel_engine_pm_get(engine);
+
+ base = create_empty_batch(ce);
+ if (IS_ERR(base)) {
+ err = PTR_ERR(base);
+ intel_engine_pm_put(engine);
+ break;
+ }
+
+ err = i915_vma_sync(base);
+ if (err) {
+ i915_vma_put(base);
+ intel_engine_pm_put(engine);
+ break;
+ }
+
+ nop = create_nop_batch(ce);
+ if (IS_ERR(nop)) {
+ err = PTR_ERR(nop);
+ i915_vma_put(base);
+ intel_engine_pm_put(engine);
+ break;
+ }
+
+ err = i915_vma_sync(nop);
+ if (err) {
+ i915_vma_put(nop);
+ i915_vma_put(base);
+ intel_engine_pm_put(engine);
+ break;
+ }
+
+ for (i = 0; i < ARRAY_SIZE(cycles); i++) {
+ struct i915_request *rq;
+
+ rq = i915_request_create(ce);
+ if (IS_ERR(rq)) {
+ err = PTR_ERR(rq);
+ break;
+ }
+
+ err = write_timestamp(rq, 2);
+ if (err)
+ goto out;
+
+ err = rq->engine->emit_bb_start(rq,
+ base->node.start, 8,
+ 0);
+ if (err)
+ goto out;
+
+ err = write_timestamp(rq, 3);
+ if (err)
+ goto out;
+
+ err = rq->engine->emit_bb_start(rq,
+ nop->node.start,
+ nop->node.size,
+ 0);
+ if (err)
+ goto out;
+
+ err = write_timestamp(rq, 4);
+ if (err)
+ goto out;
+
+out:
+ i915_request_get(rq);
+ i915_request_add(rq);
+
+ if (i915_request_wait(rq, 0, HZ / 5) < 0)
+ err = -EIO;
+ if (!err) /* read the timestamps before dropping our reference */
+ cycles[i] =
+ (rq->hwsp_seqno[4] - rq->hwsp_seqno[3]) -
+ (rq->hwsp_seqno[3] - rq->hwsp_seqno[2]);
+ i915_request_put(rq);
+ if (err)
+ break;
+ }
+ i915_vma_put(nop);
+ i915_vma_put(base);
+ intel_engine_pm_put(engine);
+ if (err)
+ break;
+
+ pr_info("%s: 16K MI_NOOP cycles: %u\n",
+ engine->name, trifilter(cycles));
+ }
+ if (perf_end(gt))
+ err = -EIO;
+
+ return err;
+}
+
+int intel_engine_cs_perf_selftests(struct drm_i915_private *i915)
+{
+ static const struct i915_subtest tests[] = {
+ SUBTEST(perf_mi_bb_start),
+ SUBTEST(perf_mi_noop),
+ };
+
+ if (intel_gt_is_wedged(&i915->gt))
+ return 0;
+
+ return intel_gt_live_subtests(tests, &i915->gt);
+}
static int intel_mmio_bases_check(void *arg)
{
@@ -12,19 +370,18 @@ static int intel_mmio_bases_check(void *arg)
for (i = 0; i < ARRAY_SIZE(intel_engines); i++) {
const struct engine_info *info = &intel_engines[i];
- char name[INTEL_ENGINE_CS_MAX_NAME];
u8 prev = U8_MAX;
- __sprint_engine_name(name, info);
-
for (j = 0; j < MAX_MMIO_BASES; j++) {
u8 gen = info->mmio_bases[j].gen;
u32 base = info->mmio_bases[j].base;
if (gen >= prev) {
- pr_err("%s: %s: mmio base for gen %x "
- "is before the one for gen %x\n",
- __func__, name, prev, gen);
+ pr_err("%s(%s, class:%d, instance:%d): mmio base for gen %x is before the one for gen %x\n",
+ __func__,
+ intel_engine_class_repr(info->class),
+ info->class, info->instance,
+ prev, gen);
return -EINVAL;
}
@@ -32,17 +389,22 @@ static int intel_mmio_bases_check(void *arg)
break;
if (!base) {
- pr_err("%s: %s: invalid mmio base (%x) "
- "for gen %x at entry %u\n",
- __func__, name, base, gen, j);
+ pr_err("%s(%s, class:%d, instance:%d): invalid mmio base (%x) for gen %x at entry %u\n",
+ __func__,
+ intel_engine_class_repr(info->class),
+ info->class, info->instance,
+ base, gen, j);
return -EINVAL;
}
prev = gen;
}
- pr_info("%s: min gen supported for %s = %d\n",
- __func__, name, prev);
+ pr_debug("%s: min gen supported for %s%d is %d\n",
+ __func__,
+ intel_engine_class_repr(info->class),
+ info->instance,
+ prev);
}
return 0;
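
The trifilter() above is the noise-rejection step for both perf tests: sort the five samples, drop the minimum and maximum, and average the middle three with the median weighted twice, i.e. (a[1] + 2*a[2] + a[3]) / 4. A user-space sketch, with a comparator hedged against u32 wrap (the kernel's *a - *b can misorder very large deltas):

#include <stdio.h>
#include <stdlib.h>
#include <stdint.h>

#define COUNT 5

static int cmp_u32(const void *A, const void *B)
{
        const uint32_t *a = A, *b = B;

        return (*a > *b) - (*a < *b); /* avoids overflow of *a - *b */
}

/* Weighted mean of the middle three samples: (a[1] + 2*a[2] + a[3]) / 4 */
static uint32_t trifilter(uint32_t *a)
{
        uint64_t sum;

        qsort(a, COUNT, sizeof(*a), cmp_u32);

        sum = (uint64_t)a[2] * 2;
        sum += a[1];
        sum += a[3];

        return sum >> 2;
}

int main(void)
{
        /* e.g. timestamp deltas with one outlier from a context switch */
        uint32_t cycles[COUNT] = { 102, 99, 4000, 101, 100 };

        printf("filtered: %u cycles\n", trifilter(cycles));
        return 0;
}

Compiled and run, the 4000-cycle outlier is discarded and the filtered value lands at 101.
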
diff --git a/drivers/gpu/drm/i915/gt/selftest_engine_heartbeat.c b/drivers/gpu/drm/i915/gt/selftest_engine_heartbeat.c
new file mode 100644
index 000000000000..43d4d589749f
--- /dev/null
+++ b/drivers/gpu/drm/i915/gt/selftest_engine_heartbeat.c
@@ -0,0 +1,374 @@
+/*
+ * SPDX-License-Identifier: MIT
+ *
+ * Copyright © 2018 Intel Corporation
+ */
+
+#include <linux/sort.h>
+
+#include "i915_drv.h"
+
+#include "intel_gt_requests.h"
+#include "i915_selftest.h"
+
+static int timeline_sync(struct intel_timeline *tl)
+{
+ struct dma_fence *fence;
+ long timeout;
+
+ fence = i915_active_fence_get(&tl->last_request);
+ if (!fence)
+ return 0;
+
+ timeout = dma_fence_wait_timeout(fence, true, HZ / 2);
+ dma_fence_put(fence);
+ if (timeout < 0)
+ return timeout;
+
+ return 0;
+}
+
+static int engine_sync_barrier(struct intel_engine_cs *engine)
+{
+ return timeline_sync(engine->kernel_context->timeline);
+}
+
+struct pulse {
+ struct i915_active active;
+ struct kref kref;
+};
+
+static int pulse_active(struct i915_active *active)
+{
+ kref_get(&container_of(active, struct pulse, active)->kref);
+ return 0;
+}
+
+static void pulse_free(struct kref *kref)
+{
+ kfree(container_of(kref, struct pulse, kref));
+}
+
+static void pulse_put(struct pulse *p)
+{
+ kref_put(&p->kref, pulse_free);
+}
+
+static void pulse_retire(struct i915_active *active)
+{
+ pulse_put(container_of(active, struct pulse, active));
+}
+
+static struct pulse *pulse_create(void)
+{
+ struct pulse *p;
+
+ p = kmalloc(sizeof(*p), GFP_KERNEL);
+ if (!p)
+ return p;
+
+ kref_init(&p->kref);
+ i915_active_init(&p->active, pulse_active, pulse_retire);
+
+ return p;
+}
+
+static void pulse_unlock_wait(struct pulse *p)
+{
+ i915_active_unlock_wait(&p->active);
+}
+
+static int __live_idle_pulse(struct intel_engine_cs *engine,
+ int (*fn)(struct intel_engine_cs *cs))
+{
+ struct pulse *p;
+ int err;
+
+ GEM_BUG_ON(!intel_engine_pm_is_awake(engine));
+
+ p = pulse_create();
+ if (!p)
+ return -ENOMEM;
+
+ err = i915_active_acquire(&p->active);
+ if (err)
+ goto out;
+
+ err = i915_active_acquire_preallocate_barrier(&p->active, engine);
+ if (err) {
+ i915_active_release(&p->active);
+ goto out;
+ }
+
+ i915_active_acquire_barrier(&p->active);
+ i915_active_release(&p->active);
+
+ GEM_BUG_ON(i915_active_is_idle(&p->active));
+ GEM_BUG_ON(llist_empty(&engine->barrier_tasks));
+
+ err = fn(engine);
+ if (err)
+ goto out;
+
+ GEM_BUG_ON(!llist_empty(&engine->barrier_tasks));
+
+ if (engine_sync_barrier(engine)) {
+ struct drm_printer m = drm_err_printer("pulse");
+
+ pr_err("%s: no heartbeat pulse?\n", engine->name);
+ intel_engine_dump(engine, &m, "%s", engine->name);
+
+ err = -ETIME;
+ goto out;
+ }
+
+ GEM_BUG_ON(READ_ONCE(engine->serial) != engine->wakeref_serial);
+
+ pulse_unlock_wait(p); /* synchronize with the retirement callback */
+
+ if (!i915_active_is_idle(&p->active)) {
+ struct drm_printer m = drm_err_printer("pulse");
+
+ pr_err("%s: heartbeat pulse did not flush idle tasks\n",
+ engine->name);
+ i915_active_print(&p->active, &m);
+
+ err = -EINVAL;
+ goto out;
+ }
+
+out:
+ pulse_put(p);
+ return err;
+}
+
+static int live_idle_flush(void *arg)
+{
+ struct intel_gt *gt = arg;
+ struct intel_engine_cs *engine;
+ enum intel_engine_id id;
+ int err = 0;
+
+ /* Check that we can flush the idle barriers */
+
+ for_each_engine(engine, gt, id) {
+ intel_engine_pm_get(engine);
+ err = __live_idle_pulse(engine, intel_engine_flush_barriers);
+ intel_engine_pm_put(engine);
+ if (err)
+ break;
+ }
+
+ return err;
+}
+
+static int live_idle_pulse(void *arg)
+{
+ struct intel_gt *gt = arg;
+ struct intel_engine_cs *engine;
+ enum intel_engine_id id;
+ int err = 0;
+
+ /* Check that heartbeat pulses flush the idle barriers */
+
+ for_each_engine(engine, gt, id) {
+ intel_engine_pm_get(engine);
+ err = __live_idle_pulse(engine, intel_engine_pulse);
+ intel_engine_pm_put(engine);
+ if (err && err != -ENODEV)
+ break;
+
+ err = 0;
+ }
+
+ return err;
+}
+
+static int cmp_u32(const void *_a, const void *_b)
+{
+ const u32 *a = _a, *b = _b;
+
+ return *a - *b;
+}
+
+static int __live_heartbeat_fast(struct intel_engine_cs *engine)
+{
+ struct intel_context *ce;
+ struct i915_request *rq;
+ ktime_t t0, t1;
+ u32 times[5];
+ int err;
+ int i;
+
+ ce = intel_context_create(engine);
+ if (IS_ERR(ce))
+ return PTR_ERR(ce);
+
+ intel_engine_pm_get(engine);
+
+ err = intel_engine_set_heartbeat(engine, 1);
+ if (err)
+ goto err_pm;
+
+ for (i = 0; i < ARRAY_SIZE(times); i++) {
+ /* Manufacture a tick */
+ do {
+ while (READ_ONCE(engine->heartbeat.systole))
+ flush_delayed_work(&engine->heartbeat.work);
+
+ engine->serial++; /* quick, pretend we are not idle! */
+ flush_delayed_work(&engine->heartbeat.work);
+ if (!delayed_work_pending(&engine->heartbeat.work)) {
+ pr_err("%s: heartbeat did not start\n",
+ engine->name);
+ err = -EINVAL;
+ goto err_pm;
+ }
+
+ rcu_read_lock();
+ rq = READ_ONCE(engine->heartbeat.systole);
+ if (rq)
+ rq = i915_request_get_rcu(rq);
+ rcu_read_unlock();
+ } while (!rq);
+
+ t0 = ktime_get();
+ while (rq == READ_ONCE(engine->heartbeat.systole))
+ yield(); /* work is on the local cpu! */
+ t1 = ktime_get();
+
+ i915_request_put(rq);
+ times[i] = ktime_us_delta(t1, t0);
+ }
+
+ sort(times, ARRAY_SIZE(times), sizeof(times[0]), cmp_u32, NULL);
+
+ pr_info("%s: Heartbeat delay: %uus [%u, %u]\n",
+ engine->name,
+ times[ARRAY_SIZE(times) / 2],
+ times[0],
+ times[ARRAY_SIZE(times) - 1]);
+
+ /* Min work delay is 2 * 2 (worst), +1 for scheduling, +1 for slack */
+ if (times[ARRAY_SIZE(times) / 2] > jiffies_to_usecs(6)) {
+ pr_err("%s: Heartbeat delay was %uus, expected less than %dus\n",
+ engine->name,
+ times[ARRAY_SIZE(times) / 2],
+ jiffies_to_usecs(6));
+ err = -EINVAL;
+ }
+
+ intel_engine_set_heartbeat(engine, CONFIG_DRM_I915_HEARTBEAT_INTERVAL);
+err_pm:
+ intel_engine_pm_put(engine);
+ intel_context_put(ce);
+ return err;
+}
+
+static int live_heartbeat_fast(void *arg)
+{
+ struct intel_gt *gt = arg;
+ struct intel_engine_cs *engine;
+ enum intel_engine_id id;
+ int err = 0;
+
+ /* Check that the heartbeat ticks at the desired rate. */
+ if (!CONFIG_DRM_I915_HEARTBEAT_INTERVAL)
+ return 0;
+
+ for_each_engine(engine, gt, id) {
+ err = __live_heartbeat_fast(engine);
+ if (err)
+ break;
+ }
+
+ return err;
+}
+
+static int __live_heartbeat_off(struct intel_engine_cs *engine)
+{
+ int err;
+
+ intel_engine_pm_get(engine);
+
+ engine->serial++;
+ flush_delayed_work(&engine->heartbeat.work);
+ if (!delayed_work_pending(&engine->heartbeat.work)) {
+ pr_err("%s: heartbeat not running\n",
+ engine->name);
+ err = -EINVAL;
+ goto err_pm;
+ }
+
+ err = intel_engine_set_heartbeat(engine, 0);
+ if (err)
+ goto err_pm;
+
+ engine->serial++;
+ flush_delayed_work(&engine->heartbeat.work);
+ if (delayed_work_pending(&engine->heartbeat.work)) {
+ pr_err("%s: heartbeat still running\n",
+ engine->name);
+ err = -EINVAL;
+ goto err_beat;
+ }
+
+ if (READ_ONCE(engine->heartbeat.systole)) {
+ pr_err("%s: heartbeat still allocated\n",
+ engine->name);
+ err = -EINVAL;
+ goto err_beat;
+ }
+
+err_beat:
+ intel_engine_set_heartbeat(engine, CONFIG_DRM_I915_HEARTBEAT_INTERVAL);
+err_pm:
+ intel_engine_pm_put(engine);
+ return err;
+}
+
+static int live_heartbeat_off(void *arg)
+{
+ struct intel_gt *gt = arg;
+ struct intel_engine_cs *engine;
+ enum intel_engine_id id;
+ int err = 0;
+
+ /* Check that we can turn off heartbeat and not interrupt VIP */
+ if (!CONFIG_DRM_I915_HEARTBEAT_INTERVAL)
+ return 0;
+
+ for_each_engine(engine, gt, id) {
+ if (!intel_engine_has_preemption(engine))
+ continue;
+
+ err = __live_heartbeat_off(engine);
+ if (err)
+ break;
+ }
+
+ return err;
+}
+
+int intel_heartbeat_live_selftests(struct drm_i915_private *i915)
+{
+ static const struct i915_subtest tests[] = {
+ SUBTEST(live_idle_flush),
+ SUBTEST(live_idle_pulse),
+ SUBTEST(live_heartbeat_fast),
+ SUBTEST(live_heartbeat_off),
+ };
+ int saved_hangcheck;
+ int err;
+
+ if (intel_gt_is_wedged(&i915->gt))
+ return 0;
+
+ saved_hangcheck = i915_modparams.enable_hangcheck;
+ i915_modparams.enable_hangcheck = INT_MAX;
+
+ err = intel_gt_live_subtests(tests, &i915->gt);
+
+ i915_modparams.enable_hangcheck = saved_hangcheck;
+ return err;
+}
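
The pulse object above lives exactly as long as the barrier tasks that reference it: pulse_active() takes a reference when the i915_active first becomes busy and pulse_retire() drops it. A minimal C11 sketch of that acquire/retire refcount pattern, with plain atomics standing in for kref (names illustrative):

#include <stdatomic.h>
#include <stdio.h>
#include <stdlib.h>

struct pulse {
        atomic_int refs;
};

static struct pulse *pulse_create(void)
{
        struct pulse *p = malloc(sizeof(*p));

        if (p)
                atomic_init(&p->refs, 1); /* creator's reference */
        return p;
}

static void pulse_get(struct pulse *p)
{
        atomic_fetch_add(&p->refs, 1); /* like pulse_active() */
}

static void pulse_put(struct pulse *p)
{
        /* Last put frees, like kref_put(&p->kref, pulse_free). */
        if (atomic_fetch_sub(&p->refs, 1) == 1)
                free(p);
}

int main(void)
{
        struct pulse *p = pulse_create();

        if (!p)
                return 1;

        pulse_get(p);   /* "barrier became active" */
        pulse_put(p);   /* "barrier retired" */
        pulse_put(p);   /* creator drops the last reference */
        puts("pulse freed exactly once");
        return 0;
}
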
diff --git a/drivers/gpu/drm/i915/gt/selftest_engine_pm.c b/drivers/gpu/drm/i915/gt/selftest_engine_pm.c
new file mode 100644
index 000000000000..cbf6b0735272
--- /dev/null
+++ b/drivers/gpu/drm/i915/gt/selftest_engine_pm.c
@@ -0,0 +1,84 @@
+/*
+ * SPDX-License-Identifier: GPL-2.0
+ *
+ * Copyright © 2018 Intel Corporation
+ */
+
+#include "i915_selftest.h"
+#include "selftest_engine.h"
+#include "selftests/igt_atomic.h"
+
+static int live_engine_pm(void *arg)
+{
+ struct intel_gt *gt = arg;
+ struct intel_engine_cs *engine;
+ enum intel_engine_id id;
+
+ /*
+ * Check we can call intel_engine_pm_put from any context. No
+ * failures are reported directly, but if we mess up, lockdep should
+ * tell us.
+ */
+ if (intel_gt_pm_wait_for_idle(gt)) {
+ pr_err("Unable to flush GT pm before test\n");
+ return -EBUSY;
+ }
+
+ GEM_BUG_ON(intel_gt_pm_is_awake(gt));
+ for_each_engine(engine, gt, id) {
+ const typeof(*igt_atomic_phases) *p;
+
+ for (p = igt_atomic_phases; p->name; p++) {
+ /*
+ * Acquisition is always synchronous, except if we
+ * know that the engine is already awake, in which
+ * case we should use intel_engine_pm_get_if_awake()
+ * to atomically grab the wakeref.
+ *
+ * In practice,
+ * intel_engine_pm_get();
+ * intel_engine_pm_put();
+ * occurs in one thread, while simultaneously
+ * intel_engine_pm_get_if_awake();
+ * intel_engine_pm_put();
+ * occurs from atomic context in another.
+ */
+ GEM_BUG_ON(intel_engine_pm_is_awake(engine));
+ intel_engine_pm_get(engine);
+
+ p->critical_section_begin();
+ if (!intel_engine_pm_get_if_awake(engine))
+ pr_err("intel_engine_pm_get_if_awake(%s) failed under %s\n",
+ engine->name, p->name);
+ else
+ intel_engine_pm_put_async(engine);
+ intel_engine_pm_put_async(engine);
+ p->critical_section_end();
+
+ intel_engine_pm_flush(engine);
+
+ if (intel_engine_pm_is_awake(engine)) {
+ pr_err("%s is still awake after flushing pm\n",
+ engine->name);
+ return -EINVAL;
+ }
+
+ /* gt wakeref is async (deferred to workqueue) */
+ if (intel_gt_pm_wait_for_idle(gt)) {
+ pr_err("GT failed to idle\n");
+ return -EINVAL;
+ }
+ }
+ }
+
+ return 0;
+}
+
+int live_engine_pm_selftests(struct intel_gt *gt)
+{
+ static const struct i915_subtest tests[] = {
+ SUBTEST(live_engine_pm),
+ };
+
+ return intel_gt_live_subtests(tests, gt);
+}
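
The comment in live_engine_pm() hinges on intel_engine_pm_get_if_awake() being usable from atomic context: it only takes a wakeref when the count is already non-zero, so it can never fall into the sleeping resume path. The underlying primitive is increment-if-not-zero, sketched here with a C11 compare-exchange loop (a stand-in, not the kernel implementation):

#include <stdatomic.h>
#include <stdbool.h>
#include <stdio.h>

static atomic_int wakeref; /* 0 == asleep */

/* Safe in atomic context: never wakes the device, only piggybacks. */
static bool pm_get_if_awake(void)
{
        int old = atomic_load(&wakeref);

        do {
                if (old == 0)
                        return false; /* asleep: caller must take the sleeping path */
        } while (!atomic_compare_exchange_weak(&wakeref, &old, old + 1));

        return true;
}

static void pm_put(void)
{
        atomic_fetch_sub(&wakeref, 1); /* real code parks on the final put */
}

int main(void)
{
        printf("asleep: got=%d\n", pm_get_if_awake());  /* 0: refused */

        atomic_fetch_add(&wakeref, 1);                  /* "pm_get()": now awake */
        printf("awake:  got=%d\n", pm_get_if_awake());  /* 1: piggybacked */
        pm_put();
        pm_put();
        return 0;
}
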
diff --git a/drivers/gpu/drm/i915/gt/selftest_gt_pm.c b/drivers/gpu/drm/i915/gt/selftest_gt_pm.c
new file mode 100644
index 000000000000..09ff8e4f88af
--- /dev/null
+++ b/drivers/gpu/drm/i915/gt/selftest_gt_pm.c
@@ -0,0 +1,79 @@
+/*
+ * SPDX-License-Identifier: MIT
+ *
+ * Copyright © 2019 Intel Corporation
+ */
+
+#include "selftest_llc.h"
+#include "selftest_rc6.h"
+
+static int live_gt_resume(void *arg)
+{
+ struct intel_gt *gt = arg;
+ IGT_TIMEOUT(end_time);
+ int err;
+
+ /* Do several suspend/resume cycles to check we don't explode! */
+ do {
+ intel_gt_suspend_prepare(gt);
+ intel_gt_suspend_late(gt);
+
+ if (gt->rc6.enabled) {
+ pr_err("rc6 still enabled after suspend!\n");
+ intel_gt_set_wedged_on_init(gt);
+ err = -EINVAL;
+ break;
+ }
+
+ err = intel_gt_resume(gt);
+ if (err)
+ break;
+
+ if (gt->rc6.supported && !gt->rc6.enabled) {
+ pr_err("rc6 not enabled upon resume!\n");
+ intel_gt_set_wedged_on_init(gt);
+ err = -EINVAL;
+ break;
+ }
+
+ err = st_llc_verify(&gt->llc);
+ if (err) {
+ pr_err("llc state not restored upon resume!\n");
+ intel_gt_set_wedged_on_init(gt);
+ break;
+ }
+ } while (!__igt_timeout(end_time, NULL));
+
+ return err;
+}
+
+int intel_gt_pm_live_selftests(struct drm_i915_private *i915)
+{
+ static const struct i915_subtest tests[] = {
+ SUBTEST(live_rc6_manual),
+ SUBTEST(live_gt_resume),
+ };
+
+ if (intel_gt_is_wedged(&i915->gt))
+ return 0;
+
+ return intel_gt_live_subtests(tests, &i915->gt);
+}
+
+int intel_gt_pm_late_selftests(struct drm_i915_private *i915)
+{
+ static const struct i915_subtest tests[] = {
+ /*
+ * These tests may leave the system in an undesirable state.
+ * They are intended to be run last in CI and the system
+ * rebooted afterwards.
+ */
+ SUBTEST(live_rc6_ctx_wa),
+ };
+
+ if (intel_gt_is_wedged(&i915->gt))
+ return 0;
+
+ return intel_gt_live_subtests(tests, &i915->gt);
+}
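
live_gt_resume() bounds its stress loop with IGT_TIMEOUT rather than a fixed cycle count, so a slow machine still completes at least one suspend/resume pass while a fast one packs in many. The do-while-deadline shape, sketched in plain C (the cycle body is a placeholder):

#include <stdio.h>
#include <time.h>

static double now_s(void)
{
        struct timespec ts;

        clock_gettime(CLOCK_MONOTONIC, &ts);
        return ts.tv_sec + ts.tv_nsec / 1e9;
}

static int one_cycle(int i)
{
        const struct timespec pause = { .tv_nsec = 100 * 1000 * 1000 };

        printf("cycle %d: suspend -> resume -> verify\n", i);
        nanosleep(&pause, NULL); /* stand-in for the actual suspend/resume work */
        return 0; /* 0 == state restored correctly */
}

int main(void)
{
        const double end_time = now_s() + 0.5; /* like IGT_TIMEOUT */
        int i = 0, err;

        /* do-while guarantees at least one full pass even on a tiny budget */
        do {
                err = one_cycle(i++);
                if (err)
                        break;
        } while (now_s() < end_time);

        return err;
}
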
diff --git a/drivers/gpu/drm/i915/gt/selftest_hangcheck.c b/drivers/gpu/drm/i915/gt/selftest_hangcheck.c
index 1ee4c923044f..3e5e6c86e843 100644
--- a/drivers/gpu/drm/i915/gt/selftest_hangcheck.c
+++ b/drivers/gpu/drm/i915/gt/selftest_hangcheck.c
@@ -25,13 +25,15 @@
#include <linux/kthread.h>
#include "gem/i915_gem_context.h"
+
+#include "intel_gt.h"
+#include "intel_engine_heartbeat.h"
#include "intel_engine_pm.h"
#include "i915_selftest.h"
#include "selftests/i915_random.h"
#include "selftests/igt_flush_test.h"
#include "selftests/igt_reset.h"
-#include "selftests/igt_wedge_me.h"
#include "selftests/igt_atomic.h"
#include "selftests/mock_drm.h"
@@ -42,7 +44,7 @@
#define IGT_IDLE_TIMEOUT 50 /* ms; time to wait after flushing between tests */
struct hang {
- struct drm_i915_private *i915;
+ struct intel_gt *gt;
struct drm_i915_gem_object *hws;
struct drm_i915_gem_object *obj;
struct i915_gem_context *ctx;
@@ -50,27 +52,27 @@ struct hang {
u32 *batch;
};
-static int hang_init(struct hang *h, struct drm_i915_private *i915)
+static int hang_init(struct hang *h, struct intel_gt *gt)
{
void *vaddr;
int err;
memset(h, 0, sizeof(*h));
- h->i915 = i915;
+ h->gt = gt;
- h->ctx = kernel_context(i915);
+ h->ctx = kernel_context(gt->i915);
if (IS_ERR(h->ctx))
return PTR_ERR(h->ctx);
GEM_BUG_ON(i915_gem_context_is_bannable(h->ctx));
- h->hws = i915_gem_object_create_internal(i915, PAGE_SIZE);
+ h->hws = i915_gem_object_create_internal(gt->i915, PAGE_SIZE);
if (IS_ERR(h->hws)) {
err = PTR_ERR(h->hws);
goto err_ctx;
}
- h->obj = i915_gem_object_create_internal(i915, PAGE_SIZE);
+ h->obj = i915_gem_object_create_internal(gt->i915, PAGE_SIZE);
if (IS_ERR(h->obj)) {
err = PTR_ERR(h->obj);
goto err_hws;
@@ -85,7 +87,7 @@ static int hang_init(struct hang *h, struct drm_i915_private *i915)
h->seqno = memset(vaddr, 0xff, PAGE_SIZE);
vaddr = i915_gem_object_pin_map(h->obj,
- i915_coherent_map_type(i915));
+ i915_coherent_map_type(gt->i915));
if (IS_ERR(vaddr)) {
err = PTR_ERR(vaddr);
goto err_unpin_hws;
@@ -118,7 +120,10 @@ static int move_to_active(struct i915_vma *vma,
int err;
i915_vma_lock(vma);
- err = i915_vma_move_to_active(vma, rq, flags);
+ err = i915_request_await_object(rq, vma->obj,
+ flags & EXEC_OBJECT_WRITE);
+ if (err == 0)
+ err = i915_vma_move_to_active(vma, rq, flags);
i915_vma_unlock(vma);
return err;
@@ -127,47 +132,52 @@ static int move_to_active(struct i915_vma *vma,
static struct i915_request *
hang_create_request(struct hang *h, struct intel_engine_cs *engine)
{
- struct drm_i915_private *i915 = h->i915;
- struct i915_address_space *vm = h->ctx->vm ?: &i915->ggtt.vm;
+ struct intel_gt *gt = h->gt;
+ struct i915_address_space *vm = i915_gem_context_get_vm_rcu(h->ctx);
+ struct drm_i915_gem_object *obj;
struct i915_request *rq = NULL;
struct i915_vma *hws, *vma;
unsigned int flags;
+ void *vaddr;
u32 *batch;
int err;
- if (i915_gem_object_is_active(h->obj)) {
- struct drm_i915_gem_object *obj;
- void *vaddr;
-
- obj = i915_gem_object_create_internal(h->i915, PAGE_SIZE);
- if (IS_ERR(obj))
- return ERR_CAST(obj);
+ obj = i915_gem_object_create_internal(gt->i915, PAGE_SIZE);
+ if (IS_ERR(obj)) {
+ i915_vm_put(vm);
+ return ERR_CAST(obj);
+ }
- vaddr = i915_gem_object_pin_map(obj,
- i915_coherent_map_type(h->i915));
- if (IS_ERR(vaddr)) {
- i915_gem_object_put(obj);
- return ERR_CAST(vaddr);
- }
+ vaddr = i915_gem_object_pin_map(obj, i915_coherent_map_type(gt->i915));
+ if (IS_ERR(vaddr)) {
+ i915_gem_object_put(obj);
+ i915_vm_put(vm);
+ return ERR_CAST(vaddr);
+ }
- i915_gem_object_unpin_map(h->obj);
- i915_gem_object_put(h->obj);
+ i915_gem_object_unpin_map(h->obj);
+ i915_gem_object_put(h->obj);
- h->obj = obj;
- h->batch = vaddr;
- }
+ h->obj = obj;
+ h->batch = vaddr;
vma = i915_vma_instance(h->obj, vm, NULL);
- if (IS_ERR(vma))
+ if (IS_ERR(vma)) {
+ i915_vm_put(vm);
return ERR_CAST(vma);
+ }
hws = i915_vma_instance(h->hws, vm, NULL);
- if (IS_ERR(hws))
+ if (IS_ERR(hws)) {
+ i915_vm_put(vm);
return ERR_CAST(hws);
+ }
err = i915_vma_pin(vma, 0, 0, PIN_USER);
- if (err)
+ if (err) {
+ i915_vm_put(vm);
return ERR_PTR(err);
+ }
err = i915_vma_pin(hws, 0, 0, PIN_USER);
if (err)
@@ -188,7 +198,7 @@ hang_create_request(struct hang *h, struct intel_engine_cs *engine)
goto cancel_rq;
batch = h->batch;
- if (INTEL_GEN(i915) >= 8) {
+ if (INTEL_GEN(gt->i915) >= 8) {
*batch++ = MI_STORE_DWORD_IMM_GEN4;
*batch++ = lower_32_bits(hws_address(hws, rq));
*batch++ = upper_32_bits(hws_address(hws, rq));
@@ -202,7 +212,7 @@ hang_create_request(struct hang *h, struct intel_engine_cs *engine)
*batch++ = MI_BATCH_BUFFER_START | 1 << 8 | 1;
*batch++ = lower_32_bits(vma->node.start);
*batch++ = upper_32_bits(vma->node.start);
- } else if (INTEL_GEN(i915) >= 6) {
+ } else if (INTEL_GEN(gt->i915) >= 6) {
*batch++ = MI_STORE_DWORD_IMM_GEN4;
*batch++ = 0;
*batch++ = lower_32_bits(hws_address(hws, rq));
@@ -215,7 +225,7 @@ hang_create_request(struct hang *h, struct intel_engine_cs *engine)
*batch++ = MI_ARB_CHECK;
*batch++ = MI_BATCH_BUFFER_START | 1 << 8;
*batch++ = lower_32_bits(vma->node.start);
- } else if (INTEL_GEN(i915) >= 4) {
+ } else if (INTEL_GEN(gt->i915) >= 4) {
*batch++ = MI_STORE_DWORD_IMM_GEN4 | MI_USE_GGTT;
*batch++ = 0;
*batch++ = lower_32_bits(hws_address(hws, rq));
@@ -242,7 +252,7 @@ hang_create_request(struct hang *h, struct intel_engine_cs *engine)
*batch++ = lower_32_bits(vma->node.start);
}
*batch++ = MI_BATCH_BUFFER_END; /* not reached */
- i915_gem_chipset_flush(h->i915);
+ intel_gt_chipset_flush(engine->gt);
if (rq->engine->emit_init_breadcrumb) {
err = rq->engine->emit_init_breadcrumb(rq);
@@ -251,7 +261,7 @@ hang_create_request(struct hang *h, struct intel_engine_cs *engine)
}
flags = 0;
- if (INTEL_GEN(vm->i915) <= 5)
+ if (INTEL_GEN(gt->i915) <= 5)
flags |= I915_DISPATCH_SECURE;
err = rq->engine->emit_bb_start(rq, vma->node.start, PAGE_SIZE, flags);
@@ -265,6 +275,7 @@ unpin_hws:
i915_vma_unpin(hws);
unpin_vma:
i915_vma_unpin(vma);
+ i915_vm_put(vm);
return err ? ERR_PTR(err) : rq;
}
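
For context on the batch being built above: the spinner stores its seqno to the HWS page and then branches back to itself with MI_BATCH_BUFFER_START, hanging until hang_fini() overwrites the batch with MI_BATCH_BUFFER_END and flushes. The shape of that handshake, modelled in plain C with a polling thread standing in for the GPU (opcodes and values illustrative):

#include <pthread.h>
#include <stdatomic.h>
#include <stdint.h>
#include <stdio.h>
#include <unistd.h>

/* The spinner loops until someone rewrites its first dword with
 * MI_BATCH_BUFFER_END; modelled as a worker polling a shared word. */
#define BB_END_SKETCH 0xa0000000u /* illustrative, not the real encoding */

static _Atomic uint32_t batch; /* 0 == MI_BATCH_BUFFER_START-to-self */

static void *spinner(void *arg)
{
        (void)arg;
        while (atomic_load(&batch) != BB_END_SKETCH)
                ; /* "GPU" spinning inside the batch */
        return NULL;
}

int main(void)
{
        pthread_t t;

        pthread_create(&t, NULL, spinner, NULL);
        usleep(10000); /* let it hang, as the selftest waits for the seqno */

        /* hang_fini(): terminate the loop, then "flush" so the HW sees it */
        atomic_store(&batch, BB_END_SKETCH);
        pthread_join(t, NULL);
        puts("spinner terminated");
        return 0;
}
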
@@ -276,7 +287,7 @@ static u32 hws_seqno(const struct hang *h, const struct i915_request *rq)
static void hang_fini(struct hang *h)
{
*h->batch = MI_BATCH_BUFFER_END;
- i915_gem_chipset_flush(h->i915);
+ intel_gt_chipset_flush(h->gt);
i915_gem_object_unpin_map(h->obj);
i915_gem_object_put(h->obj);
@@ -286,7 +297,7 @@ static void hang_fini(struct hang *h)
kernel_context_close(h->ctx);
- igt_flush_test(h->i915, I915_WAIT_LOCKED);
+ igt_flush_test(h->gt->i915);
}
static bool wait_until_running(struct hang *h, struct i915_request *rq)
@@ -299,9 +310,27 @@ static bool wait_until_running(struct hang *h, struct i915_request *rq)
1000));
}
+static void engine_heartbeat_disable(struct intel_engine_cs *engine,
+ unsigned long *saved)
+{
+ *saved = engine->props.heartbeat_interval_ms;
+ engine->props.heartbeat_interval_ms = 0;
+
+ intel_engine_pm_get(engine);
+ intel_engine_park_heartbeat(engine);
+}
+
+static void engine_heartbeat_enable(struct intel_engine_cs *engine,
+ unsigned long saved)
+{
+ intel_engine_pm_put(engine);
+
+ engine->props.heartbeat_interval_ms = saved;
+}
+
static int igt_hang_sanitycheck(void *arg)
{
- struct drm_i915_private *i915 = arg;
+ struct intel_gt *gt = arg;
struct i915_request *rq;
struct intel_engine_cs *engine;
enum intel_engine_id id;
@@ -310,13 +339,12 @@ static int igt_hang_sanitycheck(void *arg)
/* Basic check that we can execute our hanging batch */
- mutex_lock(&i915->drm.struct_mutex);
- err = hang_init(&h, i915);
+ err = hang_init(&h, gt);
if (err)
- goto unlock;
+ return err;
- for_each_engine(engine, i915, id) {
- struct igt_wedge_me w;
+ for_each_engine(engine, gt, id) {
+ struct intel_wedge_me w;
long timeout;
if (!intel_engine_can_store_dword(engine))
@@ -333,15 +361,15 @@ static int igt_hang_sanitycheck(void *arg)
i915_request_get(rq);
*h.batch = MI_BATCH_BUFFER_END;
- i915_gem_chipset_flush(i915);
+ intel_gt_chipset_flush(engine->gt);
i915_request_add(rq);
timeout = 0;
- igt_wedge_on_timeout(&w, i915, HZ / 10 /* 100ms timeout*/)
+ intel_wedge_on_timeout(&w, gt, HZ / 10 /* 100ms */)
timeout = i915_request_wait(rq, 0,
MAX_SCHEDULE_TIMEOUT);
- if (i915_reset_failed(i915))
+ if (intel_gt_is_wedged(gt))
timeout = -EIO;
i915_request_put(rq);
@@ -356,8 +384,6 @@ static int igt_hang_sanitycheck(void *arg)
fini:
hang_fini(&h);
-unlock:
- mutex_unlock(&i915->drm.struct_mutex);
return err;
}
@@ -368,43 +394,33 @@ static bool wait_for_idle(struct intel_engine_cs *engine)
static int igt_reset_nop(void *arg)
{
- struct drm_i915_private *i915 = arg;
+ struct intel_gt *gt = arg;
+ struct i915_gpu_error *global = &gt->i915->gpu_error;
struct intel_engine_cs *engine;
- struct i915_gem_context *ctx;
unsigned int reset_count, count;
enum intel_engine_id id;
- intel_wakeref_t wakeref;
- struct drm_file *file;
IGT_TIMEOUT(end_time);
int err = 0;
/* Check that we can reset during non-user portions of requests */
- file = mock_file(i915);
- if (IS_ERR(file))
- return PTR_ERR(file);
-
- mutex_lock(&i915->drm.struct_mutex);
- ctx = live_context(i915, file);
- mutex_unlock(&i915->drm.struct_mutex);
- if (IS_ERR(ctx)) {
- err = PTR_ERR(ctx);
- goto out;
- }
-
- i915_gem_context_clear_bannable(ctx);
- wakeref = intel_runtime_pm_get(&i915->runtime_pm);
- reset_count = i915_reset_count(&i915->gpu_error);
+ reset_count = i915_reset_count(global);
count = 0;
do {
- mutex_lock(&i915->drm.struct_mutex);
- for_each_engine(engine, i915, id) {
+ for_each_engine(engine, gt, id) {
+ struct intel_context *ce;
int i;
+ ce = intel_context_create(engine);
+ if (IS_ERR(ce)) {
+ err = PTR_ERR(ce);
+ break;
+ }
+
for (i = 0; i < 16; i++) {
struct i915_request *rq;
- rq = igt_request_alloc(ctx, engine);
+ rq = intel_context_create_request(ce);
if (IS_ERR(rq)) {
err = PTR_ERR(rq);
break;
@@ -412,83 +428,65 @@ static int igt_reset_nop(void *arg)
i915_request_add(rq);
}
+
+ intel_context_put(ce);
}
- mutex_unlock(&i915->drm.struct_mutex);
- igt_global_reset_lock(i915);
- i915_reset(i915, ALL_ENGINES, NULL);
- igt_global_reset_unlock(i915);
- if (i915_reset_failed(i915)) {
+ igt_global_reset_lock(gt);
+ intel_gt_reset(gt, ALL_ENGINES, NULL);
+ igt_global_reset_unlock(gt);
+
+ if (intel_gt_is_wedged(gt)) {
err = -EIO;
break;
}
- if (i915_reset_count(&i915->gpu_error) !=
- reset_count + ++count) {
+ if (i915_reset_count(global) != reset_count + ++count) {
pr_err("Full GPU reset not recorded!\n");
err = -EINVAL;
break;
}
- err = igt_flush_test(i915, 0);
+ err = igt_flush_test(gt->i915);
if (err)
break;
} while (time_before(jiffies, end_time));
pr_info("%s: %d resets\n", __func__, count);
- mutex_lock(&i915->drm.struct_mutex);
- err = igt_flush_test(i915, I915_WAIT_LOCKED);
- mutex_unlock(&i915->drm.struct_mutex);
-
- intel_runtime_pm_put(&i915->runtime_pm, wakeref);
-
-out:
- mock_file_free(i915, file);
- if (i915_reset_failed(i915))
+ if (igt_flush_test(gt->i915))
err = -EIO;
return err;
}
static int igt_reset_nop_engine(void *arg)
{
- struct drm_i915_private *i915 = arg;
+ struct intel_gt *gt = arg;
+ struct i915_gpu_error *global = &gt->i915->gpu_error;
struct intel_engine_cs *engine;
- struct i915_gem_context *ctx;
enum intel_engine_id id;
- intel_wakeref_t wakeref;
- struct drm_file *file;
- int err = 0;
/* Check that we can engine-reset during non-user portions */
- if (!intel_has_reset_engine(i915))
+ if (!intel_has_reset_engine(gt))
return 0;
- file = mock_file(i915);
- if (IS_ERR(file))
- return PTR_ERR(file);
-
- mutex_lock(&i915->drm.struct_mutex);
- ctx = live_context(i915, file);
- mutex_unlock(&i915->drm.struct_mutex);
- if (IS_ERR(ctx)) {
- err = PTR_ERR(ctx);
- goto out;
- }
-
- i915_gem_context_clear_bannable(ctx);
- wakeref = intel_runtime_pm_get(&i915->runtime_pm);
- for_each_engine(engine, i915, id) {
- unsigned int reset_count, reset_engine_count;
- unsigned int count;
+ for_each_engine(engine, gt, id) {
+ unsigned int reset_count, reset_engine_count, count;
+ struct intel_context *ce;
+ unsigned long heartbeat;
IGT_TIMEOUT(end_time);
+ int err;
- reset_count = i915_reset_count(&i915->gpu_error);
- reset_engine_count = i915_reset_engine_count(&i915->gpu_error,
- engine);
+ ce = intel_context_create(engine);
+ if (IS_ERR(ce))
+ return PTR_ERR(ce);
+
+ reset_count = i915_reset_count(global);
+ reset_engine_count = i915_reset_engine_count(global, engine);
count = 0;
- set_bit(I915_RESET_ENGINE + id, &i915->gpu_error.flags);
+ engine_heartbeat_disable(engine, &heartbeat);
+ set_bit(I915_RESET_ENGINE + id, &gt->reset.flags);
do {
int i;
@@ -499,11 +497,10 @@ static int igt_reset_nop_engine(void *arg)
break;
}
- mutex_lock(&i915->drm.struct_mutex);
for (i = 0; i < 16; i++) {
struct i915_request *rq;
- rq = igt_request_alloc(ctx, engine);
+ rq = intel_context_create_request(ce);
if (IS_ERR(rq)) {
err = PTR_ERR(rq);
break;
@@ -511,21 +508,19 @@ static int igt_reset_nop_engine(void *arg)
i915_request_add(rq);
}
- mutex_unlock(&i915->drm.struct_mutex);
-
- err = i915_reset_engine(engine, NULL);
+ err = intel_engine_reset(engine, NULL);
if (err) {
pr_err("i915_reset_engine failed\n");
break;
}
- if (i915_reset_count(&i915->gpu_error) != reset_count) {
+ if (i915_reset_count(global) != reset_count) {
pr_err("Full GPU reset recorded! (engine reset expected)\n");
err = -EINVAL;
break;
}
- if (i915_reset_engine_count(&i915->gpu_error, engine) !=
+ if (i915_reset_engine_count(global, engine) !=
reset_engine_count + ++count) {
pr_err("%s engine reset not recorded!\n",
engine->name);
@@ -533,31 +528,24 @@ static int igt_reset_nop_engine(void *arg)
break;
}
} while (time_before(jiffies, end_time));
- clear_bit(I915_RESET_ENGINE + id, &i915->gpu_error.flags);
- pr_info("%s(%s): %d resets\n", __func__, engine->name, count);
+ clear_bit(I915_RESET_ENGINE + id, &gt->reset.flags);
+ engine_heartbeat_enable(engine, heartbeat);
- if (err)
- break;
+ pr_info("%s(%s): %d resets\n", __func__, engine->name, count);
- err = igt_flush_test(i915, 0);
+ intel_context_put(ce);
+ if (igt_flush_test(gt->i915))
+ err = -EIO;
if (err)
- break;
+ return err;
}
- mutex_lock(&i915->drm.struct_mutex);
- err = igt_flush_test(i915, I915_WAIT_LOCKED);
- mutex_unlock(&i915->drm.struct_mutex);
-
- intel_runtime_pm_put(&i915->runtime_pm, wakeref);
-out:
- mock_file_free(i915, file);
- if (i915_reset_failed(i915))
- err = -EIO;
- return err;
+ return 0;
}
-static int __igt_reset_engine(struct drm_i915_private *i915, bool active)
+static int __igt_reset_engine(struct intel_gt *gt, bool active)
{
+ struct i915_gpu_error *global = &gt->i915->gpu_error;
struct intel_engine_cs *engine;
enum intel_engine_id id;
struct hang h;
@@ -565,19 +553,18 @@ static int __igt_reset_engine(struct drm_i915_private *i915, bool active)
/* Check that we can issue an engine reset on an idle engine (no-op) */
- if (!intel_has_reset_engine(i915))
+ if (!intel_has_reset_engine(gt))
return 0;
if (active) {
- mutex_lock(&i915->drm.struct_mutex);
- err = hang_init(&h, i915);
- mutex_unlock(&i915->drm.struct_mutex);
+ err = hang_init(&h, gt);
if (err)
return err;
}
- for_each_engine(engine, i915, id) {
+ for_each_engine(engine, gt, id) {
unsigned int reset_count, reset_engine_count;
+ unsigned long heartbeat;
IGT_TIMEOUT(end_time);
if (active && !intel_engine_can_store_dword(engine))
@@ -590,30 +577,26 @@ static int __igt_reset_engine(struct drm_i915_private *i915, bool active)
break;
}
- reset_count = i915_reset_count(&i915->gpu_error);
- reset_engine_count = i915_reset_engine_count(&i915->gpu_error,
- engine);
+ reset_count = i915_reset_count(global);
+ reset_engine_count = i915_reset_engine_count(global, engine);
- intel_engine_pm_get(engine);
- set_bit(I915_RESET_ENGINE + id, &i915->gpu_error.flags);
+ engine_heartbeat_disable(engine, &heartbeat);
+ set_bit(I915_RESET_ENGINE + id, &gt->reset.flags);
do {
if (active) {
struct i915_request *rq;
- mutex_lock(&i915->drm.struct_mutex);
rq = hang_create_request(&h, engine);
if (IS_ERR(rq)) {
err = PTR_ERR(rq);
- mutex_unlock(&i915->drm.struct_mutex);
break;
}
i915_request_get(rq);
i915_request_add(rq);
- mutex_unlock(&i915->drm.struct_mutex);
if (!wait_until_running(&h, rq)) {
- struct drm_printer p = drm_info_printer(i915->drm.dev);
+ struct drm_printer p = drm_info_printer(gt->i915->drm.dev);
pr_err("%s: Failed to start request %llx, at %x\n",
__func__, rq->fence.seqno, hws_seqno(&h, rq));
@@ -628,19 +611,19 @@ static int __igt_reset_engine(struct drm_i915_private *i915, bool active)
i915_request_put(rq);
}
- err = i915_reset_engine(engine, NULL);
+ err = intel_engine_reset(engine, NULL);
if (err) {
pr_err("i915_reset_engine failed\n");
break;
}
- if (i915_reset_count(&i915->gpu_error) != reset_count) {
+ if (i915_reset_count(global) != reset_count) {
pr_err("Full GPU reset recorded! (engine reset expected)\n");
err = -EINVAL;
break;
}
- if (i915_reset_engine_count(&i915->gpu_error, engine) !=
+ if (i915_reset_engine_count(global, engine) !=
++reset_engine_count) {
pr_err("%s engine reset not recorded!\n",
engine->name);
@@ -648,25 +631,22 @@ static int __igt_reset_engine(struct drm_i915_private *i915, bool active)
break;
}
} while (time_before(jiffies, end_time));
- clear_bit(I915_RESET_ENGINE + id, &i915->gpu_error.flags);
- intel_engine_pm_put(engine);
+ clear_bit(I915_RESET_ENGINE + id, &gt->reset.flags);
+ engine_heartbeat_enable(engine, heartbeat);
if (err)
break;
- err = igt_flush_test(i915, 0);
+ err = igt_flush_test(gt->i915);
if (err)
break;
}
- if (i915_reset_failed(i915))
+ if (intel_gt_is_wedged(gt))
err = -EIO;
- if (active) {
- mutex_lock(&i915->drm.struct_mutex);
+ if (active)
hang_fini(&h);
- mutex_unlock(&i915->drm.struct_mutex);
- }
return err;
}
@@ -707,7 +687,7 @@ static int active_request_put(struct i915_request *rq)
rq->fence.seqno);
GEM_TRACE_DUMP();
- i915_gem_set_wedged(rq->i915);
+ intel_gt_set_wedged(rq->engine->gt);
err = -EIO;
}
@@ -722,47 +702,42 @@ static int active_engine(void *data)
struct active_engine *arg = data;
struct intel_engine_cs *engine = arg->engine;
struct i915_request *rq[8] = {};
- struct i915_gem_context *ctx[ARRAY_SIZE(rq)];
- struct drm_file *file;
- unsigned long count = 0;
+ struct intel_context *ce[ARRAY_SIZE(rq)];
+ unsigned long count;
int err = 0;
- file = mock_file(engine->i915);
- if (IS_ERR(file))
- return PTR_ERR(file);
-
- for (count = 0; count < ARRAY_SIZE(ctx); count++) {
- mutex_lock(&engine->i915->drm.struct_mutex);
- ctx[count] = live_context(engine->i915, file);
- mutex_unlock(&engine->i915->drm.struct_mutex);
- if (IS_ERR(ctx[count])) {
- err = PTR_ERR(ctx[count]);
+ for (count = 0; count < ARRAY_SIZE(ce); count++) {
+ ce[count] = intel_context_create(engine);
+ if (IS_ERR(ce[count])) {
+ err = PTR_ERR(ce[count]);
while (count--)
- i915_gem_context_put(ctx[count]);
- goto err_file;
+ intel_context_put(ce[count]);
+ return err;
}
}
+ count = 0;
while (!kthread_should_stop()) {
unsigned int idx = count++ & (ARRAY_SIZE(rq) - 1);
struct i915_request *old = rq[idx];
struct i915_request *new;
- mutex_lock(&engine->i915->drm.struct_mutex);
- new = igt_request_alloc(ctx[idx], engine);
+ new = intel_context_create_request(ce[idx]);
if (IS_ERR(new)) {
- mutex_unlock(&engine->i915->drm.struct_mutex);
err = PTR_ERR(new);
break;
}
- if (arg->flags & TEST_PRIORITY)
- ctx[idx]->sched.priority =
- i915_prandom_u32_max_state(512, &prng);
-
rq[idx] = i915_request_get(new);
i915_request_add(new);
- mutex_unlock(&engine->i915->drm.struct_mutex);
+
+ if (engine->schedule && arg->flags & TEST_PRIORITY) {
+ struct i915_sched_attr attr = {
+ .priority =
+ i915_prandom_u32_max_state(512, &prng),
+ };
+ engine->schedule(rq[idx], &attr);
+ }
err = active_request_put(old);
if (err)
@@ -777,17 +752,18 @@ static int active_engine(void *data)
/* Keep the first error */
if (!err)
err = err__;
+
+ intel_context_put(ce[count]);
}
-err_file:
- mock_file_free(engine->i915, file);
return err;
}
-static int __igt_reset_engines(struct drm_i915_private *i915,
+static int __igt_reset_engines(struct intel_gt *gt,
const char *test_name,
unsigned int flags)
{
+ struct i915_gpu_error *global = &gt->i915->gpu_error;
struct intel_engine_cs *engine, *other;
enum intel_engine_id id, tmp;
struct hang h;
@@ -797,13 +773,11 @@ static int __igt_reset_engines(struct drm_i915_private *i915,
* with any other engine.
*/
- if (!intel_has_reset_engine(i915))
+ if (!intel_has_reset_engine(gt))
return 0;
if (flags & TEST_ACTIVE) {
- mutex_lock(&i915->drm.struct_mutex);
- err = hang_init(&h, i915);
- mutex_unlock(&i915->drm.struct_mutex);
+ err = hang_init(&h, gt);
if (err)
return err;
@@ -811,10 +785,11 @@ static int __igt_reset_engines(struct drm_i915_private *i915,
h.ctx->sched.priority = 1024;
}
- for_each_engine(engine, i915, id) {
+ for_each_engine(engine, gt, id) {
struct active_engine threads[I915_NUM_ENGINES] = {};
- unsigned long global = i915_reset_count(&i915->gpu_error);
+ unsigned long device = i915_reset_count(global);
unsigned long count = 0, reported;
+ unsigned long heartbeat;
IGT_TIMEOUT(end_time);
if (flags & TEST_ACTIVE &&
@@ -829,12 +804,11 @@ static int __igt_reset_engines(struct drm_i915_private *i915,
}
memset(threads, 0, sizeof(threads));
- for_each_engine(other, i915, tmp) {
+ for_each_engine(other, gt, tmp) {
struct task_struct *tsk;
threads[tmp].resets =
- i915_reset_engine_count(&i915->gpu_error,
- other);
+ i915_reset_engine_count(global, other);
if (!(flags & TEST_OTHERS))
continue;
@@ -856,26 +830,25 @@ static int __igt_reset_engines(struct drm_i915_private *i915,
get_task_struct(tsk);
}
- intel_engine_pm_get(engine);
- set_bit(I915_RESET_ENGINE + id, &i915->gpu_error.flags);
+ yield(); /* start all threads before we begin */
+
+ engine_heartbeat_disable(engine, &heartbeat);
+ set_bit(I915_RESET_ENGINE + id, &gt->reset.flags);
do {
struct i915_request *rq = NULL;
if (flags & TEST_ACTIVE) {
- mutex_lock(&i915->drm.struct_mutex);
rq = hang_create_request(&h, engine);
if (IS_ERR(rq)) {
err = PTR_ERR(rq);
- mutex_unlock(&i915->drm.struct_mutex);
break;
}
i915_request_get(rq);
i915_request_add(rq);
- mutex_unlock(&i915->drm.struct_mutex);
if (!wait_until_running(&h, rq)) {
- struct drm_printer p = drm_info_printer(i915->drm.dev);
+ struct drm_printer p = drm_info_printer(gt->i915->drm.dev);
pr_err("%s: Failed to start request %llx, at %x\n",
__func__, rq->fence.seqno, hws_seqno(&h, rq));
@@ -888,7 +861,7 @@ static int __igt_reset_engines(struct drm_i915_private *i915,
}
}
- err = i915_reset_engine(engine, NULL);
+ err = intel_engine_reset(engine, NULL);
if (err) {
pr_err("i915_reset_engine(%s:%s): failed, err=%d\n",
engine->name, test_name, err);
@@ -900,7 +873,7 @@ static int __igt_reset_engines(struct drm_i915_private *i915,
if (rq) {
if (i915_request_wait(rq, 0, HZ / 5) < 0) {
struct drm_printer p =
- drm_info_printer(i915->drm.dev);
+ drm_info_printer(gt->i915->drm.dev);
pr_err("i915_reset_engine(%s:%s):"
" failed to complete request after reset\n",
@@ -910,7 +883,7 @@ static int __igt_reset_engines(struct drm_i915_private *i915,
i915_request_put(rq);
GEM_TRACE_DUMP();
- i915_gem_set_wedged(i915);
+ intel_gt_set_wedged(gt);
err = -EIO;
break;
}
@@ -920,7 +893,7 @@ static int __igt_reset_engines(struct drm_i915_private *i915,
if (!(flags & TEST_SELF) && !wait_for_idle(engine)) {
struct drm_printer p =
- drm_info_printer(i915->drm.dev);
+ drm_info_printer(gt->i915->drm.dev);
pr_err("i915_reset_engine(%s:%s):"
" failed to idle after reset\n",
@@ -932,12 +905,13 @@ static int __igt_reset_engines(struct drm_i915_private *i915,
break;
}
} while (time_before(jiffies, end_time));
- clear_bit(I915_RESET_ENGINE + id, &i915->gpu_error.flags);
- intel_engine_pm_put(engine);
+ clear_bit(I915_RESET_ENGINE + id, &gt->reset.flags);
+ engine_heartbeat_enable(engine, heartbeat);
+
pr_info("i915_reset_engine(%s:%s): %lu resets\n",
engine->name, test_name, count);
- reported = i915_reset_engine_count(&i915->gpu_error, engine);
+ reported = i915_reset_engine_count(global, engine);
reported -= threads[engine->id].resets;
if (reported != count) {
pr_err("i915_reset_engine(%s:%s): reset %lu times, but reported %lu\n",
@@ -947,7 +921,7 @@ static int __igt_reset_engines(struct drm_i915_private *i915,
}
unwind:
- for_each_engine(other, i915, tmp) {
+ for_each_engine(other, gt, tmp) {
int ret;
if (!threads[tmp].task)
@@ -962,22 +936,21 @@ unwind:
}
put_task_struct(threads[tmp].task);
- if (other != engine &&
+ if (other->uabi_class != engine->uabi_class &&
threads[tmp].resets !=
- i915_reset_engine_count(&i915->gpu_error, other)) {
+ i915_reset_engine_count(global, other)) {
pr_err("Innocent engine %s was reset (count=%ld)\n",
other->name,
- i915_reset_engine_count(&i915->gpu_error,
- other) -
+ i915_reset_engine_count(global, other) -
threads[tmp].resets);
if (!err)
err = -EINVAL;
}
}
- if (global != i915_reset_count(&i915->gpu_error)) {
+ if (device != i915_reset_count(global)) {
pr_err("Global reset (count=%ld)!\n",
- i915_reset_count(&i915->gpu_error) - global);
+ i915_reset_count(global) - device);
if (!err)
err = -EINVAL;
}
@@ -985,21 +958,16 @@ unwind:
if (err)
break;
- mutex_lock(&i915->drm.struct_mutex);
- err = igt_flush_test(i915, I915_WAIT_LOCKED);
- mutex_unlock(&i915->drm.struct_mutex);
+ err = igt_flush_test(gt->i915);
if (err)
break;
}
- if (i915_reset_failed(i915))
+ if (intel_gt_is_wedged(gt))
err = -EIO;
- if (flags & TEST_ACTIVE) {
- mutex_lock(&i915->drm.struct_mutex);
+ if (flags & TEST_ACTIVE)
hang_fini(&h);
- mutex_unlock(&i915->drm.struct_mutex);
- }
return err;
}
@@ -1024,13 +992,13 @@ static int igt_reset_engines(void *arg)
},
{ }
};
- struct drm_i915_private *i915 = arg;
+ struct intel_gt *gt = arg;
typeof(*phases) *p;
int err;
for (p = phases; p->name; p++) {
if (p->flags & TEST_PRIORITY) {
- if (!(i915->caps.scheduler & I915_SCHEDULER_CAP_PRIORITY))
+ if (!(gt->i915->caps.scheduler & I915_SCHEDULER_CAP_PRIORITY))
continue;
}
@@ -1042,38 +1010,38 @@ static int igt_reset_engines(void *arg)
return 0;
}
-static u32 fake_hangcheck(struct drm_i915_private *i915,
- intel_engine_mask_t mask)
+static u32 fake_hangcheck(struct intel_gt *gt, intel_engine_mask_t mask)
{
- u32 count = i915_reset_count(&i915->gpu_error);
+ u32 count = i915_reset_count(&gt->i915->gpu_error);
- i915_reset(i915, mask, NULL);
+ intel_gt_reset(gt, mask, NULL);
return count;
}
static int igt_reset_wait(void *arg)
{
- struct drm_i915_private *i915 = arg;
+ struct intel_gt *gt = arg;
+ struct i915_gpu_error *global = &gt->i915->gpu_error;
+ struct intel_engine_cs *engine = gt->engine[RCS0];
struct i915_request *rq;
unsigned int reset_count;
struct hang h;
long timeout;
int err;
- if (!intel_engine_can_store_dword(i915->engine[RCS0]))
+ if (!engine || !intel_engine_can_store_dword(engine))
return 0;
/* Check that we detect a stuck waiter and issue a reset */
- igt_global_reset_lock(i915);
+ igt_global_reset_lock(gt);
- mutex_lock(&i915->drm.struct_mutex);
- err = hang_init(&h, i915);
+ err = hang_init(&h, gt);
if (err)
goto unlock;
- rq = hang_create_request(&h, i915->engine[RCS0]);
+ rq = hang_create_request(&h, engine);
if (IS_ERR(rq)) {
err = PTR_ERR(rq);
goto fini;
@@ -1083,19 +1051,19 @@ static int igt_reset_wait(void *arg)
i915_request_add(rq);
if (!wait_until_running(&h, rq)) {
- struct drm_printer p = drm_info_printer(i915->drm.dev);
+ struct drm_printer p = drm_info_printer(gt->i915->drm.dev);
pr_err("%s: Failed to start request %llx, at %x\n",
__func__, rq->fence.seqno, hws_seqno(&h, rq));
intel_engine_dump(rq->engine, &p, "%s\n", rq->engine->name);
- i915_gem_set_wedged(i915);
+ intel_gt_set_wedged(gt);
err = -EIO;
goto out_rq;
}
- reset_count = fake_hangcheck(i915, ALL_ENGINES);
+ reset_count = fake_hangcheck(gt, ALL_ENGINES);
timeout = i915_request_wait(rq, 0, 10);
if (timeout < 0) {
@@ -1105,7 +1073,7 @@ static int igt_reset_wait(void *arg)
goto out_rq;
}
- if (i915_reset_count(&i915->gpu_error) == reset_count) {
+ if (i915_reset_count(global) == reset_count) {
pr_err("No GPU reset recorded!\n");
err = -EINVAL;
goto out_rq;
@@ -1116,10 +1084,9 @@ out_rq:
fini:
hang_fini(&h);
unlock:
- mutex_unlock(&i915->drm.struct_mutex);
- igt_global_reset_unlock(i915);
+ igt_global_reset_unlock(gt);
- if (i915_reset_failed(i915))
+ if (intel_gt_is_wedged(gt))
return -EIO;
return err;
@@ -1134,15 +1101,14 @@ static int evict_vma(void *data)
{
struct evict_vma *arg = data;
struct i915_address_space *vm = arg->vma->vm;
- struct drm_i915_private *i915 = vm->i915;
struct drm_mm_node evict = arg->vma->node;
int err;
complete(&arg->completion);
- mutex_lock(&i915->drm.struct_mutex);
+ mutex_lock(&vm->mutex);
err = i915_gem_evict_for_node(vm, &evict, 0);
- mutex_unlock(&i915->drm.struct_mutex);
+ mutex_unlock(&vm->mutex);
return err;
}
@@ -1150,57 +1116,62 @@ static int evict_vma(void *data)
static int evict_fence(void *data)
{
struct evict_vma *arg = data;
- struct drm_i915_private *i915 = arg->vma->vm->i915;
int err;
complete(&arg->completion);
- mutex_lock(&i915->drm.struct_mutex);
-
/* Mark the fence register as dirty to force the mmio update. */
err = i915_gem_object_set_tiling(arg->vma->obj, I915_TILING_Y, 512);
if (err) {
pr_err("Invalid Y-tiling settings; err:%d\n", err);
- goto out_unlock;
+ return err;
+ }
+
+ err = i915_vma_pin(arg->vma, 0, 0, PIN_GLOBAL | PIN_MAPPABLE);
+ if (err) {
+ pr_err("Unable to pin vma for Y-tiled fence; err:%d\n", err);
+ return err;
}
err = i915_vma_pin_fence(arg->vma);
+ i915_vma_unpin(arg->vma);
if (err) {
pr_err("Unable to pin Y-tiled fence; err:%d\n", err);
- goto out_unlock;
+ return err;
}
i915_vma_unpin_fence(arg->vma);
-out_unlock:
- mutex_unlock(&i915->drm.struct_mutex);
-
- return err;
+ return 0;
}
-static int __igt_reset_evict_vma(struct drm_i915_private *i915,
+static int __igt_reset_evict_vma(struct intel_gt *gt,
struct i915_address_space *vm,
int (*fn)(void *),
unsigned int flags)
{
+ struct intel_engine_cs *engine = gt->engine[RCS0];
struct drm_i915_gem_object *obj;
struct task_struct *tsk = NULL;
struct i915_request *rq;
struct evict_vma arg;
struct hang h;
+ unsigned int pin_flags;
int err;
- if (!intel_engine_can_store_dword(i915->engine[RCS0]))
+ if (!gt->ggtt->num_fences && flags & EXEC_OBJECT_NEEDS_FENCE)
+ return 0;
+
+ if (!engine || !intel_engine_can_store_dword(engine))
return 0;
/* Check that we can recover an unbind stuck on a hanging request */
- mutex_lock(&i915->drm.struct_mutex);
- err = hang_init(&h, i915);
+ err = hang_init(&h, gt);
if (err)
- goto unlock;
+ return err;
- obj = i915_gem_object_create_internal(i915, SZ_1M);
+ obj = i915_gem_object_create_internal(gt->i915, SZ_1M);
if (IS_ERR(obj)) {
err = PTR_ERR(obj);
goto fini;
@@ -1220,16 +1191,18 @@ static int __igt_reset_evict_vma(struct drm_i915_private *i915,
goto out_obj;
}
- rq = hang_create_request(&h, i915->engine[RCS0]);
+ rq = hang_create_request(&h, engine);
if (IS_ERR(rq)) {
err = PTR_ERR(rq);
goto out_obj;
}
- err = i915_vma_pin(arg.vma, 0, 0,
- i915_vma_is_ggtt(arg.vma) ?
- PIN_GLOBAL | PIN_MAPPABLE :
- PIN_USER);
+ pin_flags = i915_vma_is_ggtt(arg.vma) ? PIN_GLOBAL : PIN_USER;
+
+ if (flags & EXEC_OBJECT_NEEDS_FENCE)
+ pin_flags |= PIN_MAPPABLE;
+
+ err = i915_vma_pin(arg.vma, 0, 0, pin_flags);
if (err) {
i915_request_add(rq);
goto out_obj;
@@ -1246,7 +1219,10 @@ static int __igt_reset_evict_vma(struct drm_i915_private *i915,
}
i915_vma_lock(arg.vma);
- err = i915_vma_move_to_active(arg.vma, rq, flags);
+ err = i915_request_await_object(rq, arg.vma->obj,
+ flags & EXEC_OBJECT_WRITE);
+ if (err == 0)
+ err = i915_vma_move_to_active(arg.vma, rq, flags);
i915_vma_unlock(arg.vma);
if (flags & EXEC_OBJECT_NEEDS_FENCE)
@@ -1258,16 +1234,14 @@ static int __igt_reset_evict_vma(struct drm_i915_private *i915,
if (err)
goto out_rq;
- mutex_unlock(&i915->drm.struct_mutex);
-
if (!wait_until_running(&h, rq)) {
- struct drm_printer p = drm_info_printer(i915->drm.dev);
+ struct drm_printer p = drm_info_printer(gt->i915->drm.dev);
pr_err("%s: Failed to start request %llx, at %x\n",
__func__, rq->fence.seqno, hws_seqno(&h, rq));
intel_engine_dump(rq->engine, &p, "%s\n", rq->engine->name);
- i915_gem_set_wedged(i915);
+ intel_gt_set_wedged(gt);
goto out_reset;
}
@@ -1284,41 +1258,37 @@ static int __igt_reset_evict_vma(struct drm_i915_private *i915,
wait_for_completion(&arg.completion);
if (wait_for(!list_empty(&rq->fence.cb_list), 10)) {
- struct drm_printer p = drm_info_printer(i915->drm.dev);
+ struct drm_printer p = drm_info_printer(gt->i915->drm.dev);
pr_err("igt/evict_vma kthread did not wait\n");
intel_engine_dump(rq->engine, &p, "%s\n", rq->engine->name);
- i915_gem_set_wedged(i915);
+ intel_gt_set_wedged(gt);
goto out_reset;
}
out_reset:
- igt_global_reset_lock(i915);
- fake_hangcheck(rq->i915, rq->engine->mask);
- igt_global_reset_unlock(i915);
+ igt_global_reset_lock(gt);
+ fake_hangcheck(gt, rq->engine->mask);
+ igt_global_reset_unlock(gt);
if (tsk) {
- struct igt_wedge_me w;
+ struct intel_wedge_me w;
/* The reset, even indirectly, should take less than 10ms. */
- igt_wedge_on_timeout(&w, i915, HZ / 10 /* 100ms timeout*/)
+ intel_wedge_on_timeout(&w, gt, HZ / 10 /* 100ms */)
err = kthread_stop(tsk);
put_task_struct(tsk);
}
- mutex_lock(&i915->drm.struct_mutex);
out_rq:
i915_request_put(rq);
out_obj:
i915_gem_object_put(obj);
fini:
hang_fini(&h);
-unlock:
- mutex_unlock(&i915->drm.struct_mutex);
-
- if (i915_reset_failed(i915))
+ if (intel_gt_is_wedged(gt))
return -EIO;
return err;
@@ -1326,56 +1296,48 @@ unlock:
static int igt_reset_evict_ggtt(void *arg)
{
- struct drm_i915_private *i915 = arg;
+ struct intel_gt *gt = arg;
- return __igt_reset_evict_vma(i915, &i915->ggtt.vm,
+ return __igt_reset_evict_vma(gt, &gt->ggtt->vm,
evict_vma, EXEC_OBJECT_WRITE);
}
static int igt_reset_evict_ppgtt(void *arg)
{
- struct drm_i915_private *i915 = arg;
- struct i915_gem_context *ctx;
- struct drm_file *file;
+ struct intel_gt *gt = arg;
+ struct i915_ppgtt *ppgtt;
int err;
- file = mock_file(i915);
- if (IS_ERR(file))
- return PTR_ERR(file);
+ /* aliasing == global gtt locking, covered above */
+ if (INTEL_PPGTT(gt->i915) < INTEL_PPGTT_FULL)
+ return 0;
- mutex_lock(&i915->drm.struct_mutex);
- ctx = live_context(i915, file);
- mutex_unlock(&i915->drm.struct_mutex);
- if (IS_ERR(ctx)) {
- err = PTR_ERR(ctx);
- goto out;
- }
+ ppgtt = i915_ppgtt_create(gt);
+ if (IS_ERR(ppgtt))
+ return PTR_ERR(ppgtt);
- err = 0;
- if (ctx->vm) /* aliasing == global gtt locking, covered above */
- err = __igt_reset_evict_vma(i915, ctx->vm,
- evict_vma, EXEC_OBJECT_WRITE);
+ err = __igt_reset_evict_vma(gt, &ppgtt->vm,
+ evict_vma, EXEC_OBJECT_WRITE);
+ i915_vm_put(&ppgtt->vm);
-out:
- mock_file_free(i915, file);
return err;
}
static int igt_reset_evict_fence(void *arg)
{
- struct drm_i915_private *i915 = arg;
+ struct intel_gt *gt = arg;
- return __igt_reset_evict_vma(i915, &i915->ggtt.vm,
+ return __igt_reset_evict_vma(gt, &gt->ggtt->vm,
evict_fence, EXEC_OBJECT_NEEDS_FENCE);
}
-static int wait_for_others(struct drm_i915_private *i915,
+static int wait_for_others(struct intel_gt *gt,
struct intel_engine_cs *exclude)
{
struct intel_engine_cs *engine;
enum intel_engine_id id;
- for_each_engine(engine, i915, id) {
+ for_each_engine(engine, gt, id) {
if (engine == exclude)
continue;
@@ -1388,7 +1350,8 @@ static int wait_for_others(struct drm_i915_private *i915,
static int igt_reset_queue(void *arg)
{
- struct drm_i915_private *i915 = arg;
+ struct intel_gt *gt = arg;
+ struct i915_gpu_error *global = &gt->i915->gpu_error;
struct intel_engine_cs *engine;
enum intel_engine_id id;
struct hang h;
@@ -1396,14 +1359,13 @@ static int igt_reset_queue(void *arg)
/* Check that we replay pending requests following a hang */
- igt_global_reset_lock(i915);
+ igt_global_reset_lock(gt);
- mutex_lock(&i915->drm.struct_mutex);
- err = hang_init(&h, i915);
+ err = hang_init(&h, gt);
if (err)
goto unlock;
- for_each_engine(engine, i915, id) {
+ for_each_engine(engine, gt, id) {
struct i915_request *prev;
IGT_TIMEOUT(end_time);
unsigned int count;
@@ -1444,7 +1406,7 @@ static int igt_reset_queue(void *arg)
* (hangcheck), or we focus on resetting just one
* engine and so avoid repeatedly resetting innocents.
*/
- err = wait_for_others(i915, engine);
+ err = wait_for_others(gt, engine);
if (err) {
pr_err("%s(%s): Failed to idle other inactive engines after device reset\n",
__func__, engine->name);
@@ -1452,12 +1414,12 @@ static int igt_reset_queue(void *arg)
i915_request_put(prev);
GEM_TRACE_DUMP();
- i915_gem_set_wedged(i915);
+ intel_gt_set_wedged(gt);
goto fini;
}
if (!wait_until_running(&h, prev)) {
- struct drm_printer p = drm_info_printer(i915->drm.dev);
+ struct drm_printer p = drm_info_printer(gt->i915->drm.dev);
pr_err("%s(%s): Failed to start request %llx, at %x\n",
__func__, engine->name,
@@ -1468,13 +1430,13 @@ static int igt_reset_queue(void *arg)
i915_request_put(rq);
i915_request_put(prev);
- i915_gem_set_wedged(i915);
+ intel_gt_set_wedged(gt);
err = -EIO;
goto fini;
}
- reset_count = fake_hangcheck(i915, BIT(id));
+ reset_count = fake_hangcheck(gt, BIT(id));
if (prev->fence.error != -EIO) {
pr_err("GPU reset not recorded on hanging request [fence.error=%d]!\n",
@@ -1494,7 +1456,7 @@ static int igt_reset_queue(void *arg)
goto fini;
}
- if (i915_reset_count(&i915->gpu_error) == reset_count) {
+ if (i915_reset_count(global) == reset_count) {
pr_err("No GPU reset recorded!\n");
i915_request_put(rq);
i915_request_put(prev);
@@ -1509,11 +1471,11 @@ static int igt_reset_queue(void *arg)
pr_info("%s: Completed %d resets\n", engine->name, count);
*h.batch = MI_BATCH_BUFFER_END;
- i915_gem_chipset_flush(i915);
+ intel_gt_chipset_flush(engine->gt);
i915_request_put(prev);
- err = igt_flush_test(i915, I915_WAIT_LOCKED);
+ err = igt_flush_test(gt->i915);
if (err)
break;
}
@@ -1521,10 +1483,9 @@ static int igt_reset_queue(void *arg)
fini:
hang_fini(&h);
unlock:
- mutex_unlock(&i915->drm.struct_mutex);
- igt_global_reset_unlock(i915);
+ igt_global_reset_unlock(gt);
- if (i915_reset_failed(i915))
+ if (intel_gt_is_wedged(gt))
return -EIO;
return err;
@@ -1532,26 +1493,25 @@ unlock:
static int igt_handle_error(void *arg)
{
- struct drm_i915_private *i915 = arg;
- struct intel_engine_cs *engine = i915->engine[RCS0];
+ struct intel_gt *gt = arg;
+ struct i915_gpu_error *global = &gt->i915->gpu_error;
+ struct intel_engine_cs *engine = gt->engine[RCS0];
struct hang h;
struct i915_request *rq;
- struct i915_gpu_state *error;
+ struct i915_gpu_coredump *error;
int err;
/* Check that we can issue a global GPU and engine reset */
- if (!intel_has_reset_engine(i915))
+ if (!intel_has_reset_engine(gt))
return 0;
if (!engine || !intel_engine_can_store_dword(engine))
return 0;
- mutex_lock(&i915->drm.struct_mutex);
-
- err = hang_init(&h, i915);
+ err = hang_init(&h, gt);
if (err)
- goto err_unlock;
+ return err;
rq = hang_create_request(&h, engine);
if (IS_ERR(rq)) {
@@ -1563,28 +1523,24 @@ static int igt_handle_error(void *arg)
i915_request_add(rq);
if (!wait_until_running(&h, rq)) {
- struct drm_printer p = drm_info_printer(i915->drm.dev);
+ struct drm_printer p = drm_info_printer(gt->i915->drm.dev);
pr_err("%s: Failed to start request %llx, at %x\n",
__func__, rq->fence.seqno, hws_seqno(&h, rq));
intel_engine_dump(rq->engine, &p, "%s\n", rq->engine->name);
- i915_gem_set_wedged(i915);
+ intel_gt_set_wedged(gt);
err = -EIO;
goto err_request;
}
- mutex_unlock(&i915->drm.struct_mutex);
-
/* Temporarily disable error capture */
- error = xchg(&i915->gpu_error.first_error, (void *)-1);
+ error = xchg(&global->first_error, (void *)-1);
- i915_handle_error(i915, engine->mask, 0, NULL);
+ intel_gt_handle_error(gt, engine->mask, 0, NULL);
- xchg(&i915->gpu_error.first_error, error);
-
- mutex_lock(&i915->drm.struct_mutex);
+ xchg(&global->first_error, error);
if (rq->fence.error != -EIO) {
pr_err("Guilty request not identified!\n");
@@ -1596,8 +1552,6 @@ err_request:
i915_request_put(rq);
err_fini:
hang_fini(&h);
-err_unlock:
- mutex_unlock(&i915->drm.struct_mutex);
return err;
}
@@ -1611,10 +1565,10 @@ static int __igt_atomic_reset_engine(struct intel_engine_cs *engine,
GEM_TRACE("i915_reset_engine(%s:%s) under %s\n",
engine->name, mode, p->name);
- tasklet_disable_nosync(t);
+ tasklet_disable(t);
p->critical_section_begin();
- err = i915_reset_engine(engine, NULL);
+ err = intel_engine_reset(engine, NULL);
p->critical_section_end();
tasklet_enable(t);
@@ -1629,7 +1583,6 @@ static int __igt_atomic_reset_engine(struct intel_engine_cs *engine,
static int igt_atomic_reset_engine(struct intel_engine_cs *engine,
const struct igt_atomic_section *p)
{
- struct drm_i915_private *i915 = engine->i915;
struct i915_request *rq;
struct hang h;
int err;
@@ -1638,7 +1591,7 @@ static int igt_atomic_reset_engine(struct intel_engine_cs *engine,
if (err)
return err;
- err = hang_init(&h, i915);
+ err = hang_init(&h, engine->gt);
if (err)
return err;
@@ -1657,16 +1610,16 @@ static int igt_atomic_reset_engine(struct intel_engine_cs *engine,
pr_err("%s(%s): Failed to start request %llx, at %x\n",
__func__, engine->name,
rq->fence.seqno, hws_seqno(&h, rq));
- i915_gem_set_wedged(i915);
+ intel_gt_set_wedged(engine->gt);
err = -EIO;
}
if (err == 0) {
- struct igt_wedge_me w;
+ struct intel_wedge_me w;
- igt_wedge_on_timeout(&w, i915, HZ / 20 /* 50ms timeout*/)
+ intel_wedge_on_timeout(&w, engine->gt, HZ / 20 /* 50ms */)
i915_request_wait(rq, 0, MAX_SCHEDULE_TIMEOUT);
- if (i915_reset_failed(i915))
+ if (intel_gt_is_wedged(engine->gt))
err = -EIO;
}
@@ -1678,30 +1631,29 @@ out:
static int igt_reset_engines_atomic(void *arg)
{
- struct drm_i915_private *i915 = arg;
+ struct intel_gt *gt = arg;
const typeof(*igt_atomic_phases) *p;
int err = 0;
/* Check that the engines resets are usable from atomic context */
- if (!intel_has_reset_engine(i915))
+ if (!intel_has_reset_engine(gt))
return 0;
- if (USES_GUC_SUBMISSION(i915))
+ if (USES_GUC_SUBMISSION(gt->i915))
return 0;
- igt_global_reset_lock(i915);
- mutex_lock(&i915->drm.struct_mutex);
+ igt_global_reset_lock(gt);
/* Flush any requests before we get started and check basics */
- if (!igt_force_reset(i915))
+ if (!igt_force_reset(gt))
goto unlock;
for (p = igt_atomic_phases; p->name; p++) {
struct intel_engine_cs *engine;
enum intel_engine_id id;
- for_each_engine(engine, i915, id) {
+ for_each_engine(engine, gt, id) {
err = igt_atomic_reset_engine(engine, p);
if (err)
goto out;
@@ -1710,11 +1662,9 @@ static int igt_reset_engines_atomic(void *arg)
out:
/* As we poke around the guts, do a full reset before continuing. */
- igt_force_reset(i915);
-
+ igt_force_reset(gt);
unlock:
- mutex_unlock(&i915->drm.struct_mutex);
- igt_global_reset_unlock(i915);
+ igt_global_reset_unlock(gt);
return err;
}
@@ -1736,28 +1686,21 @@ int intel_hangcheck_live_selftests(struct drm_i915_private *i915)
SUBTEST(igt_reset_evict_fence),
SUBTEST(igt_handle_error),
};
+ struct intel_gt *gt = &i915->gt;
intel_wakeref_t wakeref;
- bool saved_hangcheck;
int err;
- if (!intel_has_gpu_reset(i915))
+ if (!intel_has_gpu_reset(gt))
return 0;
- if (i915_terminally_wedged(i915))
+ if (intel_gt_is_wedged(gt))
return -EIO; /* we're long past hope of a successful reset */
- wakeref = intel_runtime_pm_get(&i915->runtime_pm);
- saved_hangcheck = fetch_and_zero(&i915_modparams.enable_hangcheck);
- drain_delayed_work(&i915->gpu_error.hangcheck_work); /* flush param */
-
- err = i915_subtests(tests, i915);
+ wakeref = intel_runtime_pm_get(gt->uncore->rpm);
- mutex_lock(&i915->drm.struct_mutex);
- igt_flush_test(i915, I915_WAIT_LOCKED);
- mutex_unlock(&i915->drm.struct_mutex);
+ err = intel_gt_live_subtests(tests, gt);
- i915_modparams.enable_hangcheck = saved_hangcheck;
- intel_runtime_pm_put(&i915->runtime_pm, wakeref);
+ intel_runtime_pm_put(gt->uncore->rpm, wakeref);
return err;
}
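
Editor's note: throughout the rewritten hangcheck selftests, the old pattern of holding struct_mutex and an engine-pm reference around a per-engine reset is replaced by parking the engine heartbeat. Below is a minimal sketch of that bracket, using the engine_heartbeat_disable()/engine_heartbeat_enable() helpers these selftests define; reset_one_engine() is a hypothetical name, for illustration only:

    static int reset_one_engine(struct intel_engine_cs *engine)
    {
    	unsigned long heartbeat;
    	int err;

    	/* Park the heartbeat so a background pulse cannot disturb the test */
    	engine_heartbeat_disable(engine, &heartbeat);
    	set_bit(I915_RESET_ENGINE + engine->id, &engine->gt->reset.flags);

    	err = intel_engine_reset(engine, NULL); /* engine-only, no full GT reset */

    	clear_bit(I915_RESET_ENGINE + engine->id, &engine->gt->reset.flags);
    	engine_heartbeat_enable(engine, heartbeat);
    	return err;
    }
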
diff --git a/drivers/gpu/drm/i915/gt/selftest_llc.c b/drivers/gpu/drm/i915/gt/selftest_llc.c
new file mode 100644
index 000000000000..fd3770e48ac7
--- /dev/null
+++ b/drivers/gpu/drm/i915/gt/selftest_llc.c
@@ -0,0 +1,80 @@
+/*
+ * SPDX-License-Identifier: MIT
+ *
+ * Copyright © 2019 Intel Corporation
+ */
+
+#include "intel_pm.h" /* intel_gpu_freq() */
+#include "selftest_llc.h"
+#include "intel_rps.h"
+
+static int gen6_verify_ring_freq(struct intel_llc *llc)
+{
+ struct drm_i915_private *i915 = llc_to_gt(llc)->i915;
+ struct ia_constants consts;
+ intel_wakeref_t wakeref;
+ unsigned int gpu_freq;
+ int err = 0;
+
+ wakeref = intel_runtime_pm_get(llc_to_gt(llc)->uncore->rpm);
+
+ if (!get_ia_constants(llc, &consts)) {
+ err = -ENODEV;
+ goto out_rpm;
+ }
+
+ for (gpu_freq = consts.min_gpu_freq;
+ gpu_freq <= consts.max_gpu_freq;
+ gpu_freq++) {
+ struct intel_rps *rps = &llc_to_gt(llc)->rps;
+
+ unsigned int ia_freq, ring_freq, found;
+ u32 val;
+
+ calc_ia_freq(llc, gpu_freq, &consts, &ia_freq, &ring_freq);
+
+ val = gpu_freq;
+ if (sandybridge_pcode_read(i915,
+ GEN6_PCODE_READ_MIN_FREQ_TABLE,
+ &val, NULL)) {
+ pr_err("Failed to read freq table[%d], range [%d, %d]\n",
+ gpu_freq, consts.min_gpu_freq, consts.max_gpu_freq);
+ err = -ENXIO;
+ break;
+ }
+
+ found = (val >> 0) & 0xff;
+ if (found != ia_freq) {
+ pr_err("Min freq table(%d/[%d, %d]):%dMHz did not match expected CPU freq, found %d, expected %d\n",
+ gpu_freq, consts.min_gpu_freq, consts.max_gpu_freq,
+ intel_gpu_freq(rps, gpu_freq * (INTEL_GEN(i915) >= 9 ? GEN9_FREQ_SCALER : 1)),
+ found, ia_freq);
+ err = -EINVAL;
+ break;
+ }
+
+ found = (val >> 8) & 0xff;
+ if (found != ring_freq) {
+ pr_err("Min freq table(%d/[%d, %d]):%dMHz did not match expected ring freq, found %d, expected %d\n",
+ gpu_freq, consts.min_gpu_freq, consts.max_gpu_freq,
+ intel_gpu_freq(rps, gpu_freq * (INTEL_GEN(i915) >= 9 ? GEN9_FREQ_SCALER : 1)),
+ found, ring_freq);
+ err = -EINVAL;
+ break;
+ }
+ }
+
+out_rpm:
+ intel_runtime_pm_put(llc_to_gt(llc)->uncore->rpm, wakeref);
+ return err;
+}
+
+int st_llc_verify(struct intel_llc *llc)
+{
+ int err = 0;
+
+ if (HAS_LLC(llc_to_gt(llc)->i915))
+ err = gen6_verify_ring_freq(llc);
+
+ return err;
+}
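
Editor's note: the masks in gen6_verify_ring_freq() above imply the layout of a GEN6_PCODE_READ_MIN_FREQ_TABLE reply: the expected CPU (IA) ratio sits in bits 7:0 and the expected ring ratio in bits 15:8 of the returned dword. A hypothetical helper, decode_freq_table_entry(), just to make that packing explicit:

    static void decode_freq_table_entry(u32 val,
    				    unsigned int *ia_freq,
    				    unsigned int *ring_freq)
    {
    	*ia_freq = (val >> 0) & 0xff;	/* bits 7:0  - expected CPU (IA) ratio */
    	*ring_freq = (val >> 8) & 0xff;	/* bits 15:8 - expected ring ratio */
    }
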
diff --git a/drivers/gpu/drm/i915/gt/selftest_llc.h b/drivers/gpu/drm/i915/gt/selftest_llc.h
new file mode 100644
index 000000000000..873f896e72f2
--- /dev/null
+++ b/drivers/gpu/drm/i915/gt/selftest_llc.h
@@ -0,0 +1,14 @@
+/*
+ * SPDX-License-Identifier: MIT
+ *
+ * Copyright © 2019 Intel Corporation
+ */
+
+#ifndef SELFTEST_LLC_H
+#define SELFTEST_LLC_H
+
+struct intel_llc;
+
+int st_llc_verify(struct intel_llc *llc);
+
+#endif /* SELFTEST_LLC_H */
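
Editor's note: a sketch of how a gt live selftest might invoke this entry point, assuming the intel_llc instance is embedded in struct intel_gt (consistent with llc_to_gt() above); verify_llc() is a hypothetical subtest name:

    static int verify_llc(void *arg)
    {
    	struct intel_gt *gt = arg;

    	/* st_llc_verify() itself checks HAS_LLC() and is a no-op otherwise */
    	return st_llc_verify(&gt->llc);
    }
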
diff --git a/drivers/gpu/drm/i915/gt/selftest_lrc.c b/drivers/gpu/drm/i915/gt/selftest_lrc.c
index 401e8b539297..65718ca2326e 100644
--- a/drivers/gpu/drm/i915/gt/selftest_lrc.c
+++ b/drivers/gpu/drm/i915/gt/selftest_lrc.c
@@ -7,6 +7,7 @@
#include <linux/prime_numbers.h>
#include "gem/i915_gem_pm.h"
+#include "gt/intel_engine_heartbeat.h"
#include "gt/intel_reset.h"
#include "i915_selftest.h"
@@ -19,75 +20,776 @@
#include "gem/selftests/igt_gem_utils.h"
#include "gem/selftests/mock_context.h"
+#define CS_GPR(engine, n) ((engine)->mmio_base + 0x600 + (n) * 4)
+#define NUM_GPR_DW (16 * 2) /* each GPR is 2 dwords */
+
+static struct i915_vma *create_scratch(struct intel_gt *gt)
+{
+ struct drm_i915_gem_object *obj;
+ struct i915_vma *vma;
+ int err;
+
+ obj = i915_gem_object_create_internal(gt->i915, PAGE_SIZE);
+ if (IS_ERR(obj))
+ return ERR_CAST(obj);
+
+ i915_gem_object_set_cache_coherency(obj, I915_CACHING_CACHED);
+
+ vma = i915_vma_instance(obj, &gt->ggtt->vm, NULL);
+ if (IS_ERR(vma)) {
+ i915_gem_object_put(obj);
+ return vma;
+ }
+
+ err = i915_vma_pin(vma, 0, 0, PIN_GLOBAL);
+ if (err) {
+ i915_gem_object_put(obj);
+ return ERR_PTR(err);
+ }
+
+ return vma;
+}
+
+static void engine_heartbeat_disable(struct intel_engine_cs *engine,
+ unsigned long *saved)
+{
+ *saved = engine->props.heartbeat_interval_ms;
+ engine->props.heartbeat_interval_ms = 0;
+
+ intel_engine_pm_get(engine);
+ intel_engine_park_heartbeat(engine);
+}
+
+static void engine_heartbeat_enable(struct intel_engine_cs *engine,
+ unsigned long saved)
+{
+ intel_engine_pm_put(engine);
+
+ engine->props.heartbeat_interval_ms = saved;
+}
+
static int live_sanitycheck(void *arg)
{
- struct drm_i915_private *i915 = arg;
+ struct intel_gt *gt = arg;
struct intel_engine_cs *engine;
- struct i915_gem_context *ctx;
enum intel_engine_id id;
struct igt_spinner spin;
- intel_wakeref_t wakeref;
- int err = -ENOMEM;
+ int err = 0;
- if (!HAS_LOGICAL_RING_CONTEXTS(i915))
+ if (!HAS_LOGICAL_RING_CONTEXTS(gt->i915))
return 0;
- mutex_lock(&i915->drm.struct_mutex);
- wakeref = intel_runtime_pm_get(&i915->runtime_pm);
-
- if (igt_spinner_init(&spin, i915))
- goto err_unlock;
-
- ctx = kernel_context(i915);
- if (!ctx)
- goto err_spin;
+ if (igt_spinner_init(&spin, gt))
+ return -ENOMEM;
- for_each_engine(engine, i915, id) {
+ for_each_engine(engine, gt, id) {
+ struct intel_context *ce;
struct i915_request *rq;
- rq = igt_spinner_create_request(&spin, ctx, engine, MI_NOOP);
+ ce = intel_context_create(engine);
+ if (IS_ERR(ce)) {
+ err = PTR_ERR(ce);
+ break;
+ }
+
+ rq = igt_spinner_create_request(&spin, ce, MI_NOOP);
if (IS_ERR(rq)) {
err = PTR_ERR(rq);
- goto err_ctx;
+ goto out_ctx;
}
i915_request_add(rq);
if (!igt_wait_for_spinner(&spin, rq)) {
GEM_TRACE("spinner failed to start\n");
GEM_TRACE_DUMP();
- i915_gem_set_wedged(i915);
+ intel_gt_set_wedged(gt);
err = -EIO;
- goto err_ctx;
+ goto out_ctx;
}
igt_spinner_end(&spin);
- if (igt_flush_test(i915, I915_WAIT_LOCKED)) {
+ if (igt_flush_test(gt->i915)) {
err = -EIO;
- goto err_ctx;
+ goto out_ctx;
}
+
+out_ctx:
+ intel_context_put(ce);
+ if (err)
+ break;
}
+ igt_spinner_fini(&spin);
+ return err;
+}
+
+static int live_unlite_restore(struct intel_gt *gt, int prio)
+{
+ struct intel_engine_cs *engine;
+ enum intel_engine_id id;
+ struct igt_spinner spin;
+ int err = -ENOMEM;
+
+ /*
+ * Check that we can correctly context switch between 2 instances
+ * on the same engine from the same parent context.
+ */
+
+ if (igt_spinner_init(&spin, gt))
+ return err;
+
err = 0;
-err_ctx:
- kernel_context_close(ctx);
-err_spin:
+ for_each_engine(engine, gt, id) {
+ struct intel_context *ce[2] = {};
+ struct i915_request *rq[2];
+ struct igt_live_test t;
+ unsigned long saved;
+ int n;
+
+ if (prio && !intel_engine_has_preemption(engine))
+ continue;
+
+ if (!intel_engine_can_store_dword(engine))
+ continue;
+
+ if (igt_live_test_begin(&t, gt->i915, __func__, engine->name)) {
+ err = -EIO;
+ break;
+ }
+ engine_heartbeat_disable(engine, &saved);
+
+ for (n = 0; n < ARRAY_SIZE(ce); n++) {
+ struct intel_context *tmp;
+
+ tmp = intel_context_create(engine);
+ if (IS_ERR(tmp)) {
+ err = PTR_ERR(tmp);
+ goto err_ce;
+ }
+
+ err = intel_context_pin(tmp);
+ if (err) {
+ intel_context_put(tmp);
+ goto err_ce;
+ }
+
+ /*
+ * Setup the pair of contexts such that if we
+ * lite-restore using the RING_TAIL from ce[1] it
+ * will execute garbage from ce[0]->ring.
+ */
+ memset(tmp->ring->vaddr,
+ POISON_INUSE, /* IPEHR: 0x5a5a5a5a [hung!] */
+ tmp->ring->vma->size);
+
+ ce[n] = tmp;
+ }
+ GEM_BUG_ON(!ce[1]->ring->size);
+ intel_ring_reset(ce[1]->ring, ce[1]->ring->size / 2);
+ __execlists_update_reg_state(ce[1], engine);
+
+ rq[0] = igt_spinner_create_request(&spin, ce[0], MI_ARB_CHECK);
+ if (IS_ERR(rq[0])) {
+ err = PTR_ERR(rq[0]);
+ goto err_ce;
+ }
+
+ i915_request_get(rq[0]);
+ i915_request_add(rq[0]);
+ GEM_BUG_ON(rq[0]->postfix > ce[1]->ring->emit);
+
+ if (!igt_wait_for_spinner(&spin, rq[0])) {
+ i915_request_put(rq[0]);
+ goto err_ce;
+ }
+
+ rq[1] = i915_request_create(ce[1]);
+ if (IS_ERR(rq[1])) {
+ err = PTR_ERR(rq[1]);
+ i915_request_put(rq[0]);
+ goto err_ce;
+ }
+
+ if (!prio) {
+ /*
+ * Ensure we do the switch to ce[1] on completion.
+ *
+ * rq[0] is already submitted, so this should reduce
+ * to a no-op (a wait on a request on the same engine
+ * uses the submit fence, not the completion fence),
+ * but it will install a dependency on rq[1] for rq[0]
+ * that will prevent the pair being reordered by
+ * timeslicing.
+ */
+ i915_request_await_dma_fence(rq[1], &rq[0]->fence);
+ }
+
+ i915_request_get(rq[1]);
+ i915_request_add(rq[1]);
+ GEM_BUG_ON(rq[1]->postfix <= rq[0]->postfix);
+ i915_request_put(rq[0]);
+
+ if (prio) {
+ struct i915_sched_attr attr = {
+ .priority = prio,
+ };
+
+ /* Alternatively preempt the spinner with ce[1] */
+ engine->schedule(rq[1], &attr);
+ }
+
+ /* And switch back to ce[0] for good measure */
+ rq[0] = i915_request_create(ce[0]);
+ if (IS_ERR(rq[0])) {
+ err = PTR_ERR(rq[0]);
+ i915_request_put(rq[1]);
+ goto err_ce;
+ }
+
+ i915_request_await_dma_fence(rq[0], &rq[1]->fence);
+ i915_request_get(rq[0]);
+ i915_request_add(rq[0]);
+ GEM_BUG_ON(rq[0]->postfix > rq[1]->postfix);
+ i915_request_put(rq[1]);
+ i915_request_put(rq[0]);
+
+err_ce:
+ tasklet_kill(&engine->execlists.tasklet); /* flush submission */
+ igt_spinner_end(&spin);
+ for (n = 0; n < ARRAY_SIZE(ce); n++) {
+ if (IS_ERR_OR_NULL(ce[n]))
+ break;
+
+ intel_context_unpin(ce[n]);
+ intel_context_put(ce[n]);
+ }
+
+ engine_heartbeat_enable(engine, saved);
+ if (igt_live_test_end(&t))
+ err = -EIO;
+ if (err)
+ break;
+ }
+
+ igt_spinner_fini(&spin);
+ return err;
+}
+
+static int live_unlite_switch(void *arg)
+{
+ return live_unlite_restore(arg, 0);
+}
+
+static int live_unlite_preempt(void *arg)
+{
+ return live_unlite_restore(arg, I915_USER_PRIORITY(I915_PRIORITY_MAX));
+}
+
+static int live_hold_reset(void *arg)
+{
+ struct intel_gt *gt = arg;
+ struct intel_engine_cs *engine;
+ enum intel_engine_id id;
+ struct igt_spinner spin;
+ int err = 0;
+
+ /*
+ * In order to support offline error capture for fast preempt reset,
+ * we need to decouple the guilty request and ensure that it and its
+ * descendants are not executed while the capture is in progress.
+ */
+
+ if (!intel_has_reset_engine(gt))
+ return 0;
+
+ if (igt_spinner_init(&spin, gt))
+ return -ENOMEM;
+
+ for_each_engine(engine, gt, id) {
+ struct intel_context *ce;
+ unsigned long heartbeat;
+ struct i915_request *rq;
+
+ ce = intel_context_create(engine);
+ if (IS_ERR(ce)) {
+ err = PTR_ERR(ce);
+ break;
+ }
+
+ engine_heartbeat_disable(engine, &heartbeat);
+
+ rq = igt_spinner_create_request(&spin, ce, MI_ARB_CHECK);
+ if (IS_ERR(rq)) {
+ err = PTR_ERR(rq);
+ goto out;
+ }
+ i915_request_add(rq);
+
+ if (!igt_wait_for_spinner(&spin, rq)) {
+ intel_gt_set_wedged(gt);
+ err = -ETIME;
+ goto out;
+ }
+
+ /* We have our request executing, now remove it and reset */
+
+ if (test_and_set_bit(I915_RESET_ENGINE + id,
+ &gt->reset.flags)) {
+ intel_gt_set_wedged(gt);
+ err = -EBUSY;
+ goto out;
+ }
+ tasklet_disable(&engine->execlists.tasklet);
+
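+	/*
+	 * With the tasklet disabled, run it by hand so that the spinner
+	 * is promoted into ELSP[0] before we try to hold it.
+	 */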
+ engine->execlists.tasklet.func(engine->execlists.tasklet.data);
+ GEM_BUG_ON(execlists_active(&engine->execlists) != rq);
+
+ i915_request_get(rq);
+ execlists_hold(engine, rq);
+ GEM_BUG_ON(!i915_request_on_hold(rq));
+
+ intel_engine_reset(engine, NULL);
+ GEM_BUG_ON(rq->fence.error != -EIO);
+
+ tasklet_enable(&engine->execlists.tasklet);
+ clear_and_wake_up_bit(I915_RESET_ENGINE + id,
+ &gt->reset.flags);
+
+ /* Check that we do not resubmit the held request */
+ if (!i915_request_wait(rq, 0, HZ / 5)) {
+ pr_err("%s: on hold request completed!\n",
+ engine->name);
+ i915_request_put(rq);
+ err = -EIO;
+ goto out;
+ }
+ GEM_BUG_ON(!i915_request_on_hold(rq));
+
+ /* But is resubmitted on release */
+ execlists_unhold(engine, rq);
+ if (i915_request_wait(rq, 0, HZ / 5) < 0) {
+ pr_err("%s: held request did not complete!\n",
+ engine->name);
+ intel_gt_set_wedged(gt);
+ err = -ETIME;
+ }
+ i915_request_put(rq);
+
+out:
+ engine_heartbeat_enable(engine, heartbeat);
+ intel_context_put(ce);
+ if (err)
+ break;
+ }
+
igt_spinner_fini(&spin);
-err_unlock:
- igt_flush_test(i915, I915_WAIT_LOCKED);
- intel_runtime_pm_put(&i915->runtime_pm, wakeref);
- mutex_unlock(&i915->drm.struct_mutex);
+ return err;
+}
+
+static int
+emit_semaphore_chain(struct i915_request *rq, struct i915_vma *vma, int idx)
+{
+ u32 *cs;
+
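+	/*
+	 * Each link busywaits on vma[idx] until it turns non-zero and,
+	 * once released, signals vma[idx - 1]; poking the final slot
+	 * (see release_queue()) unwinds the whole chain back to slot 0.
+	 */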
+ cs = intel_ring_begin(rq, 10);
+ if (IS_ERR(cs))
+ return PTR_ERR(cs);
+
+ *cs++ = MI_ARB_ON_OFF | MI_ARB_ENABLE;
+
+ *cs++ = MI_SEMAPHORE_WAIT |
+ MI_SEMAPHORE_GLOBAL_GTT |
+ MI_SEMAPHORE_POLL |
+ MI_SEMAPHORE_SAD_NEQ_SDD;
+ *cs++ = 0;
+ *cs++ = i915_ggtt_offset(vma) + 4 * idx;
+ *cs++ = 0;
+
+ if (idx > 0) {
+ *cs++ = MI_STORE_DWORD_IMM_GEN4 | MI_USE_GGTT;
+ *cs++ = i915_ggtt_offset(vma) + 4 * (idx - 1);
+ *cs++ = 0;
+ *cs++ = 1;
+ } else {
+ *cs++ = MI_NOOP;
+ *cs++ = MI_NOOP;
+ *cs++ = MI_NOOP;
+ *cs++ = MI_NOOP;
+ }
+
+ *cs++ = MI_ARB_ON_OFF | MI_ARB_DISABLE;
+
+ intel_ring_advance(rq, cs);
+ return 0;
+}
+
+static struct i915_request *
+semaphore_queue(struct intel_engine_cs *engine, struct i915_vma *vma, int idx)
+{
+ struct intel_context *ce;
+ struct i915_request *rq;
+ int err;
+
+ ce = intel_context_create(engine);
+ if (IS_ERR(ce))
+ return ERR_CAST(ce);
+
+ rq = intel_context_create_request(ce);
+ if (IS_ERR(rq))
+ goto out_ce;
+
+ err = 0;
+ if (rq->engine->emit_init_breadcrumb)
+ err = rq->engine->emit_init_breadcrumb(rq);
+ if (err == 0)
+ err = emit_semaphore_chain(rq, vma, idx);
+ if (err == 0)
+ i915_request_get(rq);
+ i915_request_add(rq);
+ if (err)
+ rq = ERR_PTR(err);
+
+out_ce:
+ intel_context_put(ce);
+ return rq;
+}
+
+static int
+release_queue(struct intel_engine_cs *engine,
+ struct i915_vma *vma,
+ int idx, int prio)
+{
+ struct i915_sched_attr attr = {
+ .priority = prio,
+ };
+ struct i915_request *rq;
+ u32 *cs;
+
+ rq = intel_engine_create_kernel_request(engine);
+ if (IS_ERR(rq))
+ return PTR_ERR(rq);
+
+ cs = intel_ring_begin(rq, 4);
+ if (IS_ERR(cs)) {
+ i915_request_add(rq);
+ return PTR_ERR(cs);
+ }
+
+ *cs++ = MI_STORE_DWORD_IMM_GEN4 | MI_USE_GGTT;
+ *cs++ = i915_ggtt_offset(vma) + 4 * (idx - 1);
+ *cs++ = 0;
+ *cs++ = 1;
+
+ intel_ring_advance(rq, cs);
+
+ i915_request_get(rq);
+ i915_request_add(rq);
+
+ local_bh_disable();
+ engine->schedule(rq, &attr);
+ local_bh_enable(); /* kick tasklet */
+
+ i915_request_put(rq);
+
+ return 0;
+}
+
+static int
+slice_semaphore_queue(struct intel_engine_cs *outer,
+ struct i915_vma *vma,
+ int count)
+{
+ struct intel_engine_cs *engine;
+ struct i915_request *head;
+ enum intel_engine_id id;
+ int err, i, n = 0;
+
+ head = semaphore_queue(outer, vma, n++);
+ if (IS_ERR(head))
+ return PTR_ERR(head);
+
+ for_each_engine(engine, outer->gt, id) {
+ for (i = 0; i < count; i++) {
+ struct i915_request *rq;
+
+ rq = semaphore_queue(engine, vma, n++);
+ if (IS_ERR(rq)) {
+ err = PTR_ERR(rq);
+ goto out;
+ }
+
+ i915_request_put(rq);
+ }
+ }
+
+ err = release_queue(outer, vma, n, INT_MAX);
+ if (err)
+ goto out;
+
+ if (i915_request_wait(head, 0,
+ 2 * RUNTIME_INFO(outer->i915)->num_engines * (count + 2) * (count + 3)) < 0) {
+ pr_err("Failed to slice along semaphore chain of length (%d, %d)!\n",
+ count, n);
+ GEM_TRACE_DUMP();
+ intel_gt_set_wedged(outer->gt);
+ err = -EIO;
+ }
+
+out:
+ i915_request_put(head);
+ return err;
+}
+
+static int live_timeslice_preempt(void *arg)
+{
+ struct intel_gt *gt = arg;
+ struct drm_i915_gem_object *obj;
+ struct i915_vma *vma;
+ void *vaddr;
+ int err = 0;
+ int count;
+
+ /*
+ * If a request takes too long, we would like to give other users
+ * a fair go on the GPU. In particular, users may create batches
+ * that wait upon external input, where that input may even be
+ * supplied by another GPU job. To avoid blocking forever, we
+ * need to preempt the current task and replace it with another
+ * ready task.
+ */
+ if (!IS_ACTIVE(CONFIG_DRM_I915_TIMESLICE_DURATION))
+ return 0;
+
+ obj = i915_gem_object_create_internal(gt->i915, PAGE_SIZE);
+ if (IS_ERR(obj))
+ return PTR_ERR(obj);
+
+ vma = i915_vma_instance(obj, &gt->ggtt->vm, NULL);
+ if (IS_ERR(vma)) {
+ err = PTR_ERR(vma);
+ goto err_obj;
+ }
+
+ vaddr = i915_gem_object_pin_map(obj, I915_MAP_WC);
+ if (IS_ERR(vaddr)) {
+ err = PTR_ERR(vaddr);
+ goto err_obj;
+ }
+
+ err = i915_vma_pin(vma, 0, 0, PIN_GLOBAL);
+ if (err)
+ goto err_map;
+
+ for_each_prime_number_from(count, 1, 16) {
+ struct intel_engine_cs *engine;
+ enum intel_engine_id id;
+
+ for_each_engine(engine, gt, id) {
+ unsigned long saved;
+
+ if (!intel_engine_has_preemption(engine))
+ continue;
+
+ memset(vaddr, 0, PAGE_SIZE);
+
+ engine_heartbeat_disable(engine, &saved);
+ err = slice_semaphore_queue(engine, vma, count);
+ engine_heartbeat_enable(engine, saved);
+ if (err)
+ goto err_pin;
+
+ if (igt_flush_test(gt->i915)) {
+ err = -EIO;
+ goto err_pin;
+ }
+ }
+ }
+
+err_pin:
+ i915_vma_unpin(vma);
+err_map:
+ i915_gem_object_unpin_map(obj);
+err_obj:
+ i915_gem_object_put(obj);
+ return err;
+}
+
+static struct i915_request *nop_request(struct intel_engine_cs *engine)
+{
+ struct i915_request *rq;
+
+ rq = intel_engine_create_kernel_request(engine);
+ if (IS_ERR(rq))
+ return rq;
+
+ i915_request_get(rq);
+ i915_request_add(rq);
+
+ return rq;
+}
+
+static int wait_for_submit(struct intel_engine_cs *engine,
+ struct i915_request *rq,
+ unsigned long timeout)
+{
+ timeout += jiffies;
+ do {
+ cond_resched();
+ intel_engine_flush_submission(engine);
+ if (i915_request_is_active(rq))
+ return 0;
+ } while (time_before(jiffies, timeout));
+
+ return -ETIME;
+}
+
+static long timeslice_threshold(const struct intel_engine_cs *engine)
+{
+ return 2 * msecs_to_jiffies_timeout(timeslice(engine)) + 1;
+}
+
+static int live_timeslice_queue(void *arg)
+{
+ struct intel_gt *gt = arg;
+ struct drm_i915_gem_object *obj;
+ struct intel_engine_cs *engine;
+ enum intel_engine_id id;
+ struct i915_vma *vma;
+ void *vaddr;
+ int err = 0;
+
+ /*
+ * Make sure that even if ELSP[0] and ELSP[1] are filled, with
+ * timeslicing between them disabled, we *do* enable timeslicing
+ * if the queue demands it. (Normally, we do not submit if
+ * ELSP[1] is already occupied, so must rely on timeslicing to
+ * eject ELSP[0] in favour of the queue.)
+ */
+ if (!IS_ACTIVE(CONFIG_DRM_I915_TIMESLICE_DURATION))
+ return 0;
+
+ obj = i915_gem_object_create_internal(gt->i915, PAGE_SIZE);
+ if (IS_ERR(obj))
+ return PTR_ERR(obj);
+
+ vma = i915_vma_instance(obj, &gt->ggtt->vm, NULL);
+ if (IS_ERR(vma)) {
+ err = PTR_ERR(vma);
+ goto err_obj;
+ }
+
+ vaddr = i915_gem_object_pin_map(obj, I915_MAP_WC);
+ if (IS_ERR(vaddr)) {
+ err = PTR_ERR(vaddr);
+ goto err_obj;
+ }
+
+ err = i915_vma_pin(vma, 0, 0, PIN_GLOBAL);
+ if (err)
+ goto err_map;
+
+ for_each_engine(engine, gt, id) {
+ struct i915_sched_attr attr = {
+ .priority = I915_USER_PRIORITY(I915_PRIORITY_MAX),
+ };
+ struct i915_request *rq, *nop;
+ unsigned long saved;
+
+ if (!intel_engine_has_preemption(engine))
+ continue;
+
+ engine_heartbeat_disable(engine, &saved);
+ memset(vaddr, 0, PAGE_SIZE);
+
+ /* ELSP[0]: semaphore wait */
+ rq = semaphore_queue(engine, vma, 0);
+ if (IS_ERR(rq)) {
+ err = PTR_ERR(rq);
+ goto err_heartbeat;
+ }
+ engine->schedule(rq, &attr);
+ err = wait_for_submit(engine, rq, HZ / 2);
+ if (err) {
+ pr_err("%s: Timed out trying to submit semaphores\n",
+ engine->name);
+ goto err_rq;
+ }
+
+ /* ELSP[1]: nop request */
+ nop = nop_request(engine);
+ if (IS_ERR(nop)) {
+ err = PTR_ERR(nop);
+ goto err_rq;
+ }
+ err = wait_for_submit(engine, nop, HZ / 2);
+ i915_request_put(nop);
+ if (err) {
+ pr_err("%s: Timed out trying to submit nop\n",
+ engine->name);
+ goto err_rq;
+ }
+
+ GEM_BUG_ON(i915_request_completed(rq));
+ GEM_BUG_ON(execlists_active(&engine->execlists) != rq);
+
+ /* Queue: semaphore signal, matching priority as semaphore */
+ err = release_queue(engine, vma, 1, effective_prio(rq));
+ if (err)
+ goto err_rq;
+
+ intel_engine_flush_submission(engine);
+ if (!READ_ONCE(engine->execlists.timer.expires) &&
+ !i915_request_completed(rq)) {
+ struct drm_printer p =
+ drm_info_printer(gt->i915->drm.dev);
+
+ GEM_TRACE_ERR("%s: Failed to enable timeslicing!\n",
+ engine->name);
+ intel_engine_dump(engine, &p,
+ "%s\n", engine->name);
+ GEM_TRACE_DUMP();
+
+ memset(vaddr, 0xff, PAGE_SIZE);
+ err = -EINVAL;
+ }
+
+ /* Timeslice every jiffy, so within 2 we should signal */
+ if (i915_request_wait(rq, 0, timeslice_threshold(engine)) < 0) {
+ struct drm_printer p =
+ drm_info_printer(gt->i915->drm.dev);
+
+ pr_err("%s: Failed to timeslice into queue\n",
+ engine->name);
+ intel_engine_dump(engine, &p,
+ "%s\n", engine->name);
+
+ memset(vaddr, 0xff, PAGE_SIZE);
+ err = -EIO;
+ }
+err_rq:
+ i915_request_put(rq);
+err_heartbeat:
+ engine_heartbeat_enable(engine, saved);
+ if (err)
+ break;
+ }
+
+ i915_vma_unpin(vma);
+err_map:
+ i915_gem_object_unpin_map(obj);
+err_obj:
+ i915_gem_object_put(obj);
return err;
}
static int live_busywait_preempt(void *arg)
{
- struct drm_i915_private *i915 = arg;
+ struct intel_gt *gt = arg;
struct i915_gem_context *ctx_hi, *ctx_lo;
struct intel_engine_cs *engine;
struct drm_i915_gem_object *obj;
struct i915_vma *vma;
enum intel_engine_id id;
- intel_wakeref_t wakeref;
int err = -ENOMEM;
u32 *map;
@@ -96,22 +798,19 @@ static int live_busywait_preempt(void *arg)
* preempt the busywaits used to synchronise between rings.
*/
- mutex_lock(&i915->drm.struct_mutex);
- wakeref = intel_runtime_pm_get(&i915->runtime_pm);
-
- ctx_hi = kernel_context(i915);
+ ctx_hi = kernel_context(gt->i915);
if (!ctx_hi)
- goto err_unlock;
+ return -ENOMEM;
ctx_hi->sched.priority =
I915_USER_PRIORITY(I915_CONTEXT_MAX_USER_PRIORITY);
- ctx_lo = kernel_context(i915);
+ ctx_lo = kernel_context(gt->i915);
if (!ctx_lo)
goto err_ctx_hi;
ctx_lo->sched.priority =
I915_USER_PRIORITY(I915_CONTEXT_MIN_USER_PRIORITY);
- obj = i915_gem_object_create_internal(i915, PAGE_SIZE);
+ obj = i915_gem_object_create_internal(gt->i915, PAGE_SIZE);
if (IS_ERR(obj)) {
err = PTR_ERR(obj);
goto err_ctx_lo;
@@ -123,7 +822,7 @@ static int live_busywait_preempt(void *arg)
goto err_obj;
}
- vma = i915_vma_instance(obj, &i915->ggtt.vm, NULL);
+ vma = i915_vma_instance(obj, &gt->ggtt->vm, NULL);
if (IS_ERR(vma)) {
err = PTR_ERR(vma);
goto err_map;
@@ -133,15 +832,18 @@ static int live_busywait_preempt(void *arg)
if (err)
goto err_map;
- for_each_engine(engine, i915, id) {
+ for_each_engine(engine, gt, id) {
struct i915_request *lo, *hi;
struct igt_live_test t;
u32 *cs;
+ if (!intel_engine_has_preemption(engine))
+ continue;
+
if (!intel_engine_can_store_dword(engine))
continue;
- if (igt_live_test_begin(&t, i915, __func__, engine->name)) {
+ if (igt_live_test_begin(&t, gt->i915, __func__, engine->name)) {
err = -EIO;
goto err_vma;
}
@@ -184,15 +886,19 @@ static int live_busywait_preempt(void *arg)
*cs++ = 0;
intel_ring_advance(lo, cs);
+
+ i915_request_get(lo);
i915_request_add(lo);
if (wait_for(READ_ONCE(*map), 10)) {
+ i915_request_put(lo);
err = -ETIMEDOUT;
goto err_vma;
}
/* Low priority request should be busywaiting now */
if (i915_request_wait(lo, 0, 1) != -ETIME) {
+ i915_request_put(lo);
pr_err("%s: Busywaiting request did not!\n",
engine->name);
err = -EIO;
@@ -202,6 +908,7 @@ static int live_busywait_preempt(void *arg)
hi = igt_request_alloc(ctx_hi, engine);
if (IS_ERR(hi)) {
err = PTR_ERR(hi);
+ i915_request_put(lo);
goto err_vma;
}
@@ -209,6 +916,7 @@ static int live_busywait_preempt(void *arg)
if (IS_ERR(cs)) {
err = PTR_ERR(cs);
i915_request_add(hi);
+ i915_request_put(lo);
goto err_vma;
}
@@ -221,7 +929,7 @@ static int live_busywait_preempt(void *arg)
i915_request_add(hi);
if (i915_request_wait(lo, 0, HZ / 5) < 0) {
- struct drm_printer p = drm_info_printer(i915->drm.dev);
+ struct drm_printer p = drm_info_printer(gt->i915->drm.dev);
pr_err("%s: Failed to preempt semaphore busywait!\n",
engine->name);
@@ -229,11 +937,13 @@ static int live_busywait_preempt(void *arg)
intel_engine_dump(engine, &p, "%s\n", engine->name);
GEM_TRACE_DUMP();
- i915_gem_set_wedged(i915);
+ i915_request_put(lo);
+ intel_gt_set_wedged(gt);
err = -EIO;
goto err_vma;
}
GEM_BUG_ON(READ_ONCE(*map));
+ i915_request_put(lo);
if (igt_live_test_end(&t)) {
err = -EIO;
@@ -252,65 +962,74 @@ err_ctx_lo:
kernel_context_close(ctx_lo);
err_ctx_hi:
kernel_context_close(ctx_hi);
-err_unlock:
- if (igt_flush_test(i915, I915_WAIT_LOCKED))
- err = -EIO;
- intel_runtime_pm_put(&i915->runtime_pm, wakeref);
- mutex_unlock(&i915->drm.struct_mutex);
return err;
}
+static struct i915_request *
+spinner_create_request(struct igt_spinner *spin,
+ struct i915_gem_context *ctx,
+ struct intel_engine_cs *engine,
+ u32 arb)
+{
+ struct intel_context *ce;
+ struct i915_request *rq;
+
+ ce = i915_gem_context_get_engine(ctx, engine->legacy_idx);
+ if (IS_ERR(ce))
+ return ERR_CAST(ce);
+
+ rq = igt_spinner_create_request(spin, ce, arb);
+ intel_context_put(ce);
+ return rq;
+}
+
static int live_preempt(void *arg)
{
- struct drm_i915_private *i915 = arg;
+ struct intel_gt *gt = arg;
struct i915_gem_context *ctx_hi, *ctx_lo;
struct igt_spinner spin_hi, spin_lo;
struct intel_engine_cs *engine;
enum intel_engine_id id;
- intel_wakeref_t wakeref;
int err = -ENOMEM;
- if (!HAS_LOGICAL_RING_PREEMPTION(i915))
+ if (!HAS_LOGICAL_RING_PREEMPTION(gt->i915))
return 0;
- if (!(i915->caps.scheduler & I915_SCHEDULER_CAP_PREEMPTION))
+ if (!(gt->i915->caps.scheduler & I915_SCHEDULER_CAP_PREEMPTION))
pr_err("Logical preemption supported, but not exposed\n");
- mutex_lock(&i915->drm.struct_mutex);
- wakeref = intel_runtime_pm_get(&i915->runtime_pm);
-
- if (igt_spinner_init(&spin_hi, i915))
- goto err_unlock;
+ if (igt_spinner_init(&spin_hi, gt))
+ return -ENOMEM;
- if (igt_spinner_init(&spin_lo, i915))
+ if (igt_spinner_init(&spin_lo, gt))
goto err_spin_hi;
- ctx_hi = kernel_context(i915);
+ ctx_hi = kernel_context(gt->i915);
if (!ctx_hi)
goto err_spin_lo;
ctx_hi->sched.priority =
I915_USER_PRIORITY(I915_CONTEXT_MAX_USER_PRIORITY);
- ctx_lo = kernel_context(i915);
+ ctx_lo = kernel_context(gt->i915);
if (!ctx_lo)
goto err_ctx_hi;
ctx_lo->sched.priority =
I915_USER_PRIORITY(I915_CONTEXT_MIN_USER_PRIORITY);
- for_each_engine(engine, i915, id) {
+ for_each_engine(engine, gt, id) {
struct igt_live_test t;
struct i915_request *rq;
if (!intel_engine_has_preemption(engine))
continue;
- if (igt_live_test_begin(&t, i915, __func__, engine->name)) {
+ if (igt_live_test_begin(&t, gt->i915, __func__, engine->name)) {
err = -EIO;
goto err_ctx_lo;
}
- rq = igt_spinner_create_request(&spin_lo, ctx_lo, engine,
- MI_ARB_CHECK);
+ rq = spinner_create_request(&spin_lo, ctx_lo, engine,
+ MI_ARB_CHECK);
if (IS_ERR(rq)) {
err = PTR_ERR(rq);
goto err_ctx_lo;
@@ -320,13 +1039,13 @@ static int live_preempt(void *arg)
if (!igt_wait_for_spinner(&spin_lo, rq)) {
GEM_TRACE("lo spinner failed to start\n");
GEM_TRACE_DUMP();
- i915_gem_set_wedged(i915);
+ intel_gt_set_wedged(gt);
err = -EIO;
goto err_ctx_lo;
}
- rq = igt_spinner_create_request(&spin_hi, ctx_hi, engine,
- MI_ARB_CHECK);
+ rq = spinner_create_request(&spin_hi, ctx_hi, engine,
+ MI_ARB_CHECK);
if (IS_ERR(rq)) {
igt_spinner_end(&spin_lo);
err = PTR_ERR(rq);
@@ -337,7 +1056,7 @@ static int live_preempt(void *arg)
if (!igt_wait_for_spinner(&spin_hi, rq)) {
GEM_TRACE("hi spinner failed to start\n");
GEM_TRACE_DUMP();
- i915_gem_set_wedged(i915);
+ intel_gt_set_wedged(gt);
err = -EIO;
goto err_ctx_lo;
}
@@ -360,58 +1079,53 @@ err_spin_lo:
igt_spinner_fini(&spin_lo);
err_spin_hi:
igt_spinner_fini(&spin_hi);
-err_unlock:
- igt_flush_test(i915, I915_WAIT_LOCKED);
- intel_runtime_pm_put(&i915->runtime_pm, wakeref);
- mutex_unlock(&i915->drm.struct_mutex);
return err;
}
static int live_late_preempt(void *arg)
{
- struct drm_i915_private *i915 = arg;
+ struct intel_gt *gt = arg;
struct i915_gem_context *ctx_hi, *ctx_lo;
struct igt_spinner spin_hi, spin_lo;
struct intel_engine_cs *engine;
struct i915_sched_attr attr = {};
enum intel_engine_id id;
- intel_wakeref_t wakeref;
int err = -ENOMEM;
- if (!HAS_LOGICAL_RING_PREEMPTION(i915))
+ if (!HAS_LOGICAL_RING_PREEMPTION(gt->i915))
return 0;
- mutex_lock(&i915->drm.struct_mutex);
- wakeref = intel_runtime_pm_get(&i915->runtime_pm);
-
- if (igt_spinner_init(&spin_hi, i915))
- goto err_unlock;
+ if (igt_spinner_init(&spin_hi, gt))
+ return -ENOMEM;
- if (igt_spinner_init(&spin_lo, i915))
+ if (igt_spinner_init(&spin_lo, gt))
goto err_spin_hi;
- ctx_hi = kernel_context(i915);
+ ctx_hi = kernel_context(gt->i915);
if (!ctx_hi)
goto err_spin_lo;
- ctx_lo = kernel_context(i915);
+ ctx_lo = kernel_context(gt->i915);
if (!ctx_lo)
goto err_ctx_hi;
- for_each_engine(engine, i915, id) {
+ /* Make sure ctx_lo stays before ctx_hi until we trigger preemption. */
+ ctx_lo->sched.priority = I915_USER_PRIORITY(1);
+
+ for_each_engine(engine, gt, id) {
struct igt_live_test t;
struct i915_request *rq;
if (!intel_engine_has_preemption(engine))
continue;
- if (igt_live_test_begin(&t, i915, __func__, engine->name)) {
+ if (igt_live_test_begin(&t, gt->i915, __func__, engine->name)) {
err = -EIO;
goto err_ctx_lo;
}
- rq = igt_spinner_create_request(&spin_lo, ctx_lo, engine,
- MI_ARB_CHECK);
+ rq = spinner_create_request(&spin_lo, ctx_lo, engine,
+ MI_ARB_CHECK);
if (IS_ERR(rq)) {
err = PTR_ERR(rq);
goto err_ctx_lo;
@@ -423,8 +1137,8 @@ static int live_late_preempt(void *arg)
goto err_wedged;
}
- rq = igt_spinner_create_request(&spin_hi, ctx_hi, engine,
- MI_NOOP);
+ rq = spinner_create_request(&spin_hi, ctx_hi, engine,
+ MI_NOOP);
if (IS_ERR(rq)) {
igt_spinner_end(&spin_lo);
err = PTR_ERR(rq);
@@ -464,16 +1178,12 @@ err_spin_lo:
igt_spinner_fini(&spin_lo);
err_spin_hi:
igt_spinner_fini(&spin_hi);
-err_unlock:
- igt_flush_test(i915, I915_WAIT_LOCKED);
- intel_runtime_pm_put(&i915->runtime_pm, wakeref);
- mutex_unlock(&i915->drm.struct_mutex);
return err;
err_wedged:
igt_spinner_end(&spin_hi);
igt_spinner_end(&spin_lo);
- i915_gem_set_wedged(i915);
+ intel_gt_set_wedged(gt);
err = -EIO;
goto err_ctx_lo;
}
@@ -483,14 +1193,13 @@ struct preempt_client {
struct i915_gem_context *ctx;
};
-static int preempt_client_init(struct drm_i915_private *i915,
- struct preempt_client *c)
+static int preempt_client_init(struct intel_gt *gt, struct preempt_client *c)
{
- c->ctx = kernel_context(i915);
+ c->ctx = kernel_context(gt->i915);
if (!c->ctx)
return -ENOMEM;
- if (igt_spinner_init(&c->spin, i915))
+ if (igt_spinner_init(&c->spin, gt))
goto err_ctx;
return 0;
@@ -506,16 +1215,435 @@ static void preempt_client_fini(struct preempt_client *c)
kernel_context_close(c->ctx);
}
+static int live_nopreempt(void *arg)
+{
+ struct intel_gt *gt = arg;
+ struct intel_engine_cs *engine;
+ struct preempt_client a, b;
+ enum intel_engine_id id;
+ int err = -ENOMEM;
+
+ /*
+ * Verify that we can disable preemption for an individual request
+ * that may be under observation and does not want to be interrupted.
+ */
+
+ if (!HAS_LOGICAL_RING_PREEMPTION(gt->i915))
+ return 0;
+
+ if (preempt_client_init(gt, &a))
+ return -ENOMEM;
+ if (preempt_client_init(gt, &b))
+ goto err_client_a;
+ b.ctx->sched.priority = I915_USER_PRIORITY(I915_PRIORITY_MAX);
+
+ for_each_engine(engine, gt, id) {
+ struct i915_request *rq_a, *rq_b;
+
+ if (!intel_engine_has_preemption(engine))
+ continue;
+
+ engine->execlists.preempt_hang.count = 0;
+
+ rq_a = spinner_create_request(&a.spin,
+ a.ctx, engine,
+ MI_ARB_CHECK);
+ if (IS_ERR(rq_a)) {
+ err = PTR_ERR(rq_a);
+ goto err_client_b;
+ }
+
+ /* Low priority client, but unpreemptable! */
+ __set_bit(I915_FENCE_FLAG_NOPREEMPT, &rq_a->fence.flags);
+
+ i915_request_add(rq_a);
+ if (!igt_wait_for_spinner(&a.spin, rq_a)) {
+ pr_err("First client failed to start\n");
+ goto err_wedged;
+ }
+
+ rq_b = spinner_create_request(&b.spin,
+ b.ctx, engine,
+ MI_ARB_CHECK);
+ if (IS_ERR(rq_b)) {
+ err = PTR_ERR(rq_b);
+ goto err_client_b;
+ }
+
+ i915_request_add(rq_b);
+
+ /* B is much more important than A! (But A is unpreemptable.) */
+ GEM_BUG_ON(rq_prio(rq_b) <= rq_prio(rq_a));
+
+ /* Wait long enough for preemption and timeslicing */
+ if (igt_wait_for_spinner(&b.spin, rq_b)) {
+ pr_err("Second client started too early!\n");
+ goto err_wedged;
+ }
+
+ igt_spinner_end(&a.spin);
+
+ if (!igt_wait_for_spinner(&b.spin, rq_b)) {
+ pr_err("Second client failed to start\n");
+ goto err_wedged;
+ }
+
+ igt_spinner_end(&b.spin);
+
+ if (engine->execlists.preempt_hang.count) {
+ pr_err("Preemption recorded x%d; should have been suppressed!\n",
+ engine->execlists.preempt_hang.count);
+ err = -EINVAL;
+ goto err_wedged;
+ }
+
+ if (igt_flush_test(gt->i915))
+ goto err_wedged;
+ }
+
+ err = 0;
+err_client_b:
+ preempt_client_fini(&b);
+err_client_a:
+ preempt_client_fini(&a);
+ return err;
+
+err_wedged:
+ igt_spinner_end(&b.spin);
+ igt_spinner_end(&a.spin);
+ intel_gt_set_wedged(gt);
+ err = -EIO;
+ goto err_client_b;
+}
+
+struct live_preempt_cancel {
+ struct intel_engine_cs *engine;
+ struct preempt_client a, b;
+};
+
+static int __cancel_active0(struct live_preempt_cancel *arg)
+{
+ struct i915_request *rq;
+ struct igt_live_test t;
+ int err;
+
+ /* Preempt cancel of ELSP0 */
+ GEM_TRACE("%s(%s)\n", __func__, arg->engine->name);
+ if (igt_live_test_begin(&t, arg->engine->i915,
+ __func__, arg->engine->name))
+ return -EIO;
+
+ rq = spinner_create_request(&arg->a.spin,
+ arg->a.ctx, arg->engine,
+ MI_ARB_CHECK);
+ if (IS_ERR(rq))
+ return PTR_ERR(rq);
+
+ clear_bit(CONTEXT_BANNED, &rq->context->flags);
+ i915_request_get(rq);
+ i915_request_add(rq);
+ if (!igt_wait_for_spinner(&arg->a.spin, rq)) {
+ err = -EIO;
+ goto out;
+ }
+
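+	/*
+	 * Mark the context as banned and kick the engine with a heartbeat
+	 * pulse; the high priority pulse forces a preemption event, during
+	 * which the banned spinner is cancelled with -EIO.
+	 */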
+ intel_context_set_banned(rq->context);
+ err = intel_engine_pulse(arg->engine);
+ if (err)
+ goto out;
+
+ if (i915_request_wait(rq, 0, HZ / 5) < 0) {
+ err = -EIO;
+ goto out;
+ }
+
+ if (rq->fence.error != -EIO) {
+ pr_err("Cancelled inflight0 request did not report -EIO\n");
+ err = -EINVAL;
+ goto out;
+ }
+
+out:
+ i915_request_put(rq);
+ if (igt_live_test_end(&t))
+ err = -EIO;
+ return err;
+}
+
+static int __cancel_active1(struct live_preempt_cancel *arg)
+{
+ struct i915_request *rq[2] = {};
+ struct igt_live_test t;
+ int err;
+
+ /* Preempt cancel of ELSP1 */
+ GEM_TRACE("%s(%s)\n", __func__, arg->engine->name);
+ if (igt_live_test_begin(&t, arg->engine->i915,
+ __func__, arg->engine->name))
+ return -EIO;
+
+ rq[0] = spinner_create_request(&arg->a.spin,
+ arg->a.ctx, arg->engine,
+ MI_NOOP); /* no preemption */
+ if (IS_ERR(rq[0]))
+ return PTR_ERR(rq[0]);
+
+ clear_bit(CONTEXT_BANNED, &rq[0]->context->flags);
+ i915_request_get(rq[0]);
+ i915_request_add(rq[0]);
+ if (!igt_wait_for_spinner(&arg->a.spin, rq[0])) {
+ err = -EIO;
+ goto out;
+ }
+
+ rq[1] = spinner_create_request(&arg->b.spin,
+ arg->b.ctx, arg->engine,
+ MI_ARB_CHECK);
+ if (IS_ERR(rq[1])) {
+ err = PTR_ERR(rq[1]);
+ goto out;
+ }
+
+ clear_bit(CONTEXT_BANNED, &rq[1]->context->flags);
+ i915_request_get(rq[1]);
+ err = i915_request_await_dma_fence(rq[1], &rq[0]->fence);
+ i915_request_add(rq[1]);
+ if (err)
+ goto out;
+
+ intel_context_set_banned(rq[1]->context);
+ err = intel_engine_pulse(arg->engine);
+ if (err)
+ goto out;
+
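+	/* End the normal ELSP[0] spinner; only the banned rq[1] is cancelled */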
+ igt_spinner_end(&arg->a.spin);
+ if (i915_request_wait(rq[1], 0, HZ / 5) < 0) {
+ err = -EIO;
+ goto out;
+ }
+
+ if (rq[0]->fence.error != 0) {
+ pr_err("Normal inflight0 request did not complete\n");
+ err = -EINVAL;
+ goto out;
+ }
+
+ if (rq[1]->fence.error != -EIO) {
+ pr_err("Cancelled inflight1 request did not report -EIO\n");
+ err = -EINVAL;
+ goto out;
+ }
+
+out:
+ i915_request_put(rq[1]);
+ i915_request_put(rq[0]);
+ if (igt_live_test_end(&t))
+ err = -EIO;
+ return err;
+}
+
+static int __cancel_queued(struct live_preempt_cancel *arg)
+{
+ struct i915_request *rq[3] = {};
+ struct igt_live_test t;
+ int err;
+
+ /* Full ELSP and one in the wings */
+ GEM_TRACE("%s(%s)\n", __func__, arg->engine->name);
+ if (igt_live_test_begin(&t, arg->engine->i915,
+ __func__, arg->engine->name))
+ return -EIO;
+
+ rq[0] = spinner_create_request(&arg->a.spin,
+ arg->a.ctx, arg->engine,
+ MI_ARB_CHECK);
+ if (IS_ERR(rq[0]))
+ return PTR_ERR(rq[0]);
+
+ clear_bit(CONTEXT_BANNED, &rq[0]->context->flags);
+ i915_request_get(rq[0]);
+ i915_request_add(rq[0]);
+ if (!igt_wait_for_spinner(&arg->a.spin, rq[0])) {
+ err = -EIO;
+ goto out;
+ }
+
+ rq[1] = igt_request_alloc(arg->b.ctx, arg->engine);
+ if (IS_ERR(rq[1])) {
+ err = PTR_ERR(rq[1]);
+ goto out;
+ }
+
+ clear_bit(CONTEXT_BANNED, &rq[1]->context->flags);
+ i915_request_get(rq[1]);
+ err = i915_request_await_dma_fence(rq[1], &rq[0]->fence);
+ i915_request_add(rq[1]);
+ if (err)
+ goto out;
+
+ rq[2] = spinner_create_request(&arg->b.spin,
+ arg->a.ctx, arg->engine,
+ MI_ARB_CHECK);
+ if (IS_ERR(rq[2])) {
+ err = PTR_ERR(rq[2]);
+ goto out;
+ }
+
+ i915_request_get(rq[2]);
+ err = i915_request_await_dma_fence(rq[2], &rq[1]->fence);
+ i915_request_add(rq[2]);
+ if (err)
+ goto out;
+
+ intel_context_set_banned(rq[2]->context);
+ err = intel_engine_pulse(arg->engine);
+ if (err)
+ goto out;
+
+ if (i915_request_wait(rq[2], 0, HZ / 5) < 0) {
+ err = -EIO;
+ goto out;
+ }
+
+ if (rq[0]->fence.error != -EIO) {
+ pr_err("Cancelled inflight0 request did not report -EIO\n");
+ err = -EINVAL;
+ goto out;
+ }
+
+ if (rq[1]->fence.error != 0) {
+ pr_err("Normal inflight1 request did not complete\n");
+ err = -EINVAL;
+ goto out;
+ }
+
+ if (rq[2]->fence.error != -EIO) {
+ pr_err("Cancelled queued request did not report -EIO\n");
+ err = -EINVAL;
+ goto out;
+ }
+
+out:
+ i915_request_put(rq[2]);
+ i915_request_put(rq[1]);
+ i915_request_put(rq[0]);
+ if (igt_live_test_end(&t))
+ err = -EIO;
+ return err;
+}
+
+static int __cancel_hostile(struct live_preempt_cancel *arg)
+{
+ struct i915_request *rq;
+ int err;
+
+ /* Preempt cancel non-preemptible spinner in ELSP0 */
+ if (!IS_ACTIVE(CONFIG_DRM_I915_PREEMPT_TIMEOUT))
+ return 0;
+
+ GEM_TRACE("%s(%s)\n", __func__, arg->engine->name);
+ rq = spinner_create_request(&arg->a.spin,
+ arg->a.ctx, arg->engine,
+ MI_NOOP); /* preemption disabled */
+ if (IS_ERR(rq))
+ return PTR_ERR(rq);
+
+ clear_bit(CONTEXT_BANNED, &rq->context->flags);
+ i915_request_get(rq);
+ i915_request_add(rq);
+ if (!igt_wait_for_spinner(&arg->a.spin, rq)) {
+ err = -EIO;
+ goto out;
+ }
+
+ intel_context_set_banned(rq->context);
+ err = intel_engine_pulse(arg->engine); /* force reset */
+ if (err)
+ goto out;
+
+ if (i915_request_wait(rq, 0, HZ / 5) < 0) {
+ err = -EIO;
+ goto out;
+ }
+
+ if (rq->fence.error != -EIO) {
+ pr_err("Cancelled inflight0 request did not report -EIO\n");
+ err = -EINVAL;
+ goto out;
+ }
+
+out:
+ i915_request_put(rq);
+ if (igt_flush_test(arg->engine->i915))
+ err = -EIO;
+ return err;
+}
+
+static int live_preempt_cancel(void *arg)
+{
+ struct intel_gt *gt = arg;
+ struct live_preempt_cancel data;
+ enum intel_engine_id id;
+ int err = -ENOMEM;
+
+ /*
+ * To cancel an inflight context, we need to first remove it from the
+ * GPU. That sounds like preemption! Plus a little bit of bookkeeping.
+ */
+
+ if (!HAS_LOGICAL_RING_PREEMPTION(gt->i915))
+ return 0;
+
+ if (preempt_client_init(gt, &data.a))
+ return -ENOMEM;
+ if (preempt_client_init(gt, &data.b))
+ goto err_client_a;
+
+ for_each_engine(data.engine, gt, id) {
+ if (!intel_engine_has_preemption(data.engine))
+ continue;
+
+ err = __cancel_active0(&data);
+ if (err)
+ goto err_wedged;
+
+ err = __cancel_active1(&data);
+ if (err)
+ goto err_wedged;
+
+ err = __cancel_queued(&data);
+ if (err)
+ goto err_wedged;
+
+ err = __cancel_hostile(&data);
+ if (err)
+ goto err_wedged;
+ }
+
+ err = 0;
+err_client_b:
+ preempt_client_fini(&data.b);
+err_client_a:
+ preempt_client_fini(&data.a);
+ return err;
+
+err_wedged:
+ GEM_TRACE_DUMP();
+ igt_spinner_end(&data.b.spin);
+ igt_spinner_end(&data.a.spin);
+ intel_gt_set_wedged(gt);
+ goto err_client_b;
+}
+
static int live_suppress_self_preempt(void *arg)
{
- struct drm_i915_private *i915 = arg;
+ struct intel_gt *gt = arg;
struct intel_engine_cs *engine;
struct i915_sched_attr attr = {
.priority = I915_USER_PRIORITY(I915_PRIORITY_MAX)
};
struct preempt_client a, b;
enum intel_engine_id id;
- intel_wakeref_t wakeref;
int err = -ENOMEM;
/*
@@ -525,49 +1653,58 @@ static int live_suppress_self_preempt(void *arg)
* completion event.
*/
- if (!HAS_LOGICAL_RING_PREEMPTION(i915))
+ if (!HAS_LOGICAL_RING_PREEMPTION(gt->i915))
return 0;
- if (USES_GUC_SUBMISSION(i915))
+ if (USES_GUC_SUBMISSION(gt->i915))
	return 0; /* presume black box */
- mutex_lock(&i915->drm.struct_mutex);
- wakeref = intel_runtime_pm_get(&i915->runtime_pm);
+ if (intel_vgpu_active(gt->i915))
+ return 0; /* GVT forces single port & request submission */
- if (preempt_client_init(i915, &a))
- goto err_unlock;
- if (preempt_client_init(i915, &b))
+ if (preempt_client_init(gt, &a))
+ return -ENOMEM;
+ if (preempt_client_init(gt, &b))
goto err_client_a;
- for_each_engine(engine, i915, id) {
+ for_each_engine(engine, gt, id) {
struct i915_request *rq_a, *rq_b;
int depth;
if (!intel_engine_has_preemption(engine))
continue;
+ if (igt_flush_test(gt->i915))
+ goto err_wedged;
+
+ intel_engine_pm_get(engine);
engine->execlists.preempt_hang.count = 0;
- rq_a = igt_spinner_create_request(&a.spin,
- a.ctx, engine,
- MI_NOOP);
+ rq_a = spinner_create_request(&a.spin,
+ a.ctx, engine,
+ MI_NOOP);
if (IS_ERR(rq_a)) {
err = PTR_ERR(rq_a);
+ intel_engine_pm_put(engine);
goto err_client_b;
}
i915_request_add(rq_a);
if (!igt_wait_for_spinner(&a.spin, rq_a)) {
pr_err("First client failed to start\n");
+ intel_engine_pm_put(engine);
goto err_wedged;
}
+ /* Keep postponing the timer to avoid premature slicing */
+ mod_timer(&engine->execlists.timer, jiffies + HZ);
for (depth = 0; depth < 8; depth++) {
- rq_b = igt_spinner_create_request(&b.spin,
- b.ctx, engine,
- MI_NOOP);
+ rq_b = spinner_create_request(&b.spin,
+ b.ctx, engine,
+ MI_NOOP);
if (IS_ERR(rq_b)) {
err = PTR_ERR(rq_b);
+ intel_engine_pm_put(engine);
goto err_client_b;
}
i915_request_add(rq_b);
@@ -578,6 +1715,7 @@ static int live_suppress_self_preempt(void *arg)
if (!igt_wait_for_spinner(&b.spin, rq_b)) {
pr_err("Second client failed to start\n");
+ intel_engine_pm_put(engine);
goto err_wedged;
}
@@ -587,14 +1725,17 @@ static int live_suppress_self_preempt(void *arg)
igt_spinner_end(&a.spin);
if (engine->execlists.preempt_hang.count) {
- pr_err("Preemption recorded x%d, depth %d; should have been suppressed!\n",
+ pr_err("Preemption on %s recorded x%d, depth %d; should have been suppressed!\n",
+ engine->name,
engine->execlists.preempt_hang.count,
depth);
+ intel_engine_pm_put(engine);
err = -EINVAL;
goto err_client_b;
}
- if (igt_flush_test(i915, I915_WAIT_LOCKED))
+ intel_engine_pm_put(engine);
+ if (igt_flush_test(gt->i915))
goto err_wedged;
}
@@ -603,17 +1744,12 @@ err_client_b:
preempt_client_fini(&b);
err_client_a:
preempt_client_fini(&a);
-err_unlock:
- if (igt_flush_test(i915, I915_WAIT_LOCKED))
- err = -EIO;
- intel_runtime_pm_put(&i915->runtime_pm, wakeref);
- mutex_unlock(&i915->drm.struct_mutex);
return err;
err_wedged:
igt_spinner_end(&b.spin);
igt_spinner_end(&a.spin);
- i915_gem_set_wedged(i915);
+ intel_gt_set_wedged(gt);
err = -EIO;
goto err_client_b;
}
@@ -632,9 +1768,13 @@ static struct i915_request *dummy_request(struct intel_engine_cs *engine)
if (!rq)
return NULL;
- INIT_LIST_HEAD(&rq->active_list);
rq->engine = engine;
+ spin_lock_init(&rq->lock);
+ INIT_LIST_HEAD(&rq->fence.cb_list);
+ rq->fence.lock = &rq->lock;
+ rq->fence.ops = &i915_fence_ops;
+
i915_sched_node_init(&rq->sched);
/* mark this request as permanently incomplete */
@@ -646,6 +1786,10 @@ static struct i915_request *dummy_request(struct intel_engine_cs *engine)
i915_sw_fence_init(&rq->submit, dummy_notify);
set_bit(I915_FENCE_FLAG_ACTIVE, &rq->fence.flags);
return rq;
}
@@ -665,11 +1809,11 @@ static void dummy_request_free(struct i915_request *dummy)
static int live_suppress_wait_preempt(void *arg)
{
- struct drm_i915_private *i915 = arg;
+ struct intel_gt *gt = arg;
struct preempt_client client[4];
+ struct i915_request *rq[ARRAY_SIZE(client)] = {};
struct intel_engine_cs *engine;
enum intel_engine_id id;
- intel_wakeref_t wakeref;
int err = -ENOMEM;
int i;
@@ -679,22 +1823,19 @@ static int live_suppress_wait_preempt(void *arg)
* not needlessly generate preempt-to-idle cycles.
*/
- if (!HAS_LOGICAL_RING_PREEMPTION(i915))
+ if (!HAS_LOGICAL_RING_PREEMPTION(gt->i915))
return 0;
- mutex_lock(&i915->drm.struct_mutex);
- wakeref = intel_runtime_pm_get(&i915->runtime_pm);
-
- if (preempt_client_init(i915, &client[0])) /* ELSP[0] */
- goto err_unlock;
- if (preempt_client_init(i915, &client[1])) /* ELSP[1] */
+ if (preempt_client_init(gt, &client[0])) /* ELSP[0] */
+ return -ENOMEM;
+ if (preempt_client_init(gt, &client[1])) /* ELSP[1] */
goto err_client_0;
- if (preempt_client_init(i915, &client[2])) /* head of queue */
+ if (preempt_client_init(gt, &client[2])) /* head of queue */
goto err_client_1;
- if (preempt_client_init(i915, &client[3])) /* bystander */
+ if (preempt_client_init(gt, &client[3])) /* bystander */
goto err_client_2;
- for_each_engine(engine, i915, id) {
+ for_each_engine(engine, gt, id) {
int depth;
if (!intel_engine_has_preemption(engine))
@@ -704,7 +1845,6 @@ static int live_suppress_wait_preempt(void *arg)
continue;
for (depth = 0; depth < ARRAY_SIZE(client); depth++) {
- struct i915_request *rq[ARRAY_SIZE(client)];
struct i915_request *dummy;
engine->execlists.preempt_hang.count = 0;
@@ -714,18 +1854,22 @@ static int live_suppress_wait_preempt(void *arg)
goto err_client_3;
for (i = 0; i < ARRAY_SIZE(client); i++) {
- rq[i] = igt_spinner_create_request(&client[i].spin,
- client[i].ctx, engine,
- MI_NOOP);
- if (IS_ERR(rq[i])) {
- err = PTR_ERR(rq[i]);
+ struct i915_request *this;
+
+ this = spinner_create_request(&client[i].spin,
+ client[i].ctx, engine,
+ MI_NOOP);
+ if (IS_ERR(this)) {
+ err = PTR_ERR(this);
goto err_wedged;
}
/* Disable NEWCLIENT promotion */
- __i915_active_request_set(&rq[i]->timeline->last_request,
- dummy);
- i915_request_add(rq[i]);
+ __i915_active_fence_set(&i915_request_timeline(this)->last_request,
+ &dummy->fence);
+
+ rq[i] = i915_request_get(this);
+ i915_request_add(this);
}
dummy_request_free(dummy);
@@ -746,10 +1890,13 @@ static int live_suppress_wait_preempt(void *arg)
goto err_wedged;
}
- for (i = 0; i < ARRAY_SIZE(client); i++)
+ for (i = 0; i < ARRAY_SIZE(client); i++) {
igt_spinner_end(&client[i].spin);
+ i915_request_put(rq[i]);
+ rq[i] = NULL;
+ }
- if (igt_flush_test(i915, I915_WAIT_LOCKED))
+ if (igt_flush_test(gt->i915))
goto err_wedged;
if (engine->execlists.preempt_hang.count) {
@@ -772,28 +1919,24 @@ err_client_1:
preempt_client_fini(&client[1]);
err_client_0:
preempt_client_fini(&client[0]);
-err_unlock:
- if (igt_flush_test(i915, I915_WAIT_LOCKED))
- err = -EIO;
- intel_runtime_pm_put(&i915->runtime_pm, wakeref);
- mutex_unlock(&i915->drm.struct_mutex);
return err;
err_wedged:
- for (i = 0; i < ARRAY_SIZE(client); i++)
+ for (i = 0; i < ARRAY_SIZE(client); i++) {
igt_spinner_end(&client[i].spin);
- i915_gem_set_wedged(i915);
+ i915_request_put(rq[i]);
+ }
+ intel_gt_set_wedged(gt);
err = -EIO;
goto err_client_3;
}
static int live_chain_preempt(void *arg)
{
- struct drm_i915_private *i915 = arg;
+ struct intel_gt *gt = arg;
struct intel_engine_cs *engine;
struct preempt_client hi, lo;
enum intel_engine_id id;
- intel_wakeref_t wakeref;
int err = -ENOMEM;
/*
@@ -802,19 +1945,16 @@ static int live_chain_preempt(void *arg)
* the previously submitted spinner in B.
*/
- if (!HAS_LOGICAL_RING_PREEMPTION(i915))
+ if (!HAS_LOGICAL_RING_PREEMPTION(gt->i915))
return 0;
- mutex_lock(&i915->drm.struct_mutex);
- wakeref = intel_runtime_pm_get(&i915->runtime_pm);
-
- if (preempt_client_init(i915, &hi))
- goto err_unlock;
+ if (preempt_client_init(gt, &hi))
+ return -ENOMEM;
- if (preempt_client_init(i915, &lo))
+ if (preempt_client_init(gt, &lo))
goto err_client_hi;
- for_each_engine(engine, i915, id) {
+ for_each_engine(engine, gt, id) {
struct i915_sched_attr attr = {
.priority = I915_USER_PRIORITY(I915_PRIORITY_MAX),
};
@@ -825,11 +1965,13 @@ static int live_chain_preempt(void *arg)
if (!intel_engine_has_preemption(engine))
continue;
- rq = igt_spinner_create_request(&lo.spin,
- lo.ctx, engine,
- MI_ARB_CHECK);
+ rq = spinner_create_request(&lo.spin,
+ lo.ctx, engine,
+ MI_ARB_CHECK);
if (IS_ERR(rq))
goto err_wedged;
+
+ i915_request_get(rq);
i915_request_add(rq);
ring_size = rq->wa_tail - rq->head;
@@ -842,27 +1984,29 @@ static int live_chain_preempt(void *arg)
igt_spinner_end(&lo.spin);
if (i915_request_wait(rq, 0, HZ / 2) < 0) {
pr_err("Timed out waiting to flush %s\n", engine->name);
+ i915_request_put(rq);
goto err_wedged;
}
+ i915_request_put(rq);
- if (igt_live_test_begin(&t, i915, __func__, engine->name)) {
+ if (igt_live_test_begin(&t, gt->i915, __func__, engine->name)) {
err = -EIO;
goto err_wedged;
}
for_each_prime_number_from(count, 1, ring_size) {
- rq = igt_spinner_create_request(&hi.spin,
- hi.ctx, engine,
- MI_ARB_CHECK);
+ rq = spinner_create_request(&hi.spin,
+ hi.ctx, engine,
+ MI_ARB_CHECK);
if (IS_ERR(rq))
goto err_wedged;
i915_request_add(rq);
if (!igt_wait_for_spinner(&hi.spin, rq))
goto err_wedged;
- rq = igt_spinner_create_request(&lo.spin,
- lo.ctx, engine,
- MI_ARB_CHECK);
+ rq = spinner_create_request(&lo.spin,
+ lo.ctx, engine,
+ MI_ARB_CHECK);
if (IS_ERR(rq))
goto err_wedged;
i915_request_add(rq);
@@ -877,36 +2021,46 @@ static int live_chain_preempt(void *arg)
rq = igt_request_alloc(hi.ctx, engine);
if (IS_ERR(rq))
goto err_wedged;
+
+ i915_request_get(rq);
i915_request_add(rq);
engine->schedule(rq, &attr);
igt_spinner_end(&hi.spin);
if (i915_request_wait(rq, 0, HZ / 5) < 0) {
struct drm_printer p =
- drm_info_printer(i915->drm.dev);
+ drm_info_printer(gt->i915->drm.dev);
pr_err("Failed to preempt over chain of %d\n",
count);
intel_engine_dump(engine, &p,
"%s\n", engine->name);
+ i915_request_put(rq);
goto err_wedged;
}
igt_spinner_end(&lo.spin);
+ i915_request_put(rq);
rq = igt_request_alloc(lo.ctx, engine);
if (IS_ERR(rq))
goto err_wedged;
+
+ i915_request_get(rq);
i915_request_add(rq);
+
if (i915_request_wait(rq, 0, HZ / 5) < 0) {
struct drm_printer p =
- drm_info_printer(i915->drm.dev);
+ drm_info_printer(gt->i915->drm.dev);
pr_err("Failed to flush low priority chain of %d requests\n",
count);
intel_engine_dump(engine, &p,
"%s\n", engine->name);
+
+ i915_request_put(rq);
goto err_wedged;
}
+ i915_request_put(rq);
}
if (igt_live_test_end(&t)) {
@@ -920,66 +2074,252 @@ err_client_lo:
preempt_client_fini(&lo);
err_client_hi:
preempt_client_fini(&hi);
-err_unlock:
- if (igt_flush_test(i915, I915_WAIT_LOCKED))
- err = -EIO;
- intel_runtime_pm_put(&i915->runtime_pm, wakeref);
- mutex_unlock(&i915->drm.struct_mutex);
return err;
err_wedged:
igt_spinner_end(&hi.spin);
igt_spinner_end(&lo.spin);
- i915_gem_set_wedged(i915);
+ intel_gt_set_wedged(gt);
err = -EIO;
goto err_client_lo;
}
+static int create_gang(struct intel_engine_cs *engine,
+ struct i915_request **prev)
+{
+ struct drm_i915_gem_object *obj;
+ struct intel_context *ce;
+ struct i915_request *rq;
+ struct i915_vma *vma;
+ u32 *cs;
+ int err;
+
+ ce = intel_context_create(engine);
+ if (IS_ERR(ce))
+ return PTR_ERR(ce);
+
+ obj = i915_gem_object_create_internal(engine->i915, 4096);
+ if (IS_ERR(obj)) {
+ err = PTR_ERR(obj);
+ goto err_ce;
+ }
+
+ vma = i915_vma_instance(obj, ce->vm, NULL);
+ if (IS_ERR(vma)) {
+ err = PTR_ERR(vma);
+ goto err_obj;
+ }
+
+ err = i915_vma_pin(vma, 0, 0, PIN_USER);
+ if (err)
+ goto err_obj;
+
+	cs = i915_gem_object_pin_map(obj, I915_MAP_WC);
+	if (IS_ERR(cs)) {
+		err = PTR_ERR(cs);
+		goto err_obj;
+	}
+
+ /* Semaphore target: spin until zero */
+ *cs++ = MI_ARB_ON_OFF | MI_ARB_ENABLE;
+
+ *cs++ = MI_SEMAPHORE_WAIT |
+ MI_SEMAPHORE_POLL |
+ MI_SEMAPHORE_SAD_EQ_SDD;
+ *cs++ = 0;
+ *cs++ = lower_32_bits(vma->node.start);
+ *cs++ = upper_32_bits(vma->node.start);
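+	/*
+	 * The batch spins on its own first dword (the non-zero
+	 * MI_ARB_ON_OFF emitted above) until a later, higher priority
+	 * batch in the gang (or the test itself, for the last spinner)
+	 * overwrites it with zero.
+	 */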
+
+ if (*prev) {
+ u64 offset = (*prev)->batch->node.start;
+
+ /* Terminate the spinner in the next lower priority batch. */
+ *cs++ = MI_STORE_DWORD_IMM_GEN4;
+ *cs++ = lower_32_bits(offset);
+ *cs++ = upper_32_bits(offset);
+ *cs++ = 0;
+ }
+
+ *cs++ = MI_BATCH_BUFFER_END;
+ i915_gem_object_flush_map(obj);
+ i915_gem_object_unpin_map(obj);
+
+	rq = intel_context_create_request(ce);
+	if (IS_ERR(rq)) {
+		err = PTR_ERR(rq);
+		goto err_obj;
+	}
+
+ rq->batch = vma;
+ i915_request_get(rq);
+
+ i915_vma_lock(vma);
+ err = i915_request_await_object(rq, vma->obj, false);
+ if (!err)
+ err = i915_vma_move_to_active(vma, rq, 0);
+ if (!err)
+ err = rq->engine->emit_bb_start(rq,
+ vma->node.start,
+ PAGE_SIZE, 0);
+ i915_vma_unlock(vma);
+ i915_request_add(rq);
+ if (err)
+ goto err_rq;
+
+ i915_gem_object_put(obj);
+ intel_context_put(ce);
+
+ rq->client_link.next = &(*prev)->client_link;
+ *prev = rq;
+ return 0;
+
+err_rq:
+ i915_request_put(rq);
+err_obj:
+ i915_gem_object_put(obj);
+err_ce:
+ intel_context_put(ce);
+ return err;
+}
+
+static int live_preempt_gang(void *arg)
+{
+ struct intel_gt *gt = arg;
+ struct intel_engine_cs *engine;
+ enum intel_engine_id id;
+
+ if (!HAS_LOGICAL_RING_PREEMPTION(gt->i915))
+ return 0;
+
+ /*
+ * Build as long a chain of preempters as we can, with each
+ * request higher priority than the last. Once we are ready, we release
+	 * the last batch which then percolates down the chain, each releasing
+ * the next oldest in turn. The intent is to simply push as hard as we
+ * can with the number of preemptions, trying to exceed narrow HW
+ * limits. At a minimum, we insist that we can sort all the user
+ * high priority levels into execution order.
+ */
+
+ for_each_engine(engine, gt, id) {
+ struct i915_request *rq = NULL;
+ struct igt_live_test t;
+ IGT_TIMEOUT(end_time);
+ int prio = 0;
+ int err = 0;
+ u32 *cs;
+
+ if (!intel_engine_has_preemption(engine))
+ continue;
+
+ if (igt_live_test_begin(&t, gt->i915, __func__, engine->name))
+ return -EIO;
+
+ do {
+ struct i915_sched_attr attr = {
+ .priority = I915_USER_PRIORITY(prio++),
+ };
+
+ err = create_gang(engine, &rq);
+ if (err)
+ break;
+
+ /* Submit each spinner at increasing priority */
+ engine->schedule(rq, &attr);
+
+ if (prio <= I915_PRIORITY_MAX)
+ continue;
+
+ if (prio > (INT_MAX >> I915_USER_PRIORITY_SHIFT))
+ break;
+
+ if (__igt_timeout(end_time, NULL))
+ break;
+ } while (1);
+ pr_debug("%s: Preempt chain of %d requests\n",
+ engine->name, prio);
+
+ /*
+ * Such that the last spinner is the highest priority and
+ * should execute first. When that spinner completes,
+ * it will terminate the next lowest spinner until there
+ * are no more spinners and the gang is complete.
+ */
+ cs = i915_gem_object_pin_map(rq->batch->obj, I915_MAP_WC);
+ if (!IS_ERR(cs)) {
+ *cs = 0;
+ i915_gem_object_unpin_map(rq->batch->obj);
+ } else {
+ err = PTR_ERR(cs);
+ intel_gt_set_wedged(gt);
+ }
+
+ while (rq) { /* wait for each rq from highest to lowest prio */
+ struct i915_request *n =
+ list_next_entry(rq, client_link);
+
+ if (err == 0 && i915_request_wait(rq, 0, HZ / 5) < 0) {
+ struct drm_printer p =
+ drm_info_printer(engine->i915->drm.dev);
+
+ pr_err("Failed to flush chain of %d requests, at %d\n",
+ prio, rq_prio(rq) >> I915_USER_PRIORITY_SHIFT);
+ intel_engine_dump(engine, &p,
+ "%s\n", engine->name);
+
+ err = -ETIME;
+ }
+
+ i915_request_put(rq);
+ rq = n;
+ }
+
+ if (igt_live_test_end(&t))
+ err = -EIO;
+ if (err)
+ return err;
+ }
+
+ return 0;
+}
+
static int live_preempt_hang(void *arg)
{
- struct drm_i915_private *i915 = arg;
+ struct intel_gt *gt = arg;
struct i915_gem_context *ctx_hi, *ctx_lo;
struct igt_spinner spin_hi, spin_lo;
struct intel_engine_cs *engine;
enum intel_engine_id id;
- intel_wakeref_t wakeref;
int err = -ENOMEM;
- if (!HAS_LOGICAL_RING_PREEMPTION(i915))
+ if (!HAS_LOGICAL_RING_PREEMPTION(gt->i915))
return 0;
- if (!intel_has_reset_engine(i915))
+ if (!intel_has_reset_engine(gt))
return 0;
- mutex_lock(&i915->drm.struct_mutex);
- wakeref = intel_runtime_pm_get(&i915->runtime_pm);
-
- if (igt_spinner_init(&spin_hi, i915))
- goto err_unlock;
+ if (igt_spinner_init(&spin_hi, gt))
+ return -ENOMEM;
- if (igt_spinner_init(&spin_lo, i915))
+ if (igt_spinner_init(&spin_lo, gt))
goto err_spin_hi;
- ctx_hi = kernel_context(i915);
+ ctx_hi = kernel_context(gt->i915);
if (!ctx_hi)
goto err_spin_lo;
ctx_hi->sched.priority =
I915_USER_PRIORITY(I915_CONTEXT_MAX_USER_PRIORITY);
- ctx_lo = kernel_context(i915);
+ ctx_lo = kernel_context(gt->i915);
if (!ctx_lo)
goto err_ctx_hi;
ctx_lo->sched.priority =
I915_USER_PRIORITY(I915_CONTEXT_MIN_USER_PRIORITY);
- for_each_engine(engine, i915, id) {
+ for_each_engine(engine, gt, id) {
struct i915_request *rq;
if (!intel_engine_has_preemption(engine))
continue;
- rq = igt_spinner_create_request(&spin_lo, ctx_lo, engine,
- MI_ARB_CHECK);
+ rq = spinner_create_request(&spin_lo, ctx_lo, engine,
+ MI_ARB_CHECK);
if (IS_ERR(rq)) {
err = PTR_ERR(rq);
goto err_ctx_lo;
@@ -989,13 +2329,13 @@ static int live_preempt_hang(void *arg)
if (!igt_wait_for_spinner(&spin_lo, rq)) {
GEM_TRACE("lo spinner failed to start\n");
GEM_TRACE_DUMP();
- i915_gem_set_wedged(i915);
+ intel_gt_set_wedged(gt);
err = -EIO;
goto err_ctx_lo;
}
- rq = igt_spinner_create_request(&spin_hi, ctx_hi, engine,
- MI_ARB_CHECK);
+ rq = spinner_create_request(&spin_hi, ctx_hi, engine,
+ MI_ARB_CHECK);
if (IS_ERR(rq)) {
igt_spinner_end(&spin_lo);
err = PTR_ERR(rq);
@@ -1011,28 +2351,28 @@ static int live_preempt_hang(void *arg)
HZ / 10)) {
pr_err("Preemption did not occur within timeout!");
GEM_TRACE_DUMP();
- i915_gem_set_wedged(i915);
+ intel_gt_set_wedged(gt);
err = -EIO;
goto err_ctx_lo;
}
- set_bit(I915_RESET_ENGINE + id, &i915->gpu_error.flags);
- i915_reset_engine(engine, NULL);
- clear_bit(I915_RESET_ENGINE + id, &i915->gpu_error.flags);
+ set_bit(I915_RESET_ENGINE + id, &gt->reset.flags);
+ intel_engine_reset(engine, NULL);
+ clear_bit(I915_RESET_ENGINE + id, &gt->reset.flags);
engine->execlists.preempt_hang.inject_hang = false;
if (!igt_wait_for_spinner(&spin_hi, rq)) {
GEM_TRACE("hi spinner failed to start\n");
GEM_TRACE_DUMP();
- i915_gem_set_wedged(i915);
+ intel_gt_set_wedged(gt);
err = -EIO;
goto err_ctx_lo;
}
igt_spinner_end(&spin_hi);
igt_spinner_end(&spin_lo);
- if (igt_flush_test(i915, I915_WAIT_LOCKED)) {
+ if (igt_flush_test(gt->i915)) {
err = -EIO;
goto err_ctx_lo;
}
@@ -1047,10 +2387,105 @@ err_spin_lo:
igt_spinner_fini(&spin_lo);
err_spin_hi:
igt_spinner_fini(&spin_hi);
-err_unlock:
- igt_flush_test(i915, I915_WAIT_LOCKED);
- intel_runtime_pm_put(&i915->runtime_pm, wakeref);
- mutex_unlock(&i915->drm.struct_mutex);
+ return err;
+}
+
+static int live_preempt_timeout(void *arg)
+{
+ struct intel_gt *gt = arg;
+ struct i915_gem_context *ctx_hi, *ctx_lo;
+ struct igt_spinner spin_lo;
+ struct intel_engine_cs *engine;
+ enum intel_engine_id id;
+ int err = -ENOMEM;
+
+ /*
+ * Check that we force preemption to occur by cancelling the previous
+ * context if it refuses to yield the GPU.
+ */
+ if (!IS_ACTIVE(CONFIG_DRM_I915_PREEMPT_TIMEOUT))
+ return 0;
+
+ if (!HAS_LOGICAL_RING_PREEMPTION(gt->i915))
+ return 0;
+
+ if (!intel_has_reset_engine(gt))
+ return 0;
+
+ if (igt_spinner_init(&spin_lo, gt))
+ return -ENOMEM;
+
+ ctx_hi = kernel_context(gt->i915);
+ if (!ctx_hi)
+ goto err_spin_lo;
+ ctx_hi->sched.priority =
+ I915_USER_PRIORITY(I915_CONTEXT_MAX_USER_PRIORITY);
+
+ ctx_lo = kernel_context(gt->i915);
+ if (!ctx_lo)
+ goto err_ctx_hi;
+ ctx_lo->sched.priority =
+ I915_USER_PRIORITY(I915_CONTEXT_MIN_USER_PRIORITY);
+
+ for_each_engine(engine, gt, id) {
+ unsigned long saved_timeout;
+ struct i915_request *rq;
+
+ if (!intel_engine_has_preemption(engine))
+ continue;
+
+ rq = spinner_create_request(&spin_lo, ctx_lo, engine,
+ MI_NOOP); /* preemption disabled */
+ if (IS_ERR(rq)) {
+ err = PTR_ERR(rq);
+ goto err_ctx_lo;
+ }
+
+ i915_request_add(rq);
+ if (!igt_wait_for_spinner(&spin_lo, rq)) {
+ intel_gt_set_wedged(gt);
+ err = -EIO;
+ goto err_ctx_lo;
+ }
+
+ rq = igt_request_alloc(ctx_hi, engine);
+ if (IS_ERR(rq)) {
+ igt_spinner_end(&spin_lo);
+ err = PTR_ERR(rq);
+ goto err_ctx_lo;
+ }
+
+ /* Flush the previous CS ack before changing timeouts */
+ while (READ_ONCE(engine->execlists.pending[0]))
+ cpu_relax();
+
+ saved_timeout = engine->props.preempt_timeout_ms;
+ engine->props.preempt_timeout_ms = 1; /* in ms, -> 1 jiffie */
+
+ i915_request_get(rq);
+ i915_request_add(rq);
+
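+		/*
+		 * Flushing submission arms the preemption attempt with the
+		 * shortened timeout; it is then safe to restore the saved
+		 * value without affecting this event.
+		 */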
+ intel_engine_flush_submission(engine);
+ engine->props.preempt_timeout_ms = saved_timeout;
+
+ if (i915_request_wait(rq, 0, HZ / 10) < 0) {
+ intel_gt_set_wedged(gt);
+ i915_request_put(rq);
+ err = -ETIME;
+ goto err_ctx_lo;
+ }
+
+ igt_spinner_end(&spin_lo);
+ i915_request_put(rq);
+ }
+
+ err = 0;
+err_ctx_lo:
+ kernel_context_close(ctx_lo);
+err_ctx_hi:
+ kernel_context_close(ctx_hi);
+err_spin_lo:
+ igt_spinner_fini(&spin_lo);
return err;
}
@@ -1065,7 +2500,7 @@ static int random_priority(struct rnd_state *rnd)
}
struct preempt_smoke {
- struct drm_i915_private *i915;
+ struct intel_gt *gt;
struct i915_gem_context **contexts;
struct intel_engine_cs *engine;
struct drm_i915_gem_object *batch;
@@ -1089,7 +2524,11 @@ static int smoke_submit(struct preempt_smoke *smoke,
int err = 0;
if (batch) {
- vma = i915_vma_instance(batch, ctx->vm, NULL);
+ struct i915_address_space *vm;
+
+ vm = i915_gem_context_get_vm_rcu(ctx);
+ vma = i915_vma_instance(batch, vm, NULL);
+ i915_vm_put(vm);
if (IS_ERR(vma))
return PTR_ERR(vma);
@@ -1108,11 +2547,13 @@ static int smoke_submit(struct preempt_smoke *smoke,
if (vma) {
i915_vma_lock(vma);
- err = rq->engine->emit_bb_start(rq,
- vma->node.start,
- PAGE_SIZE, 0);
+ err = i915_request_await_object(rq, vma->obj, false);
if (!err)
err = i915_vma_move_to_active(vma, rq, 0);
+ if (!err)
+ err = rq->engine->emit_bb_start(rq,
+ vma->node.start,
+ PAGE_SIZE, 0);
i915_vma_unlock(vma);
}
@@ -1136,11 +2577,9 @@ static int smoke_crescendo_thread(void *arg)
struct i915_gem_context *ctx = smoke_context(smoke);
int err;
- mutex_lock(&smoke->i915->drm.struct_mutex);
err = smoke_submit(smoke,
ctx, count % I915_PRIORITY_MAX,
smoke->batch);
- mutex_unlock(&smoke->i915->drm.struct_mutex);
if (err)
return err;
@@ -1161,9 +2600,7 @@ static int smoke_crescendo(struct preempt_smoke *smoke, unsigned int flags)
unsigned long count;
int err = 0;
- mutex_unlock(&smoke->i915->drm.struct_mutex);
-
- for_each_engine(engine, smoke->i915, id) {
+ for_each_engine(engine, smoke->gt, id) {
arg[id] = *smoke;
arg[id].engine = engine;
if (!(flags & BATCH))
@@ -1179,8 +2616,10 @@ static int smoke_crescendo(struct preempt_smoke *smoke, unsigned int flags)
get_task_struct(tsk[id]);
}
+ yield(); /* start all threads before we kthread_stop() */
+
count = 0;
- for_each_engine(engine, smoke->i915, id) {
+ for_each_engine(engine, smoke->gt, id) {
int status;
if (IS_ERR_OR_NULL(tsk[id]))
@@ -1195,11 +2634,9 @@ static int smoke_crescendo(struct preempt_smoke *smoke, unsigned int flags)
put_task_struct(tsk[id]);
}
- mutex_lock(&smoke->i915->drm.struct_mutex);
-
pr_info("Submitted %lu crescendo:%x requests across %d engines and %d contexts\n",
count, flags,
- RUNTIME_INFO(smoke->i915)->num_engines, smoke->ncontext);
+ RUNTIME_INFO(smoke->gt->i915)->num_engines, smoke->ncontext);
return 0;
}
@@ -1211,7 +2648,7 @@ static int smoke_random(struct preempt_smoke *smoke, unsigned int flags)
count = 0;
do {
- for_each_engine(smoke->engine, smoke->i915, id) {
+ for_each_engine(smoke->engine, smoke->gt, id) {
struct i915_gem_context *ctx = smoke_context(smoke);
int err;
@@ -1227,25 +2664,24 @@ static int smoke_random(struct preempt_smoke *smoke, unsigned int flags)
pr_info("Submitted %lu random:%x requests across %d engines and %d contexts\n",
count, flags,
- RUNTIME_INFO(smoke->i915)->num_engines, smoke->ncontext);
+ RUNTIME_INFO(smoke->gt->i915)->num_engines, smoke->ncontext);
return 0;
}
static int live_preempt_smoke(void *arg)
{
struct preempt_smoke smoke = {
- .i915 = arg,
+ .gt = arg,
.prng = I915_RND_STATE_INITIALIZER(i915_selftest.random_seed),
.ncontext = 1024,
};
const unsigned int phase[] = { 0, BATCH };
- intel_wakeref_t wakeref;
struct igt_live_test t;
int err = -ENOMEM;
u32 *cs;
int n;
- if (!HAS_LOGICAL_RING_PREEMPTION(smoke.i915))
+ if (!HAS_LOGICAL_RING_PREEMPTION(smoke.gt->i915))
return 0;
smoke.contexts = kmalloc_array(smoke.ncontext,
@@ -1254,13 +2690,11 @@ static int live_preempt_smoke(void *arg)
if (!smoke.contexts)
return -ENOMEM;
- mutex_lock(&smoke.i915->drm.struct_mutex);
- wakeref = intel_runtime_pm_get(&smoke.i915->runtime_pm);
-
- smoke.batch = i915_gem_object_create_internal(smoke.i915, PAGE_SIZE);
+ smoke.batch =
+ i915_gem_object_create_internal(smoke.gt->i915, PAGE_SIZE);
if (IS_ERR(smoke.batch)) {
err = PTR_ERR(smoke.batch);
- goto err_unlock;
+ goto err_free;
}
cs = i915_gem_object_pin_map(smoke.batch, I915_MAP_WB);
@@ -1274,13 +2708,13 @@ static int live_preempt_smoke(void *arg)
i915_gem_object_flush_map(smoke.batch);
i915_gem_object_unpin_map(smoke.batch);
- if (igt_live_test_begin(&t, smoke.i915, __func__, "all")) {
+ if (igt_live_test_begin(&t, smoke.gt->i915, __func__, "all")) {
err = -EIO;
goto err_batch;
}
for (n = 0; n < smoke.ncontext; n++) {
- smoke.contexts[n] = kernel_context(smoke.i915);
+ smoke.contexts[n] = kernel_context(smoke.gt->i915);
if (!smoke.contexts[n])
goto err_ctx;
}
@@ -1307,15 +2741,13 @@ err_ctx:
err_batch:
i915_gem_object_put(smoke.batch);
-err_unlock:
- intel_runtime_pm_put(&smoke.i915->runtime_pm, wakeref);
- mutex_unlock(&smoke.i915->drm.struct_mutex);
+err_free:
kfree(smoke.contexts);
return err;
}
-static int nop_virtual_engine(struct drm_i915_private *i915,
+static int nop_virtual_engine(struct intel_gt *gt,
struct intel_engine_cs **siblings,
unsigned int nsibling,
unsigned int nctx,
@@ -1323,28 +2755,18 @@ static int nop_virtual_engine(struct drm_i915_private *i915,
#define CHAIN BIT(0)
{
IGT_TIMEOUT(end_time);
- struct i915_request *request[16];
- struct i915_gem_context *ctx[16];
+ struct i915_request *request[16] = {};
struct intel_context *ve[16];
unsigned long n, prime, nc;
struct igt_live_test t;
ktime_t times[2] = {};
int err;
- GEM_BUG_ON(!nctx || nctx > ARRAY_SIZE(ctx));
+ GEM_BUG_ON(!nctx || nctx > ARRAY_SIZE(ve));
for (n = 0; n < nctx; n++) {
- ctx[n] = kernel_context(i915);
- if (!ctx[n]) {
- err = -ENOMEM;
- nctx = n;
- goto out;
- }
-
- ve[n] = intel_execlists_create_virtual(ctx[n],
- siblings, nsibling);
+ ve[n] = intel_execlists_create_virtual(siblings, nsibling);
if (IS_ERR(ve[n])) {
- kernel_context_close(ctx[n]);
err = PTR_ERR(ve[n]);
nctx = n;
goto out;
@@ -1353,13 +2775,12 @@ static int nop_virtual_engine(struct drm_i915_private *i915,
err = intel_context_pin(ve[n]);
if (err) {
intel_context_put(ve[n]);
- kernel_context_close(ctx[n]);
nctx = n;
goto out;
}
}
- err = igt_live_test_begin(&t, i915, __func__, ve[0]->engine->name);
+ err = igt_live_test_begin(&t, gt->i915, __func__, ve[0]->engine->name);
if (err)
goto out;
@@ -1369,27 +2790,35 @@ static int nop_virtual_engine(struct drm_i915_private *i915,
if (flags & CHAIN) {
for (nc = 0; nc < nctx; nc++) {
for (n = 0; n < prime; n++) {
- request[nc] =
- i915_request_create(ve[nc]);
- if (IS_ERR(request[nc])) {
- err = PTR_ERR(request[nc]);
+ struct i915_request *rq;
+
+ rq = i915_request_create(ve[nc]);
+ if (IS_ERR(rq)) {
+ err = PTR_ERR(rq);
goto out;
}
- i915_request_add(request[nc]);
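+				/* Track only the most recent request per context */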
+ if (request[nc])
+ i915_request_put(request[nc]);
+ request[nc] = i915_request_get(rq);
+ i915_request_add(rq);
}
}
} else {
for (n = 0; n < prime; n++) {
for (nc = 0; nc < nctx; nc++) {
- request[nc] =
- i915_request_create(ve[nc]);
- if (IS_ERR(request[nc])) {
- err = PTR_ERR(request[nc]);
+ struct i915_request *rq;
+
+ rq = i915_request_create(ve[nc]);
+ if (IS_ERR(rq)) {
+ err = PTR_ERR(rq);
goto out;
}
- i915_request_add(request[nc]);
+ if (request[nc])
+ i915_request_put(request[nc]);
+ request[nc] = i915_request_get(rq);
+ i915_request_add(rq);
}
}
}
@@ -1406,7 +2835,7 @@ static int nop_virtual_engine(struct drm_i915_private *i915,
request[nc]->fence.context,
request[nc]->fence.seqno);
GEM_TRACE_DUMP();
- i915_gem_set_wedged(i915);
+ intel_gt_set_wedged(gt);
break;
}
}
@@ -1415,6 +2844,11 @@ static int nop_virtual_engine(struct drm_i915_private *i915,
if (prime == 1)
times[0] = times[1];
+ for (nc = 0; nc < nctx; nc++) {
+ i915_request_put(request[nc]);
+ request[nc] = NULL;
+ }
+
if (__igt_timeout(end_time, NULL))
break;
}
@@ -1428,37 +2862,35 @@ static int nop_virtual_engine(struct drm_i915_private *i915,
prime, div64_u64(ktime_to_ns(times[1]), prime));
out:
- if (igt_flush_test(i915, I915_WAIT_LOCKED))
+ if (igt_flush_test(gt->i915))
err = -EIO;
for (nc = 0; nc < nctx; nc++) {
+ i915_request_put(request[nc]);
intel_context_unpin(ve[nc]);
intel_context_put(ve[nc]);
- kernel_context_close(ctx[nc]);
}
return err;
}
static int live_virtual_engine(void *arg)
{
- struct drm_i915_private *i915 = arg;
+ struct intel_gt *gt = arg;
struct intel_engine_cs *siblings[MAX_ENGINE_INSTANCE + 1];
struct intel_engine_cs *engine;
enum intel_engine_id id;
unsigned int class, inst;
- int err = -ENODEV;
+ int err;
- if (USES_GUC_SUBMISSION(i915))
+ if (USES_GUC_SUBMISSION(gt->i915))
return 0;
- mutex_lock(&i915->drm.struct_mutex);
-
- for_each_engine(engine, i915, id) {
- err = nop_virtual_engine(i915, &engine, 1, 1, 0);
+ for_each_engine(engine, gt, id) {
+ err = nop_virtual_engine(gt, &engine, 1, 1, 0);
if (err) {
pr_err("Failed to wrap engine %s: err=%d\n",
engine->name, err);
- goto out_unlock;
+ return err;
}
}
@@ -1467,37 +2899,34 @@ static int live_virtual_engine(void *arg)
nsibling = 0;
for (inst = 0; inst <= MAX_ENGINE_INSTANCE; inst++) {
- if (!i915->engine_class[class][inst])
+ if (!gt->engine_class[class][inst])
continue;
- siblings[nsibling++] = i915->engine_class[class][inst];
+ siblings[nsibling++] = gt->engine_class[class][inst];
}
if (nsibling < 2)
continue;
for (n = 1; n <= nsibling + 1; n++) {
- err = nop_virtual_engine(i915, siblings, nsibling,
+ err = nop_virtual_engine(gt, siblings, nsibling,
n, 0);
if (err)
- goto out_unlock;
+ return err;
}
- err = nop_virtual_engine(i915, siblings, nsibling, n, CHAIN);
+ err = nop_virtual_engine(gt, siblings, nsibling, n, CHAIN);
if (err)
- goto out_unlock;
+ return err;
}
-out_unlock:
- mutex_unlock(&i915->drm.struct_mutex);
- return err;
+ return 0;
}
-static int mask_virtual_engine(struct drm_i915_private *i915,
+static int mask_virtual_engine(struct intel_gt *gt,
struct intel_engine_cs **siblings,
unsigned int nsibling)
{
struct i915_request *request[MAX_ENGINE_INSTANCE + 1];
- struct i915_gem_context *ctx;
struct intel_context *ve;
struct igt_live_test t;
unsigned int n;
@@ -1508,11 +2937,7 @@ static int mask_virtual_engine(struct drm_i915_private *i915,
* restrict it to our desired engine within the virtual engine.
*/
- ctx = kernel_context(i915);
- if (!ctx)
- return -ENOMEM;
-
- ve = intel_execlists_create_virtual(ctx, siblings, nsibling);
+ ve = intel_execlists_create_virtual(siblings, nsibling);
if (IS_ERR(ve)) {
err = PTR_ERR(ve);
goto out_close;
@@ -1522,7 +2947,7 @@ static int mask_virtual_engine(struct drm_i915_private *i915,
if (err)
goto out_put;
- err = igt_live_test_begin(&t, i915, __func__, ve->engine->name);
+ err = igt_live_test_begin(&t, gt->i915, __func__, ve->engine->name);
if (err)
goto out_unpin;
@@ -1553,7 +2978,7 @@ static int mask_virtual_engine(struct drm_i915_private *i915,
request[n]->fence.context,
request[n]->fence.seqno);
GEM_TRACE_DUMP();
- i915_gem_set_wedged(i915);
+ intel_gt_set_wedged(gt);
err = -EIO;
goto out;
}
@@ -1568,11 +2993,8 @@ static int mask_virtual_engine(struct drm_i915_private *i915,
}
err = igt_live_test_end(&t);
- if (err)
- goto out;
-
out:
- if (igt_flush_test(i915, I915_WAIT_LOCKED))
+ if (igt_flush_test(gt->i915))
err = -EIO;
for (n = 0; n < nsibling; n++)
@@ -1583,46 +3005,183 @@ out_unpin:
out_put:
intel_context_put(ve);
out_close:
- kernel_context_close(ctx);
return err;
}
static int live_virtual_mask(void *arg)
{
- struct drm_i915_private *i915 = arg;
+ struct intel_gt *gt = arg;
struct intel_engine_cs *siblings[MAX_ENGINE_INSTANCE + 1];
unsigned int class, inst;
- int err = 0;
+ int err;
- if (USES_GUC_SUBMISSION(i915))
+ if (USES_GUC_SUBMISSION(gt->i915))
return 0;
- mutex_lock(&i915->drm.struct_mutex);
-
for (class = 0; class <= MAX_ENGINE_CLASS; class++) {
unsigned int nsibling;
nsibling = 0;
for (inst = 0; inst <= MAX_ENGINE_INSTANCE; inst++) {
- if (!i915->engine_class[class][inst])
+ if (!gt->engine_class[class][inst])
break;
- siblings[nsibling++] = i915->engine_class[class][inst];
+ siblings[nsibling++] = gt->engine_class[class][inst];
}
if (nsibling < 2)
continue;
- err = mask_virtual_engine(i915, siblings, nsibling);
+ err = mask_virtual_engine(gt, siblings, nsibling);
if (err)
- goto out_unlock;
+ return err;
}
-out_unlock:
- mutex_unlock(&i915->drm.struct_mutex);
+ return 0;
+}
+
+static int preserved_virtual_engine(struct intel_gt *gt,
+ struct intel_engine_cs **siblings,
+ unsigned int nsibling)
+{
+ struct i915_request *last = NULL;
+ struct intel_context *ve;
+ struct i915_vma *scratch;
+ struct igt_live_test t;
+ unsigned int n;
+ int err = 0;
+ u32 *cs;
+
+ scratch = create_scratch(siblings[0]->gt);
+ if (IS_ERR(scratch))
+ return PTR_ERR(scratch);
+
+ ve = intel_execlists_create_virtual(siblings, nsibling);
+ if (IS_ERR(ve)) {
+ err = PTR_ERR(ve);
+ goto out_scratch;
+ }
+
+ err = intel_context_pin(ve);
+ if (err)
+ goto out_put;
+
+ err = igt_live_test_begin(&t, gt->i915, __func__, ve->engine->name);
+ if (err)
+ goto out_unpin;
+
+ for (n = 0; n < NUM_GPR_DW; n++) {
+ struct intel_engine_cs *engine = siblings[n % nsibling];
+ struct i915_request *rq;
+
+ rq = i915_request_create(ve);
+ if (IS_ERR(rq)) {
+ err = PTR_ERR(rq);
+ goto out_end;
+ }
+
+ i915_request_put(last);
+ last = i915_request_get(rq);
+
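+		/*
+		 * Each request samples the GPR seeded by its predecessor,
+		 * most likely on a different sibling, and then seeds the
+		 * next GPR; any loss of user register state as the context
+		 * migrates between engines shows up in the scratch results.
+		 */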
+ cs = intel_ring_begin(rq, 8);
+ if (IS_ERR(cs)) {
+ i915_request_add(rq);
+ err = PTR_ERR(cs);
+ goto out_end;
+ }
+
+ *cs++ = MI_STORE_REGISTER_MEM_GEN8 | MI_USE_GGTT;
+ *cs++ = CS_GPR(engine, n);
+ *cs++ = i915_ggtt_offset(scratch) + n * sizeof(u32);
+ *cs++ = 0;
+
+ *cs++ = MI_LOAD_REGISTER_IMM(1);
+ *cs++ = CS_GPR(engine, (n + 1) % NUM_GPR_DW);
+ *cs++ = n + 1;
+
+ *cs++ = MI_NOOP;
+ intel_ring_advance(rq, cs);
+
+ /* Restrict this request to run on a particular engine */
+ rq->execution_mask = engine->mask;
+ i915_request_add(rq);
+ }
+
+ if (i915_request_wait(last, 0, HZ / 5) < 0) {
+ err = -ETIME;
+ goto out_end;
+ }
+
+ cs = i915_gem_object_pin_map(scratch->obj, I915_MAP_WB);
+ if (IS_ERR(cs)) {
+ err = PTR_ERR(cs);
+ goto out_end;
+ }
+
+ for (n = 0; n < NUM_GPR_DW; n++) {
+ if (cs[n] != n) {
+ pr_err("Incorrect value[%d] found for GPR[%d]\n",
+ cs[n], n);
+ err = -EINVAL;
+ break;
+ }
+ }
+
+ i915_gem_object_unpin_map(scratch->obj);
+
+out_end:
+ if (igt_live_test_end(&t))
+ err = -EIO;
+ i915_request_put(last);
+out_unpin:
+ intel_context_unpin(ve);
+out_put:
+ intel_context_put(ve);
+out_scratch:
+ i915_vma_unpin_and_release(&scratch, 0);
return err;
}
-static int bond_virtual_engine(struct drm_i915_private *i915,
+static int live_virtual_preserved(void *arg)
+{
+ struct intel_gt *gt = arg;
+ struct intel_engine_cs *siblings[MAX_ENGINE_INSTANCE + 1];
+ unsigned int class, inst;
+
+ /*
+ * Check that the context image retains non-privileged (user) registers
+ * from one engine to the next. For this we check that the CS_GPR
+ * are preserved.
+ */
+
+ if (USES_GUC_SUBMISSION(gt->i915))
+ return 0;
+
+ /* As we use CS_GPR we cannot run before they existed on all engines. */
+ if (INTEL_GEN(gt->i915) < 9)
+ return 0;
+
+ for (class = 0; class <= MAX_ENGINE_CLASS; class++) {
+ int nsibling, err;
+
+ nsibling = 0;
+ for (inst = 0; inst <= MAX_ENGINE_INSTANCE; inst++) {
+ if (!gt->engine_class[class][inst])
+ continue;
+
+ siblings[nsibling++] = gt->engine_class[class][inst];
+ }
+ if (nsibling < 2)
+ continue;
+
+ err = preserved_virtual_engine(gt, siblings, nsibling);
+ if (err)
+ return err;
+ }
+
+ return 0;
+}
+
+static int bond_virtual_engine(struct intel_gt *gt,
unsigned int class,
struct intel_engine_cs **siblings,
unsigned int nsibling,
@@ -1630,21 +3189,59 @@ static int bond_virtual_engine(struct drm_i915_private *i915,
#define BOND_SCHEDULE BIT(0)
{
struct intel_engine_cs *master;
- struct i915_gem_context *ctx;
struct i915_request *rq[16];
enum intel_engine_id id;
+ struct igt_spinner spin;
unsigned long n;
int err;
+ /*
+ * A set of bonded requests is intended to be run concurrently
+	 * across a number of engines. We use one request per engine
+ * and a magic fence to schedule each of the bonded requests
+ * at the same time. A consequence of our current scheduler is that
+ * we only move requests to the HW ready queue when the request
+ * becomes ready, that is when all of its prerequisite fences have
+ * been signaled. As one of those fences is the master submit fence,
+ * there is a delay on all secondary fences as the HW may be
+ * currently busy. Equally, as all the requests are independent,
+ * they may have other fences that delay individual request
+ * submission to HW. Ergo, we do not guarantee that all requests are
+ * immediately submitted to HW at the same time, just that if the
+ * rules are abided by, they are ready at the same time as the
+ * first is submitted. Userspace can embed semaphores in its batch
+ * to ensure parallel execution of its phases as it requires.
+ * Though naturally it gets requested that perhaps the scheduler should
+ * take care of parallel execution, even across preemption events on
+ * different HW. (The proper answer is of course "lalalala".)
+ *
+ * With the submit-fence, we have identified three possible phases
+ * of synchronisation depending on the master fence: queued (not
+ * ready), executing, and signaled. The first two are quite simple
+ * and checked below. However, the signaled master fence handling is
+ * contentious. Currently we do not distinguish between a signaled
+ * fence and an expired fence, as once signaled it does not convey
+ * any information about the previous execution. It may even be freed
+ * and hence checking later it may not exist at all. Ergo we currently
+ * do not apply the bonding constraint for an already signaled fence,
+ * as our expectation is that it should not constrain the secondaries
+ * and is outside of the scope of the bonded request API (i.e. all
+ * userspace requests are meant to be running in parallel). As
+ * it imposes no constraint, and is effectively a no-op, we do not
+ * check below as normal execution flows are checked extensively above.
+ *
+ * XXX Is the degenerate handling of signaled submit fences the
+	 * expected behaviour for userspace?
+ */
+
GEM_BUG_ON(nsibling >= ARRAY_SIZE(rq) - 1);
- ctx = kernel_context(i915);
- if (!ctx)
+ if (igt_spinner_init(&spin, gt))
return -ENOMEM;
err = 0;
rq[0] = ERR_PTR(-ENOMEM);
- for_each_engine(master, i915, id) {
+ for_each_engine(master, gt, id) {
struct i915_sw_fence fence = {};
if (master->class == class)
@@ -1652,7 +3249,9 @@ static int bond_virtual_engine(struct drm_i915_private *i915,
memset_p((void *)rq, ERR_PTR(-EINVAL), ARRAY_SIZE(rq));
- rq[0] = igt_request_alloc(ctx, master);
+ rq[0] = igt_spinner_create_request(&spin,
+ master->kernel_context,
+ MI_NOOP);
if (IS_ERR(rq[0])) {
err = PTR_ERR(rq[0]);
goto out;
@@ -1665,16 +3264,21 @@ static int bond_virtual_engine(struct drm_i915_private *i915,
&fence,
GFP_KERNEL);
}
+
i915_request_add(rq[0]);
if (err < 0)
goto out;
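+
+		/*
+		 * For an executing master (no BOND_SCHEDULE), wait for the
+		 * spinner to be running on HW before submitting the bonded
+		 * pairs; with BOND_SCHEDULE the master remains queued.
+		 */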
+ if (!(flags & BOND_SCHEDULE) &&
+ !igt_wait_for_spinner(&spin, rq[0])) {
+ err = -EIO;
+ goto out;
+ }
+
for (n = 0; n < nsibling; n++) {
struct intel_context *ve;
- ve = intel_execlists_create_virtual(ctx,
- siblings,
- nsibling);
+ ve = intel_execlists_create_virtual(siblings, nsibling);
if (IS_ERR(ve)) {
err = PTR_ERR(ve);
onstack_fence_fini(&fence);
@@ -1716,6 +3320,8 @@ static int bond_virtual_engine(struct drm_i915_private *i915,
}
}
onstack_fence_fini(&fence);
+ intel_engine_flush_submission(master);
+ igt_spinner_end(&spin);
if (i915_request_wait(rq[0], 0, HZ / 10) < 0) {
pr_err("Master request did not execute (on %s)!\n",
@@ -1749,10 +3355,10 @@ static int bond_virtual_engine(struct drm_i915_private *i915,
out:
for (n = 0; !IS_ERR(rq[n]); n++)
i915_request_put(rq[n]);
- if (igt_flush_test(i915, I915_WAIT_LOCKED))
+ if (igt_flush_test(gt->i915))
err = -EIO;
- kernel_context_close(ctx);
+ igt_spinner_fini(&spin);
return err;
}
@@ -1766,70 +3372,725 @@ static int live_virtual_bond(void *arg)
{ "schedule", BOND_SCHEDULE },
{ },
};
- struct drm_i915_private *i915 = arg;
+ struct intel_gt *gt = arg;
struct intel_engine_cs *siblings[MAX_ENGINE_INSTANCE + 1];
unsigned int class, inst;
- int err = 0;
+ int err;
- if (USES_GUC_SUBMISSION(i915))
+ if (USES_GUC_SUBMISSION(gt->i915))
return 0;
- mutex_lock(&i915->drm.struct_mutex);
-
for (class = 0; class <= MAX_ENGINE_CLASS; class++) {
const struct phase *p;
int nsibling;
nsibling = 0;
for (inst = 0; inst <= MAX_ENGINE_INSTANCE; inst++) {
- if (!i915->engine_class[class][inst])
+ if (!gt->engine_class[class][inst])
break;
GEM_BUG_ON(nsibling == ARRAY_SIZE(siblings));
- siblings[nsibling++] = i915->engine_class[class][inst];
+ siblings[nsibling++] = gt->engine_class[class][inst];
}
if (nsibling < 2)
continue;
for (p = phases; p->name; p++) {
- err = bond_virtual_engine(i915,
+ err = bond_virtual_engine(gt,
class, siblings, nsibling,
p->flags);
if (err) {
pr_err("%s(%s): failed class=%d, nsibling=%d, err=%d\n",
__func__, p->name, class, nsibling, err);
- goto out_unlock;
+ return err;
}
}
}
-out_unlock:
- mutex_unlock(&i915->drm.struct_mutex);
+ return 0;
+}
+
+static int reset_virtual_engine(struct intel_gt *gt,
+ struct intel_engine_cs **siblings,
+ unsigned int nsibling)
+{
+ struct intel_engine_cs *engine;
+ struct intel_context *ve;
+ unsigned long *heartbeat;
+ struct igt_spinner spin;
+ struct i915_request *rq;
+ unsigned int n;
+ int err = 0;
+
+ /*
+ * In order to support offline error capture for fast preempt reset,
+ * we need to decouple the guilty request and ensure that it and its
+	 * descendants are not executed while the capture is in progress.
+ */
+
+ heartbeat = kmalloc_array(nsibling, sizeof(*heartbeat), GFP_KERNEL);
+ if (!heartbeat)
+ return -ENOMEM;
+
+ if (igt_spinner_init(&spin, gt)) {
+ err = -ENOMEM;
+ goto out_free;
+ }
+
+ ve = intel_execlists_create_virtual(siblings, nsibling);
+ if (IS_ERR(ve)) {
+ err = PTR_ERR(ve);
+ goto out_spin;
+ }
+
+ for (n = 0; n < nsibling; n++)
+ engine_heartbeat_disable(siblings[n], &heartbeat[n]);
+
+ rq = igt_spinner_create_request(&spin, ve, MI_ARB_CHECK);
+ if (IS_ERR(rq)) {
+ err = PTR_ERR(rq);
+ goto out_heartbeat;
+ }
+ i915_request_add(rq);
+
+ if (!igt_wait_for_spinner(&spin, rq)) {
+ intel_gt_set_wedged(gt);
+ err = -ETIME;
+ goto out_heartbeat;
+ }
+
+ engine = rq->engine;
+ GEM_BUG_ON(engine == ve->engine);
+
+ /* Take ownership of the reset and tasklet */
+ if (test_and_set_bit(I915_RESET_ENGINE + engine->id,
+ &gt->reset.flags)) {
+ intel_gt_set_wedged(gt);
+ err = -EBUSY;
+ goto out_heartbeat;
+ }
+ tasklet_disable(&engine->execlists.tasklet);
+
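+	/* Flush the submission by hand so our spinner reaches ELSP[0] */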
+ engine->execlists.tasklet.func(engine->execlists.tasklet.data);
+ GEM_BUG_ON(execlists_active(&engine->execlists) != rq);
+
+ /* Fake a preemption event; failed of course */
+ spin_lock_irq(&engine->active.lock);
+ __unwind_incomplete_requests(engine);
+ spin_unlock_irq(&engine->active.lock);
+ GEM_BUG_ON(rq->engine != ve->engine);
+
+ /* Reset the engine while keeping our active request on hold */
+ execlists_hold(engine, rq);
+ GEM_BUG_ON(!i915_request_on_hold(rq));
+
+ intel_engine_reset(engine, NULL);
+ GEM_BUG_ON(rq->fence.error != -EIO);
+
+ /* Release our grasp on the engine, letting CS flow again */
+ tasklet_enable(&engine->execlists.tasklet);
+ clear_and_wake_up_bit(I915_RESET_ENGINE + engine->id, &gt->reset.flags);
+
+ /* Check that we do not resubmit the held request */
+ i915_request_get(rq);
+ if (!i915_request_wait(rq, 0, HZ / 5)) {
+ pr_err("%s: on hold request completed!\n",
+ engine->name);
+ intel_gt_set_wedged(gt);
+ err = -EIO;
+ goto out_rq;
+ }
+ GEM_BUG_ON(!i915_request_on_hold(rq));
+
+ /* But is resubmitted on release */
+ execlists_unhold(engine, rq);
+ if (i915_request_wait(rq, 0, HZ / 5) < 0) {
+ pr_err("%s: held request did not complete!\n",
+ engine->name);
+ intel_gt_set_wedged(gt);
+ err = -ETIME;
+ }
+
+out_rq:
+ i915_request_put(rq);
+out_heartbeat:
+ for (n = 0; n < nsibling; n++)
+ engine_heartbeat_enable(siblings[n], heartbeat[n]);
+
+ intel_context_put(ve);
+out_spin:
+ igt_spinner_fini(&spin);
+out_free:
+ kfree(heartbeat);
return err;
}
+static int live_virtual_reset(void *arg)
+{
+ struct intel_gt *gt = arg;
+ struct intel_engine_cs *siblings[MAX_ENGINE_INSTANCE + 1];
+ unsigned int class, inst;
+
+ /*
+ * Check that we handle a reset event within a virtual engine.
+ * Only the physical engine is reset, but we have to check the flow
+ * of the virtual requests around the reset, and make sure it is not
+ * forgotten.
+ */
+
+ if (USES_GUC_SUBMISSION(gt->i915))
+ return 0;
+
+ if (!intel_has_reset_engine(gt))
+ return 0;
+
+ for (class = 0; class <= MAX_ENGINE_CLASS; class++) {
+ int nsibling, err;
+
+ nsibling = 0;
+ for (inst = 0; inst <= MAX_ENGINE_INSTANCE; inst++) {
+ if (!gt->engine_class[class][inst])
+ continue;
+
+ siblings[nsibling++] = gt->engine_class[class][inst];
+ }
+ if (nsibling < 2)
+ continue;
+
+ err = reset_virtual_engine(gt, siblings, nsibling);
+ if (err)
+ return err;
+ }
+
+ return 0;
+}
+
int intel_execlists_live_selftests(struct drm_i915_private *i915)
{
static const struct i915_subtest tests[] = {
SUBTEST(live_sanitycheck),
+ SUBTEST(live_unlite_switch),
+ SUBTEST(live_unlite_preempt),
+ SUBTEST(live_hold_reset),
+ SUBTEST(live_timeslice_preempt),
+ SUBTEST(live_timeslice_queue),
SUBTEST(live_busywait_preempt),
SUBTEST(live_preempt),
SUBTEST(live_late_preempt),
+ SUBTEST(live_nopreempt),
+ SUBTEST(live_preempt_cancel),
SUBTEST(live_suppress_self_preempt),
SUBTEST(live_suppress_wait_preempt),
SUBTEST(live_chain_preempt),
+ SUBTEST(live_preempt_gang),
SUBTEST(live_preempt_hang),
+ SUBTEST(live_preempt_timeout),
SUBTEST(live_preempt_smoke),
SUBTEST(live_virtual_engine),
SUBTEST(live_virtual_mask),
+ SUBTEST(live_virtual_preserved),
SUBTEST(live_virtual_bond),
+ SUBTEST(live_virtual_reset),
};
if (!HAS_EXECLISTS(i915))
return 0;
- if (i915_terminally_wedged(i915))
+ if (intel_gt_is_wedged(&i915->gt))
+ return 0;
+
+ return intel_gt_live_subtests(tests, &i915->gt);
+}
+
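+/*
+ * Print the buffer as rows of hex dwords, compressing any run of
+ * repeated rows into a single '*' line, in the style of hexdump(1).
+ */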
+static void hexdump(const void *buf, size_t len)
+{
+ const size_t rowsize = 8 * sizeof(u32);
+ const void *prev = NULL;
+ bool skip = false;
+ size_t pos;
+
+ for (pos = 0; pos < len; pos += rowsize) {
+ char line[128];
+
+ if (prev && !memcmp(prev, buf + pos, rowsize)) {
+ if (!skip) {
+ pr_info("*\n");
+ skip = true;
+ }
+ continue;
+ }
+
+ WARN_ON_ONCE(hex_dump_to_buffer(buf + pos, len - pos,
+ rowsize, sizeof(u32),
+ line, sizeof(line),
+ false) >= sizeof(line));
+ pr_info("[%04zx] %s\n", pos, line);
+
+ prev = buf + pos;
+ skip = false;
+ }
+}
+
+static int live_lrc_layout(void *arg)
+{
+ struct intel_gt *gt = arg;
+ struct intel_engine_cs *engine;
+ enum intel_engine_id id;
+ u32 *lrc;
+ int err;
+
+ /*
+ * Check the registers offsets we use to create the initial reg state
+ * match the layout saved by HW.
+ */
+
+ lrc = kmalloc(PAGE_SIZE, GFP_KERNEL);
+ if (!lrc)
+ return -ENOMEM;
+
+ err = 0;
+ for_each_engine(engine, gt, id) {
+ u32 *hw;
+ int dw;
+
+ if (!engine->default_state)
+ continue;
+
+ hw = i915_gem_object_pin_map(engine->default_state,
+ I915_MAP_WB);
+ if (IS_ERR(hw)) {
+ err = PTR_ERR(hw);
+ break;
+ }
+ hw += LRC_STATE_PN * PAGE_SIZE / sizeof(*hw);
+
+ execlists_init_reg_state(memset(lrc, POISON_INUSE, PAGE_SIZE),
+ engine->kernel_context,
+ engine,
+ engine->kernel_context->ring,
+ true);
+
+ dw = 0;
+ do {
+ u32 lri = hw[dw];
+
+ if (lri == 0) {
+ dw++;
+ continue;
+ }
+
+ if (lrc[dw] == 0) {
+ pr_debug("%s: skipped instruction %x at dword %d\n",
+ engine->name, lri, dw);
+ dw++;
+ continue;
+ }
+
+ if ((lri & GENMASK(31, 23)) != MI_INSTR(0x22, 0)) {
+ pr_err("%s: Expected LRI command at dword %d, found %08x\n",
+ engine->name, dw, lri);
+ err = -EINVAL;
+ break;
+ }
+
+ if (lrc[dw] != lri) {
+ pr_err("%s: LRI command mismatch at dword %d, expected %08x found %08x\n",
+ engine->name, dw, lri, lrc[dw]);
+ err = -EINVAL;
+ break;
+ }
+
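+			/*
+			 * MI_LOAD_REGISTER_IMM encodes 2*n - 1 in its length
+			 * field for n register/value pairs, so after the
+			 * increment lri counts the dwords left to walk.
+			 */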
+ lri &= 0x7f;
+ lri++;
+ dw++;
+
+ while (lri) {
+ if (hw[dw] != lrc[dw]) {
+ pr_err("%s: Different registers found at dword %d, expected %x, found %x\n",
+ engine->name, dw, hw[dw], lrc[dw]);
+ err = -EINVAL;
+ break;
+ }
+
+ /*
+ * Skip over the actual register value as we
+ * expect that to differ.
+ */
+ dw += 2;
+ lri -= 2;
+ }
+ } while ((lrc[dw] & ~BIT(0)) != MI_BATCH_BUFFER_END);
+
+ if (err) {
+ pr_info("%s: HW register image:\n", engine->name);
+ hexdump(hw, PAGE_SIZE);
+
+ pr_info("%s: SW register image:\n", engine->name);
+ hexdump(lrc, PAGE_SIZE);
+ }
+
+ i915_gem_object_unpin_map(engine->default_state);
+ if (err)
+ break;
+ }
+
+ kfree(lrc);
+ return err;
+}
+
+static int find_offset(const u32 *lri, u32 offset)
+{
+ int i;
+
+ for (i = 0; i < PAGE_SIZE / sizeof(u32); i++)
+ if (lri[i] == offset)
+ return i;
+
+ return -1;
+}
+
+static int live_lrc_fixed(void *arg)
+{
+ struct intel_gt *gt = arg;
+ struct intel_engine_cs *engine;
+ enum intel_engine_id id;
+ int err = 0;
+
+ /*
+ * Check the assumed register offsets match the actual locations in
+ * the context image.
+ */
+
+ for_each_engine(engine, gt, id) {
+ const struct {
+ u32 reg;
+ u32 offset;
+ const char *name;
+ } tbl[] = {
+ {
+ i915_mmio_reg_offset(RING_START(engine->mmio_base)),
+ CTX_RING_START - 1,
+ "RING_START"
+ },
+ {
+ i915_mmio_reg_offset(RING_CTL(engine->mmio_base)),
+ CTX_RING_CTL - 1,
+ "RING_CTL"
+ },
+ {
+ i915_mmio_reg_offset(RING_HEAD(engine->mmio_base)),
+ CTX_RING_HEAD - 1,
+ "RING_HEAD"
+ },
+ {
+ i915_mmio_reg_offset(RING_TAIL(engine->mmio_base)),
+ CTX_RING_TAIL - 1,
+ "RING_TAIL"
+ },
+ {
+ i915_mmio_reg_offset(RING_MI_MODE(engine->mmio_base)),
+ lrc_ring_mi_mode(engine),
+ "RING_MI_MODE"
+ },
+ {
+ i915_mmio_reg_offset(RING_BBSTATE(engine->mmio_base)),
+ CTX_BB_STATE - 1,
+ "BB_STATE"
+ },
+ { },
+ }, *t;
+ u32 *hw;
+
+ if (!engine->default_state)
+ continue;
+
+ hw = i915_gem_object_pin_map(engine->default_state,
+ I915_MAP_WB);
+ if (IS_ERR(hw)) {
+ err = PTR_ERR(hw);
+ break;
+ }
+ hw += LRC_STATE_PN * PAGE_SIZE / sizeof(*hw);
+
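+ /* Each assumed offset must match where HW placed the register */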
+ for (t = tbl; t->name; t++) {
+ int dw = find_offset(hw, t->reg);
+
+ if (dw != t->offset) {
+ pr_err("%s: Offset for %s [0x%x] mismatch, found %x, expected %x\n",
+ engine->name,
+ t->name,
+ t->reg,
+ dw,
+ t->offset);
+ err = -EINVAL;
+ }
+ }
+
+ i915_gem_object_unpin_map(engine->default_state);
+ }
+
+ return err;
+}
+
+static int __live_lrc_state(struct intel_engine_cs *engine,
+ struct i915_vma *scratch)
+{
+ struct intel_context *ce;
+ struct i915_request *rq;
+ enum {
+ RING_START_IDX = 0,
+ RING_TAIL_IDX,
+ MAX_IDX
+ };
+ u32 expected[MAX_IDX];
+ u32 *cs;
+ int err;
+ int n;
+
+ ce = intel_context_create(engine);
+ if (IS_ERR(ce))
+ return PTR_ERR(ce);
+
+ err = intel_context_pin(ce);
+ if (err)
+ goto err_put;
+
+ rq = i915_request_create(ce);
+ if (IS_ERR(rq)) {
+ err = PTR_ERR(rq);
+ goto err_unpin;
+ }
+
+ cs = intel_ring_begin(rq, 4 * MAX_IDX);
+ if (IS_ERR(cs)) {
+ err = PTR_ERR(cs);
+ i915_request_add(rq);
+ goto err_unpin;
+ }
+
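+ /* SRM RING_START and RING_TAIL into the scratch page for checking */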
+ *cs++ = MI_STORE_REGISTER_MEM_GEN8 | MI_USE_GGTT;
+ *cs++ = i915_mmio_reg_offset(RING_START(engine->mmio_base));
+ *cs++ = i915_ggtt_offset(scratch) + RING_START_IDX * sizeof(u32);
+ *cs++ = 0;
+
+ expected[RING_START_IDX] = i915_ggtt_offset(ce->ring->vma);
+
+ *cs++ = MI_STORE_REGISTER_MEM_GEN8 | MI_USE_GGTT;
+ *cs++ = i915_mmio_reg_offset(RING_TAIL(engine->mmio_base));
+ *cs++ = i915_ggtt_offset(scratch) + RING_TAIL_IDX * sizeof(u32);
+ *cs++ = 0;
+
+ i915_request_get(rq);
+ i915_request_add(rq);
+
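+ /* Only after submission does ring->tail match what HW will report */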
+ intel_engine_flush_submission(engine);
+ expected[RING_TAIL_IDX] = ce->ring->tail;
+
+ if (i915_request_wait(rq, 0, HZ / 5) < 0) {
+ err = -ETIME;
+ goto err_rq;
+ }
+
+ cs = i915_gem_object_pin_map(scratch->obj, I915_MAP_WB);
+ if (IS_ERR(cs)) {
+ err = PTR_ERR(cs);
+ goto err_rq;
+ }
+
+ for (n = 0; n < MAX_IDX; n++) {
+ if (cs[n] != expected[n]) {
+ pr_err("%s: Stored register[%d] value[0x%x] did not match expected[0x%x]\n",
+ engine->name, n, cs[n], expected[n]);
+ err = -EINVAL;
+ break;
+ }
+ }
+
+ i915_gem_object_unpin_map(scratch->obj);
+
+err_rq:
+ i915_request_put(rq);
+err_unpin:
+ intel_context_unpin(ce);
+err_put:
+ intel_context_put(ce);
+ return err;
+}
+
+static int live_lrc_state(void *arg)
+{
+ struct intel_gt *gt = arg;
+ struct intel_engine_cs *engine;
+ struct i915_vma *scratch;
+ enum intel_engine_id id;
+ int err = 0;
+
+ /*
+ * Check the live register state matches what we expect for this
+ * intel_context.
+ */
+
+ scratch = create_scratch(gt);
+ if (IS_ERR(scratch))
+ return PTR_ERR(scratch);
+
+ for_each_engine(engine, gt, id) {
+ err = __live_lrc_state(engine, scratch);
+ if (err)
+ break;
+ }
+
+ if (igt_flush_test(gt->i915))
+ err = -EIO;
+
+ i915_vma_unpin_and_release(&scratch, 0);
+ return err;
+}
+
+static int gpr_make_dirty(struct intel_engine_cs *engine)
+{
+ struct i915_request *rq;
+ u32 *cs;
+ int n;
+
+ rq = intel_engine_create_kernel_request(engine);
+ if (IS_ERR(rq))
+ return PTR_ERR(rq);
+
+ cs = intel_ring_begin(rq, 2 * NUM_GPR_DW + 2);
+ if (IS_ERR(cs)) {
+ i915_request_add(rq);
+ return PTR_ERR(cs);
+ }
+
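+ /* Poison every GPR with a known value using a single LRI packet */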
+ *cs++ = MI_LOAD_REGISTER_IMM(NUM_GPR_DW);
+ for (n = 0; n < NUM_GPR_DW; n++) {
+ *cs++ = CS_GPR(engine, n);
+ *cs++ = STACK_MAGIC;
+ }
+ *cs++ = MI_NOOP;
+
+ intel_ring_advance(rq, cs);
+ i915_request_add(rq);
+
+ return 0;
+}
+
+static int __live_gpr_clear(struct intel_engine_cs *engine,
+ struct i915_vma *scratch)
+{
+ struct intel_context *ce;
+ struct i915_request *rq;
+ u32 *cs;
+ int err;
+ int n;
+
+ if (INTEL_GEN(engine->i915) < 9 && engine->class != RENDER_CLASS)
+ return 0; /* GPR only on rcs0 for gen8 */
+
+ err = gpr_make_dirty(engine);
+ if (err)
+ return err;
+
+ ce = intel_context_create(engine);
+ if (IS_ERR(ce))
+ return PTR_ERR(ce);
+
+ rq = intel_context_create_request(ce);
+ if (IS_ERR(rq)) {
+ err = PTR_ERR(rq);
+ goto err_put;
+ }
+
+ cs = intel_ring_begin(rq, 4 * NUM_GPR_DW);
+ if (IS_ERR(cs)) {
+ err = PTR_ERR(cs);
+ i915_request_add(rq);
+ goto err_put;
+ }
+
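+ /* SRM each GPR; a fresh context should read back all zeroes */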
+ for (n = 0; n < NUM_GPR_DW; n++) {
+ *cs++ = MI_STORE_REGISTER_MEM_GEN8 | MI_USE_GGTT;
+ *cs++ = CS_GPR(engine, n);
+ *cs++ = i915_ggtt_offset(scratch) + n * sizeof(u32);
+ *cs++ = 0;
+ }
+
+ i915_request_get(rq);
+ i915_request_add(rq);
+
+ if (i915_request_wait(rq, 0, HZ / 5) < 0) {
+ err = -ETIME;
+ goto err_rq;
+ }
+
+ cs = i915_gem_object_pin_map(scratch->obj, I915_MAP_WB);
+ if (IS_ERR(cs)) {
+ err = PTR_ERR(cs);
+ goto err_rq;
+ }
+
+ for (n = 0; n < NUM_GPR_DW; n++) {
+ if (cs[n]) {
+ pr_err("%s: GPR[%d].%s was not zero, found 0x%08x!\n",
+ engine->name,
+ n / 2, n & 1 ? "udw" : "ldw",
+ cs[n]);
+ err = -EINVAL;
+ break;
+ }
+ }
+
+ i915_gem_object_unpin_map(scratch->obj);
+
+err_rq:
+ i915_request_put(rq);
+err_put:
+ intel_context_put(ce);
+ return err;
+}
+
+static int live_gpr_clear(void *arg)
+{
+ struct intel_gt *gt = arg;
+ struct intel_engine_cs *engine;
+ struct i915_vma *scratch;
+ enum intel_engine_id id;
+ int err = 0;
+
+ /*
+ * Check that GPR registers are cleared in new contexts as we need
+ * to avoid leaking any information from previous contexts.
+ */
+
+ scratch = create_scratch(gt);
+ if (IS_ERR(scratch))
+ return PTR_ERR(scratch);
+
+ for_each_engine(engine, gt, id) {
+ err = __live_gpr_clear(engine, scratch);
+ if (err)
+ break;
+ }
+
+ if (igt_flush_test(gt->i915))
+ err = -EIO;
+
+ i915_vma_unpin_and_release(&scratch, 0);
+ return err;
+}
+
+int intel_lrc_live_selftests(struct drm_i915_private *i915)
+{
+ static const struct i915_subtest tests[] = {
+ SUBTEST(live_lrc_layout),
+ SUBTEST(live_lrc_fixed),
+ SUBTEST(live_lrc_state),
+ SUBTEST(live_gpr_clear),
+ };
+
+ if (!HAS_LOGICAL_RING_CONTEXTS(i915))
return 0;
- return i915_subtests(tests, i915);
+ return intel_gt_live_subtests(tests, &i915->gt);
}
diff --git a/drivers/gpu/drm/i915/gt/selftest_mocs.c b/drivers/gpu/drm/i915/gt/selftest_mocs.c
new file mode 100644
index 000000000000..de1f83100fb6
--- /dev/null
+++ b/drivers/gpu/drm/i915/gt/selftest_mocs.c
@@ -0,0 +1,419 @@
+/*
+ * SPDX-License-Identifier: MIT
+ *
+ * Copyright © 2019 Intel Corporation
+ */
+
+#include "gt/intel_engine_pm.h"
+#include "i915_selftest.h"
+
+#include "gem/selftests/mock_context.h"
+#include "selftests/igt_reset.h"
+#include "selftests/igt_spinner.h"
+
+struct live_mocs {
+ struct drm_i915_mocs_table table;
+ struct i915_vma *scratch;
+ void *vaddr;
+};
+
+static int request_add_sync(struct i915_request *rq, int err)
+{
+ i915_request_get(rq);
+ i915_request_add(rq);
+ if (i915_request_wait(rq, 0, HZ / 5) < 0)
+ err = -ETIME;
+ i915_request_put(rq);
+
+ return err;
+}
+
+static int request_add_spin(struct i915_request *rq, struct igt_spinner *spin)
+{
+ int err = 0;
+
+ i915_request_get(rq);
+ i915_request_add(rq);
+ if (spin && !igt_wait_for_spinner(spin, rq))
+ err = -ETIME;
+ i915_request_put(rq);
+
+ return err;
+}
+
+static struct i915_vma *create_scratch(struct intel_gt *gt)
+{
+ struct drm_i915_gem_object *obj;
+ struct i915_vma *vma;
+ int err;
+
+ obj = i915_gem_object_create_internal(gt->i915, PAGE_SIZE);
+ if (IS_ERR(obj))
+ return ERR_CAST(obj);
+
+ i915_gem_object_set_cache_coherency(obj, I915_CACHING_CACHED);
+
+ vma = i915_vma_instance(obj, &gt->ggtt->vm, NULL);
+ if (IS_ERR(vma)) {
+ i915_gem_object_put(obj);
+ return vma;
+ }
+
+ err = i915_vma_pin(vma, 0, 0, PIN_GLOBAL);
+ if (err) {
+ i915_gem_object_put(obj);
+ return ERR_PTR(err);
+ }
+
+ return vma;
+}
+
+static int live_mocs_init(struct live_mocs *arg, struct intel_gt *gt)
+{
+ int err;
+
+ if (!get_mocs_settings(gt->i915, &arg->table))
+ return -EINVAL;
+
+ arg->scratch = create_scratch(gt);
+ if (IS_ERR(arg->scratch))
+ return PTR_ERR(arg->scratch);
+
+ arg->vaddr = i915_gem_object_pin_map(arg->scratch->obj, I915_MAP_WB);
+ if (IS_ERR(arg->vaddr)) {
+ err = PTR_ERR(arg->vaddr);
+ goto err_scratch;
+ }
+
+ return 0;
+
+err_scratch:
+ i915_vma_unpin_and_release(&arg->scratch, 0);
+ return err;
+}
+
+static void live_mocs_fini(struct live_mocs *arg)
+{
+ i915_vma_unpin_and_release(&arg->scratch, I915_VMA_RELEASE_MAP);
+}
+
+static int read_regs(struct i915_request *rq,
+ u32 addr, unsigned int count,
+ u32 *offset)
+{
+ unsigned int i;
+ u32 *cs;
+
+ GEM_BUG_ON(!IS_ALIGNED(*offset, sizeof(u32)));
+
+ cs = intel_ring_begin(rq, 4 * count);
+ if (IS_ERR(cs))
+ return PTR_ERR(cs);
+
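+ /* One SRM per dword, stepping both the register and GGTT destination */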
+ for (i = 0; i < count; i++) {
+ *cs++ = MI_STORE_REGISTER_MEM_GEN8 | MI_USE_GGTT;
+ *cs++ = addr;
+ *cs++ = *offset;
+ *cs++ = 0;
+
+ addr += sizeof(u32);
+ *offset += sizeof(u32);
+ }
+
+ intel_ring_advance(rq, cs);
+
+ return 0;
+}
+
+static int read_mocs_table(struct i915_request *rq,
+ const struct drm_i915_mocs_table *table,
+ u32 *offset)
+{
+ u32 addr;
+
+ if (HAS_GLOBAL_MOCS_REGISTERS(rq->i915))
+ addr = global_mocs_offset();
+ else
+ addr = mocs_offset(rq->engine);
+
+ return read_regs(rq, addr, table->n_entries, offset);
+}
+
+static int read_l3cc_table(struct i915_request *rq,
+ const struct drm_i915_mocs_table *table,
+ u32 *offset)
+{
+ u32 addr = i915_mmio_reg_offset(GEN9_LNCFCMOCS(0));
+
+ return read_regs(rq, addr, (table->n_entries + 1) / 2, offset);
+}
+
+static int check_mocs_table(struct intel_engine_cs *engine,
+ const struct drm_i915_mocs_table *table,
+ u32 **vaddr)
+{
+ unsigned int i;
+ u32 expect;
+
+ for_each_mocs(expect, table, i) {
+ if (**vaddr != expect) {
+ pr_err("%s: Invalid MOCS[%d] entry, found %08x, expected %08x\n",
+ engine->name, i, **vaddr, expect);
+ return -EINVAL;
+ }
+ ++*vaddr;
+ }
+
+ return 0;
+}
+
+static bool mcr_range(struct drm_i915_private *i915, u32 offset)
+{
+ /*
+ * Registers in this range are affected by the MCR selector
+ * which only controls CPU-initiated MMIO. Routing does not
+ * work for CS access so we cannot verify them on this path.
+ */
+ return INTEL_GEN(i915) >= 8 && offset >= 0xb000 && offset <= 0xb4ff;
+}
+
+static int check_l3cc_table(struct intel_engine_cs *engine,
+ const struct drm_i915_mocs_table *table,
+ u32 **vaddr)
+{
+ /* Can we read the MCR range 0xb000 directly? See intel_workarounds! */
+ u32 reg = i915_mmio_reg_offset(GEN9_LNCFCMOCS(0));
+ unsigned int i;
+ u32 expect;
+
+ for_each_l3cc(expect, table, i) {
+ if (!mcr_range(engine->i915, reg) && **vaddr != expect) {
+ pr_err("%s: Invalid L3CC[%d] entry, found %08x, expected %08x\n",
+ engine->name, i, **vaddr, expect);
+ return -EINVAL;
+ }
+ ++*vaddr;
+ reg += 4;
+ }
+
+ return 0;
+}
+
+static int check_mocs_engine(struct live_mocs *arg,
+ struct intel_context *ce)
+{
+ struct i915_vma *vma = arg->scratch;
+ struct i915_request *rq;
+ u32 offset;
+ u32 *vaddr;
+ int err;
+
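+ /* Poison the scratch page so any unwritten entries stand out */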
+ memset32(arg->vaddr, STACK_MAGIC, PAGE_SIZE / sizeof(u32));
+
+ rq = intel_context_create_request(ce);
+ if (IS_ERR(rq))
+ return PTR_ERR(rq);
+
+ i915_vma_lock(vma);
+ err = i915_request_await_object(rq, vma->obj, true);
+ if (!err)
+ err = i915_vma_move_to_active(vma, rq, EXEC_OBJECT_WRITE);
+ i915_vma_unlock(vma);
+
+ /* Read the mocs tables back using SRM */
+ offset = i915_ggtt_offset(vma);
+ if (!err)
+ err = read_mocs_table(rq, &arg->table, &offset);
+ if (!err && ce->engine->class == RENDER_CLASS)
+ err = read_l3cc_table(rq, &arg->table, &offset);
+ offset -= i915_ggtt_offset(vma);
+ GEM_BUG_ON(offset > PAGE_SIZE);
+
+ err = request_add_sync(rq, err);
+ if (err)
+ return err;
+
+ /* Compare the results against the expected tables */
+ vaddr = arg->vaddr;
+ if (!err)
+ err = check_mocs_table(ce->engine, &arg->table, &vaddr);
+ if (!err && ce->engine->class == RENDER_CLASS)
+ err = check_l3cc_table(ce->engine, &arg->table, &vaddr);
+ if (err)
+ return err;
+
+ GEM_BUG_ON(arg->vaddr + offset != vaddr);
+ return 0;
+}
+
+static int live_mocs_kernel(void *arg)
+{
+ struct intel_gt *gt = arg;
+ struct intel_engine_cs *engine;
+ enum intel_engine_id id;
+ struct live_mocs mocs;
+ int err;
+
+ /* Basic check that the system is configured with the expected mocs table */
+
+ err = live_mocs_init(&mocs, gt);
+ if (err)
+ return err;
+
+ for_each_engine(engine, gt, id) {
+ intel_engine_pm_get(engine);
+ err = check_mocs_engine(&mocs, engine->kernel_context);
+ intel_engine_pm_put(engine);
+ if (err)
+ break;
+ }
+
+ live_mocs_fini(&mocs);
+ return err;
+}
+
+static int live_mocs_clean(void *arg)
+{
+ struct intel_gt *gt = arg;
+ struct intel_engine_cs *engine;
+ enum intel_engine_id id;
+ struct live_mocs mocs;
+ int err;
+
+ /* Every new context should see the same mocs table */
+
+ err = live_mocs_init(&mocs, gt);
+ if (err)
+ return err;
+
+ for_each_engine(engine, gt, id) {
+ struct intel_context *ce;
+
+ ce = intel_context_create(engine);
+ if (IS_ERR(ce)) {
+ err = PTR_ERR(ce);
+ break;
+ }
+
+ err = check_mocs_engine(&mocs, ce);
+ intel_context_put(ce);
+ if (err)
+ break;
+ }
+
+ live_mocs_fini(&mocs);
+ return err;
+}
+
+static int active_engine_reset(struct intel_context *ce,
+ const char *reason)
+{
+ struct igt_spinner spin;
+ struct i915_request *rq;
+ int err;
+
+ err = igt_spinner_init(&spin, ce->engine->gt);
+ if (err)
+ return err;
+
+ rq = igt_spinner_create_request(&spin, ce, MI_NOOP);
+ if (IS_ERR(rq)) {
+ igt_spinner_fini(&spin);
+ return PTR_ERR(rq);
+ }
+
+ err = request_add_spin(rq, &spin);
+ if (err == 0)
+ err = intel_engine_reset(ce->engine, reason);
+
+ igt_spinner_end(&spin);
+ igt_spinner_fini(&spin);
+
+ return err;
+}
+
+static int __live_mocs_reset(struct live_mocs *mocs,
+ struct intel_context *ce)
+{
+ int err;
+
+ err = intel_engine_reset(ce->engine, "mocs");
+ if (err)
+ return err;
+
+ err = check_mocs_engine(mocs, ce);
+ if (err)
+ return err;
+
+ err = active_engine_reset(ce, "mocs");
+ if (err)
+ return err;
+
+ err = check_mocs_engine(mocs, ce);
+ if (err)
+ return err;
+
+ intel_gt_reset(ce->engine->gt, ce->engine->mask, "mocs");
+
+ err = check_mocs_engine(mocs, ce);
+ if (err)
+ return err;
+
+ return 0;
+}
+
+static int live_mocs_reset(void *arg)
+{
+ struct intel_gt *gt = arg;
+ struct intel_engine_cs *engine;
+ enum intel_engine_id id;
+ struct live_mocs mocs;
+ int err = 0;
+
+ /* Check the mocs setup is retained over per-engine and global resets */
+
+ if (!intel_has_reset_engine(gt))
+ return 0;
+
+ err = live_mocs_init(&mocs, gt);
+ if (err)
+ return err;
+
+ igt_global_reset_lock(gt);
+ for_each_engine(engine, gt, id) {
+ struct intel_context *ce;
+
+ ce = intel_context_create(engine);
+ if (IS_ERR(ce)) {
+ err = PTR_ERR(ce);
+ break;
+ }
+
+ intel_engine_pm_get(engine);
+ err = __live_mocs_reset(&mocs, ce);
+ intel_engine_pm_put(engine);
+
+ intel_context_put(ce);
+ if (err)
+ break;
+ }
+ igt_global_reset_unlock(gt);
+
+ live_mocs_fini(&mocs);
+ return err;
+}
+
+int intel_mocs_live_selftests(struct drm_i915_private *i915)
+{
+ static const struct i915_subtest tests[] = {
+ SUBTEST(live_mocs_kernel),
+ SUBTEST(live_mocs_clean),
+ SUBTEST(live_mocs_reset),
+ };
+ struct drm_i915_mocs_table table;
+
+ if (!get_mocs_settings(i915, &table))
+ return 0;
+
+ return intel_gt_live_subtests(tests, &i915->gt);
+}
diff --git a/drivers/gpu/drm/i915/gt/selftest_rc6.c b/drivers/gpu/drm/i915/gt/selftest_rc6.c
new file mode 100644
index 000000000000..8cc55a0e9e06
--- /dev/null
+++ b/drivers/gpu/drm/i915/gt/selftest_rc6.c
@@ -0,0 +1,203 @@
+/*
+ * SPDX-License-Identifier: MIT
+ *
+ * Copyright © 2019 Intel Corporation
+ */
+
+#include "intel_context.h"
+#include "intel_engine_pm.h"
+#include "intel_gt_requests.h"
+#include "intel_ring.h"
+#include "selftest_rc6.h"
+
+#include "selftests/i915_random.h"
+
+int live_rc6_manual(void *arg)
+{
+ struct intel_gt *gt = arg;
+ struct intel_rc6 *rc6 = &gt->rc6;
+ intel_wakeref_t wakeref;
+ u64 res[2];
+ int err = 0;
+
+ /*
+ * Our claim is that we can "encourage" the GPU to enter rc6 at will.
+ * Let's try it!
+ */
+
+ if (!rc6->enabled)
+ return 0;
+
+ /* bsw/byt use a PCU and decouple RC6 from our manual control */
+ if (IS_VALLEYVIEW(gt->i915) || IS_CHERRYVIEW(gt->i915))
+ return 0;
+
+ wakeref = intel_runtime_pm_get(gt->uncore->rpm);
+
+ /* Force RC6 off for starters */
+ __intel_rc6_disable(rc6);
+ msleep(1); /* wakeup is not immediate, takes about 100us on icl */
+
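+ /* With RC6 off, the residency counter must not advance (ns >> 10 ~ us) */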
+ res[0] = intel_rc6_residency_ns(rc6, GEN6_GT_GFX_RC6);
+ msleep(250);
+ res[1] = intel_rc6_residency_ns(rc6, GEN6_GT_GFX_RC6);
+ if ((res[1] - res[0]) >> 10) {
+ pr_err("RC6 residency increased by %lldus while disabled for 250ms!\n",
+ (res[1] - res[0]) >> 10);
+ err = -EINVAL;
+ goto out_unlock;
+ }
+
+ /* Manually enter RC6 */
+ intel_rc6_park(rc6);
+
+ res[0] = intel_rc6_residency_ns(rc6, GEN6_GT_GFX_RC6);
+ msleep(100);
+ res[1] = intel_rc6_residency_ns(rc6, GEN6_GT_GFX_RC6);
+
+ if (res[1] == res[0]) {
+ pr_err("Did not enter RC6! RC6_STATE=%08x, RC6_CONTROL=%08x\n",
+ intel_uncore_read_fw(gt->uncore, GEN6_RC_STATE),
+ intel_uncore_read_fw(gt->uncore, GEN6_RC_CONTROL));
+ err = -EINVAL;
+ }
+
+ /* Restore what should have been the original state! */
+ intel_rc6_unpark(rc6);
+
+out_unlock:
+ intel_runtime_pm_put(gt->uncore->rpm, wakeref);
+ return err;
+}
+
+static const u32 *__live_rc6_ctx(struct intel_context *ce)
+{
+ struct i915_request *rq;
+ const u32 *result;
+ u32 cmd;
+ u32 *cs;
+
+ rq = intel_context_create_request(ce);
+ if (IS_ERR(rq))
+ return ERR_CAST(rq);
+
+ cs = intel_ring_begin(rq, 4);
+ if (IS_ERR(cs)) {
+ i915_request_add(rq);
+ return cs;
+ }
+
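+ /* gen8+ SRM carries a 64b address, one extra dword in the length */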
+ cmd = MI_STORE_REGISTER_MEM | MI_USE_GGTT;
+ if (INTEL_GEN(rq->i915) >= 8)
+ cmd++;
+
+ *cs++ = cmd;
+ *cs++ = i915_mmio_reg_offset(GEN8_RC6_CTX_INFO);
+ *cs++ = ce->timeline->hwsp_offset + 8;
+ *cs++ = 0;
+ intel_ring_advance(rq, cs);
+
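+ /* The stored value lands 2 dwords after the seqno slot in the HWSP */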
+ result = rq->hwsp_seqno + 2;
+ i915_request_add(rq);
+
+ return result;
+}
+
+static struct intel_engine_cs **
+randomised_engines(struct intel_gt *gt,
+ struct rnd_state *prng,
+ unsigned int *count)
+{
+ struct intel_engine_cs *engine, **engines;
+ enum intel_engine_id id;
+ int n;
+
+ n = 0;
+ for_each_engine(engine, gt, id)
+ n++;
+ if (!n)
+ return NULL;
+
+ engines = kmalloc_array(n, sizeof(*engines), GFP_KERNEL);
+ if (!engines)
+ return NULL;
+
+ n = 0;
+ for_each_engine(engine, gt, id)
+ engines[n++] = engine;
+
+ i915_prandom_shuffle(engines, sizeof(*engines), n, prng);
+
+ *count = n;
+ return engines;
+}
+
+int live_rc6_ctx_wa(void *arg)
+{
+ struct intel_gt *gt = arg;
+ struct intel_engine_cs **engines;
+ unsigned int n, count;
+ I915_RND_STATE(prng);
+ int err = 0;
+
+ /* A read of CTX_INFO upsets rc6. Poke the bear! */
+ if (INTEL_GEN(gt->i915) < 8)
+ return 0;
+
+ engines = randomised_engines(gt, &prng, &count);
+ if (!engines)
+ return 0;
+
+ for (n = 0; n < count; n++) {
+ struct intel_engine_cs *engine = engines[n];
+ int pass;
+
+ for (pass = 0; pass < 2; pass++) {
+ struct intel_context *ce;
+ unsigned int resets =
+ i915_reset_engine_count(&gt->i915->gpu_error,
+ engine);
+ const u32 *res;
+
+ /* Use a sacrificial context */
+ ce = intel_context_create(engine);
+ if (IS_ERR(ce)) {
+ err = PTR_ERR(ce);
+ goto out;
+ }
+
+ intel_engine_pm_get(engine);
+ res = __live_rc6_ctx(ce);
+ intel_engine_pm_put(engine);
+ intel_context_put(ce);
+ if (IS_ERR(res)) {
+ err = PTR_ERR(res);
+ goto out;
+ }
+
+ if (intel_gt_wait_for_idle(gt, HZ / 5) == -ETIME) {
+ intel_gt_set_wedged(gt);
+ err = -ETIME;
+ goto out;
+ }
+
+ intel_gt_pm_wait_for_idle(gt);
+ pr_debug("%s: CTX_INFO=%0x\n",
+ engine->name, READ_ONCE(*res));
+
+ if (resets !=
+ i915_reset_engine_count(&gt->i915->gpu_error,
+ engine)) {
+ pr_err("%s: GPU reset required\n",
+ engine->name);
+ add_taint_for_CI(TAINT_WARN);
+ err = -EIO;
+ goto out;
+ }
+ }
+ }
+
+out:
+ kfree(engines);
+ return err;
+}
diff --git a/drivers/gpu/drm/i915/gt/selftest_rc6.h b/drivers/gpu/drm/i915/gt/selftest_rc6.h
new file mode 100644
index 000000000000..762fd442d7b2
--- /dev/null
+++ b/drivers/gpu/drm/i915/gt/selftest_rc6.h
@@ -0,0 +1,13 @@
+/*
+ * SPDX-License-Identifier: MIT
+ *
+ * Copyright © 2019 Intel Corporation
+ */
+
+#ifndef SELFTEST_RC6_H
+#define SELFTEST_RC6_H
+
+int live_rc6_ctx_wa(void *arg);
+int live_rc6_manual(void *arg);
+
+#endif /* SELFTEST_RC6_H */
diff --git a/drivers/gpu/drm/i915/gt/selftest_reset.c b/drivers/gpu/drm/i915/gt/selftest_reset.c
index b5c590c9ccba..6ad6aca315f6 100644
--- a/drivers/gpu/drm/i915/gt/selftest_reset.c
+++ b/drivers/gpu/drm/i915/gt/selftest_reset.c
@@ -9,26 +9,29 @@
static int igt_global_reset(void *arg)
{
- struct drm_i915_private *i915 = arg;
+ struct intel_gt *gt = arg;
unsigned int reset_count;
+ intel_wakeref_t wakeref;
int err = 0;
/* Check that we can issue a global GPU reset */
- igt_global_reset_lock(i915);
+ igt_global_reset_lock(gt);
+ wakeref = intel_runtime_pm_get(gt->uncore->rpm);
- reset_count = i915_reset_count(&i915->gpu_error);
+ reset_count = i915_reset_count(&gt->i915->gpu_error);
- i915_reset(i915, ALL_ENGINES, NULL);
+ intel_gt_reset(gt, ALL_ENGINES, NULL);
- if (i915_reset_count(&i915->gpu_error) == reset_count) {
+ if (i915_reset_count(&gt->i915->gpu_error) == reset_count) {
pr_err("No GPU reset recorded!\n");
err = -EINVAL;
}
- igt_global_reset_unlock(i915);
+ intel_runtime_pm_put(gt->uncore->rpm, wakeref);
+ igt_global_reset_unlock(gt);
- if (i915_reset_failed(i915))
+ if (intel_gt_is_wedged(gt))
err = -EIO;
return err;
@@ -36,64 +39,123 @@ static int igt_global_reset(void *arg)
static int igt_wedged_reset(void *arg)
{
- struct drm_i915_private *i915 = arg;
+ struct intel_gt *gt = arg;
intel_wakeref_t wakeref;
/* Check that we can recover a wedged device with a GPU reset */
- igt_global_reset_lock(i915);
- wakeref = intel_runtime_pm_get(&i915->runtime_pm);
+ igt_global_reset_lock(gt);
+ wakeref = intel_runtime_pm_get(gt->uncore->rpm);
- i915_gem_set_wedged(i915);
+ intel_gt_set_wedged(gt);
- GEM_BUG_ON(!i915_reset_failed(i915));
- i915_reset(i915, ALL_ENGINES, NULL);
+ GEM_BUG_ON(!intel_gt_is_wedged(gt));
+ intel_gt_reset(gt, ALL_ENGINES, NULL);
- intel_runtime_pm_put(&i915->runtime_pm, wakeref);
- igt_global_reset_unlock(i915);
+ intel_runtime_pm_put(gt->uncore->rpm, wakeref);
+ igt_global_reset_unlock(gt);
- return i915_reset_failed(i915) ? -EIO : 0;
+ return intel_gt_is_wedged(gt) ? -EIO : 0;
}
static int igt_atomic_reset(void *arg)
{
- struct drm_i915_private *i915 = arg;
+ struct intel_gt *gt = arg;
const typeof(*igt_atomic_phases) *p;
int err = 0;
/* Check that the resets are usable from atomic context */
- igt_global_reset_lock(i915);
- mutex_lock(&i915->drm.struct_mutex);
+ intel_gt_pm_get(gt);
+ igt_global_reset_lock(gt);
/* Flush any requests before we get started and check basics */
- if (!igt_force_reset(i915))
+ if (!igt_force_reset(gt))
goto unlock;
for (p = igt_atomic_phases; p->name; p++) {
intel_engine_mask_t awake;
- GEM_TRACE("intel_gpu_reset under %s\n", p->name);
+ GEM_TRACE("__intel_gt_reset under %s\n", p->name);
- awake = reset_prepare(i915);
+ awake = reset_prepare(gt);
p->critical_section_begin();
- reset_prepare(i915);
- err = intel_gpu_reset(i915, ALL_ENGINES);
+
+ err = __intel_gt_reset(gt, ALL_ENGINES);
+
p->critical_section_end();
- reset_finish(i915, awake);
+ reset_finish(gt, awake);
if (err) {
- pr_err("intel_gpu_reset failed under %s\n", p->name);
+ pr_err("__intel_gt_reset failed under %s\n", p->name);
break;
}
}
/* As we poke around the guts, do a full reset before continuing. */
- igt_force_reset(i915);
+ igt_force_reset(gt);
unlock:
- mutex_unlock(&i915->drm.struct_mutex);
- igt_global_reset_unlock(i915);
+ igt_global_reset_unlock(gt);
+ intel_gt_pm_put(gt);
+
+ return err;
+}
+
+static int igt_atomic_engine_reset(void *arg)
+{
+ struct intel_gt *gt = arg;
+ const typeof(*igt_atomic_phases) *p;
+ struct intel_engine_cs *engine;
+ enum intel_engine_id id;
+ int err = 0;
+
+ /* Check that the resets are usable from atomic context */
+
+ if (!intel_has_reset_engine(gt))
+ return 0;
+
+ if (USES_GUC_SUBMISSION(gt->i915))
+ return 0;
+
+ intel_gt_pm_get(gt);
+ igt_global_reset_lock(gt);
+
+ /* Flush any requests before we get started and check basics */
+ if (!igt_force_reset(gt))
+ goto out_unlock;
+
+ for_each_engine(engine, gt, id) {
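+ /* Quiesce the submission tasklet so the reset runs truly atomically */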
+ tasklet_disable(&engine->execlists.tasklet);
+ intel_engine_pm_get(engine);
+
+ for (p = igt_atomic_phases; p->name; p++) {
+ GEM_TRACE("intel_engine_reset(%s) under %s\n",
+ engine->name, p->name);
+
+ p->critical_section_begin();
+ err = intel_engine_reset(engine, NULL);
+ p->critical_section_end();
+
+ if (err) {
+ pr_err("intel_engine_reset(%s) failed under %s\n",
+ engine->name, p->name);
+ break;
+ }
+ }
+
+ intel_engine_pm_put(engine);
+ tasklet_enable(&engine->execlists.tasklet);
+ if (err)
+ break;
+ }
+
+ /* As we poke around the guts, do a full reset before continuing. */
+ igt_force_reset(gt);
+
+out_unlock:
+ igt_global_reset_unlock(gt);
+ intel_gt_pm_put(gt);
return err;
}
@@ -104,18 +166,15 @@ int intel_reset_live_selftests(struct drm_i915_private *i915)
SUBTEST(igt_global_reset), /* attempt to recover GPU first */
SUBTEST(igt_wedged_reset),
SUBTEST(igt_atomic_reset),
+ SUBTEST(igt_atomic_engine_reset),
};
- intel_wakeref_t wakeref;
- int err = 0;
+ struct intel_gt *gt = &i915->gt;
- if (!intel_has_gpu_reset(i915))
+ if (!intel_has_gpu_reset(gt))
return 0;
- if (i915_terminally_wedged(i915))
+ if (intel_gt_is_wedged(gt))
return -EIO; /* we're long past hope of a successful reset */
- with_intel_runtime_pm(&i915->runtime_pm, wakeref)
- err = i915_subtests(tests, i915);
-
- return err;
+ return intel_gt_live_subtests(tests, gt);
}
diff --git a/drivers/gpu/drm/i915/gt/selftest_timeline.c b/drivers/gpu/drm/i915/gt/selftest_timeline.c
new file mode 100644
index 000000000000..e2d78cc22fb4
--- /dev/null
+++ b/drivers/gpu/drm/i915/gt/selftest_timeline.c
@@ -0,0 +1,836 @@
+/*
+ * SPDX-License-Identifier: MIT
+ *
+ * Copyright © 2017-2018 Intel Corporation
+ */
+
+#include <linux/prime_numbers.h>
+
+#include "intel_engine_pm.h"
+#include "intel_gt.h"
+#include "intel_gt_requests.h"
+#include "intel_ring.h"
+
+#include "../selftests/i915_random.h"
+#include "../i915_selftest.h"
+
+#include "../selftests/igt_flush_test.h"
+#include "../selftests/mock_gem_device.h"
+#include "selftests/mock_timeline.h"
+
+static struct page *hwsp_page(struct intel_timeline *tl)
+{
+ struct drm_i915_gem_object *obj = tl->hwsp_ggtt->obj;
+
+ GEM_BUG_ON(!i915_gem_object_has_pinned_pages(obj));
+ return sg_page(obj->mm.pages->sgl);
+}
+
+static unsigned long hwsp_cacheline(struct intel_timeline *tl)
+{
+ unsigned long address = (unsigned long)page_address(hwsp_page(tl));
+
+ return (address + tl->hwsp_offset) / CACHELINE_BYTES;
+}
+
+#define CACHELINES_PER_PAGE (PAGE_SIZE / CACHELINE_BYTES)
+
+struct mock_hwsp_freelist {
+ struct intel_gt *gt;
+ struct radix_tree_root cachelines;
+ struct intel_timeline **history;
+ unsigned long count, max;
+ struct rnd_state prng;
+};
+
+enum {
+ SHUFFLE = BIT(0),
+};
+
+static void __mock_hwsp_record(struct mock_hwsp_freelist *state,
+ unsigned int idx,
+ struct intel_timeline *tl)
+{
+ tl = xchg(&state->history[idx], tl);
+ if (tl) {
+ radix_tree_delete(&state->cachelines, hwsp_cacheline(tl));
+ intel_timeline_put(tl);
+ }
+}
+
+static int __mock_hwsp_timeline(struct mock_hwsp_freelist *state,
+ unsigned int count,
+ unsigned int flags)
+{
+ struct intel_timeline *tl;
+ unsigned int idx;
+
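+ /* Each new timeline must claim a HWSP cacheline not already in use */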
+ while (count--) {
+ unsigned long cacheline;
+ int err;
+
+ tl = intel_timeline_create(state->gt, NULL);
+ if (IS_ERR(tl))
+ return PTR_ERR(tl);
+
+ cacheline = hwsp_cacheline(tl);
+ err = radix_tree_insert(&state->cachelines, cacheline, tl);
+ if (err) {
+ if (err == -EEXIST) {
+ pr_err("HWSP cacheline %lu already used; duplicate allocation!\n",
+ cacheline);
+ }
+ intel_timeline_put(tl);
+ return err;
+ }
+
+ idx = state->count++ % state->max;
+ __mock_hwsp_record(state, idx, tl);
+ }
+
+ if (flags & SHUFFLE)
+ i915_prandom_shuffle(state->history,
+ sizeof(*state->history),
+ min(state->count, state->max),
+ &state->prng);
+
+ count = i915_prandom_u32_max_state(min(state->count, state->max),
+ &state->prng);
+ while (count--) {
+ idx = --state->count % state->max;
+ __mock_hwsp_record(state, idx, NULL);
+ }
+
+ return 0;
+}
+
+static int mock_hwsp_freelist(void *arg)
+{
+ struct mock_hwsp_freelist state;
+ struct drm_i915_private *i915;
+ const struct {
+ const char *name;
+ unsigned int flags;
+ } phases[] = {
+ { "linear", 0 },
+ { "shuffled", SHUFFLE },
+ { },
+ }, *p;
+ unsigned int na;
+ int err = 0;
+
+ i915 = mock_gem_device();
+ if (!i915)
+ return -ENOMEM;
+
+ INIT_RADIX_TREE(&state.cachelines, GFP_KERNEL);
+ state.prng = I915_RND_STATE_INITIALIZER(i915_selftest.random_seed);
+
+ state.gt = &i915->gt;
+
+ /*
+ * Create a bunch of timelines and check that their HWSPs do not overlap.
+ * Free some, and try again.
+ */
+
+ state.max = PAGE_SIZE / sizeof(*state.history);
+ state.count = 0;
+ state.history = kcalloc(state.max, sizeof(*state.history), GFP_KERNEL);
+ if (!state.history) {
+ err = -ENOMEM;
+ goto err_put;
+ }
+
+ for (p = phases; p->name; p++) {
+ pr_debug("%s(%s)\n", __func__, p->name);
+ for_each_prime_number_from(na, 1, 2 * CACHELINES_PER_PAGE) {
+ err = __mock_hwsp_timeline(&state, na, p->flags);
+ if (err)
+ goto out;
+ }
+ }
+
+out:
+ for (na = 0; na < state.max; na++)
+ __mock_hwsp_record(&state, na, NULL);
+ kfree(state.history);
+err_put:
+ drm_dev_put(&i915->drm);
+ return err;
+}
+
+struct __igt_sync {
+ const char *name;
+ u32 seqno;
+ bool expected;
+ bool set;
+};
+
+static int __igt_sync(struct intel_timeline *tl,
+ u64 ctx,
+ const struct __igt_sync *p,
+ const char *name)
+{
+ int ret;
+
+ if (__intel_timeline_sync_is_later(tl, ctx, p->seqno) != p->expected) {
+ pr_err("%s: %s(ctx=%llu, seqno=%u) expected passed %s but failed\n",
+ name, p->name, ctx, p->seqno, yesno(p->expected));
+ return -EINVAL;
+ }
+
+ if (p->set) {
+ ret = __intel_timeline_sync_set(tl, ctx, p->seqno);
+ if (ret)
+ return ret;
+ }
+
+ return 0;
+}
+
+static int igt_sync(void *arg)
+{
+ const struct __igt_sync pass[] = {
+ { "unset", 0, false, false },
+ { "new", 0, false, true },
+ { "0a", 0, true, true },
+ { "1a", 1, false, true },
+ { "1b", 1, true, true },
+ { "0b", 0, true, false },
+ { "2a", 2, false, true },
+ { "4", 4, false, true },
+ { "INT_MAX", INT_MAX, false, true },
+ { "INT_MAX-1", INT_MAX-1, true, false },
+ { "INT_MAX+1", (u32)INT_MAX+1, false, true },
+ { "INT_MAX", INT_MAX, true, false },
+ { "UINT_MAX", UINT_MAX, false, true },
+ { "wrap", 0, false, true },
+ { "unwrap", UINT_MAX, true, false },
+ {},
+ }, *p;
+ struct intel_timeline tl;
+ int order, offset;
+ int ret = -ENODEV;
+
+ mock_timeline_init(&tl, 0);
+ for (p = pass; p->name; p++) {
+ for (order = 1; order < 64; order++) {
+ for (offset = -1; offset <= (order > 1); offset++) {
+ u64 ctx = BIT_ULL(order) + offset;
+
+ ret = __igt_sync(&tl, ctx, p, "1");
+ if (ret)
+ goto out;
+ }
+ }
+ }
+ mock_timeline_fini(&tl);
+
+ mock_timeline_init(&tl, 0);
+ for (order = 1; order < 64; order++) {
+ for (offset = -1; offset <= (order > 1); offset++) {
+ u64 ctx = BIT_ULL(order) + offset;
+
+ for (p = pass; p->name; p++) {
+ ret = __igt_sync(&tl, ctx, p, "2");
+ if (ret)
+ goto out;
+ }
+ }
+ }
+
+out:
+ mock_timeline_fini(&tl);
+ return ret;
+}
+
+static unsigned int random_engine(struct rnd_state *rnd)
+{
+ return i915_prandom_u32_max_state(I915_NUM_ENGINES, rnd);
+}
+
+static int bench_sync(void *arg)
+{
+ struct rnd_state prng;
+ struct intel_timeline tl;
+ unsigned long end_time, count;
+ u64 prng32_1M;
+ ktime_t kt;
+ int order, last_order;
+
+ mock_timeline_init(&tl, 0);
+
+ /* Lookups from cache are very fast and so the random number generation
+ * and the loop itself become a significant factor in the per-iteration
+ * timings. We compensate by measuring the overhead of the prng and
+ * subtracting it from the reported results.
+ */
+ prandom_seed_state(&prng, i915_selftest.random_seed);
+ count = 0;
+ kt = ktime_get();
+ end_time = jiffies + HZ/10;
+ do {
+ u32 x;
+
+ /* Make sure the compiler doesn't optimise away the prng call */
+ WRITE_ONCE(x, prandom_u32_state(&prng));
+
+ count++;
+ } while (!time_after(jiffies, end_time));
+ kt = ktime_sub(ktime_get(), kt);
+ pr_debug("%s: %lu random evaluations, %lluns/prng\n",
+ __func__, count, (long long)div64_ul(ktime_to_ns(kt), count));
+ prng32_1M = div64_ul(ktime_to_ns(kt) << 20, count);
+
+ /* Benchmark (only) setting random context ids */
+ prandom_seed_state(&prng, i915_selftest.random_seed);
+ count = 0;
+ kt = ktime_get();
+ end_time = jiffies + HZ/10;
+ do {
+ u64 id = i915_prandom_u64_state(&prng);
+
+ __intel_timeline_sync_set(&tl, id, 0);
+ count++;
+ } while (!time_after(jiffies, end_time));
+ kt = ktime_sub(ktime_get(), kt);
+ kt = ktime_sub_ns(kt, (count * prng32_1M * 2) >> 20);
+ pr_info("%s: %lu random insertions, %lluns/insert\n",
+ __func__, count, (long long)div64_ul(ktime_to_ns(kt), count));
+
+ /* Benchmark looking up the exact same context ids as we just set */
+ prandom_seed_state(&prng, i915_selftest.random_seed);
+ end_time = count;
+ kt = ktime_get();
+ while (end_time--) {
+ u64 id = i915_prandom_u64_state(&prng);
+
+ if (!__intel_timeline_sync_is_later(&tl, id, 0)) {
+ mock_timeline_fini(&tl);
+ pr_err("Lookup of %llu failed\n", id);
+ return -EINVAL;
+ }
+ }
+ kt = ktime_sub(ktime_get(), kt);
+ kt = ktime_sub_ns(kt, (count * prng32_1M * 2) >> 20);
+ pr_info("%s: %lu random lookups, %lluns/lookup\n",
+ __func__, count, (long long)div64_ul(ktime_to_ns(kt), count));
+
+ mock_timeline_fini(&tl);
+ cond_resched();
+
+ mock_timeline_init(&tl, 0);
+
+ /* Benchmark setting the first N (in order) contexts */
+ count = 0;
+ kt = ktime_get();
+ end_time = jiffies + HZ/10;
+ do {
+ __intel_timeline_sync_set(&tl, count++, 0);
+ } while (!time_after(jiffies, end_time));
+ kt = ktime_sub(ktime_get(), kt);
+ pr_info("%s: %lu in-order insertions, %lluns/insert\n",
+ __func__, count, (long long)div64_ul(ktime_to_ns(kt), count));
+
+ /* Benchmark looking up the exact same context ids as we just set */
+ end_time = count;
+ kt = ktime_get();
+ while (end_time--) {
+ if (!__intel_timeline_sync_is_later(&tl, end_time, 0)) {
+ pr_err("Lookup of %lu failed\n", end_time);
+ mock_timeline_fini(&tl);
+ return -EINVAL;
+ }
+ }
+ kt = ktime_sub(ktime_get(), kt);
+ pr_info("%s: %lu in-order lookups, %lluns/lookup\n",
+ __func__, count, (long long)div64_ul(ktime_to_ns(kt), count));
+
+ mock_timeline_fini(&tl);
+ cond_resched();
+
+ mock_timeline_init(&tl, 0);
+
+ /* Benchmark searching for a random context id and maybe changing it */
+ prandom_seed_state(&prng, i915_selftest.random_seed);
+ count = 0;
+ kt = ktime_get();
+ end_time = jiffies + HZ/10;
+ do {
+ u32 id = random_engine(&prng);
+ u32 seqno = prandom_u32_state(&prng);
+
+ if (!__intel_timeline_sync_is_later(&tl, id, seqno))
+ __intel_timeline_sync_set(&tl, id, seqno);
+
+ count++;
+ } while (!time_after(jiffies, end_time));
+ kt = ktime_sub(ktime_get(), kt);
+ kt = ktime_sub_ns(kt, (count * prng32_1M * 2) >> 20);
+ pr_info("%s: %lu repeated insert/lookups, %lluns/op\n",
+ __func__, count, (long long)div64_ul(ktime_to_ns(kt), count));
+ mock_timeline_fini(&tl);
+ cond_resched();
+
+ /* Benchmark searching for a known context id and changing the seqno */
+ for (last_order = 1, order = 1; order < 32;
+ ({ int tmp = last_order; last_order = order; order += tmp; })) {
+ unsigned int mask = BIT(order) - 1;
+
+ mock_timeline_init(&tl, 0);
+
+ count = 0;
+ kt = ktime_get();
+ end_time = jiffies + HZ/10;
+ do {
+ /* Without assuming too many details of the underlying
+ * implementation, try to identify its phase-changes
+ * (if any)!
+ */
+ u64 id = (u64)(count & mask) << order;
+
+ __intel_timeline_sync_is_later(&tl, id, 0);
+ __intel_timeline_sync_set(&tl, id, 0);
+
+ count++;
+ } while (!time_after(jiffies, end_time));
+ kt = ktime_sub(ktime_get(), kt);
+ pr_info("%s: %lu cyclic/%d insert/lookups, %lluns/op\n",
+ __func__, count, order,
+ (long long)div64_ul(ktime_to_ns(kt), count));
+ mock_timeline_fini(&tl);
+ cond_resched();
+ }
+
+ return 0;
+}
+
+int intel_timeline_mock_selftests(void)
+{
+ static const struct i915_subtest tests[] = {
+ SUBTEST(mock_hwsp_freelist),
+ SUBTEST(igt_sync),
+ SUBTEST(bench_sync),
+ };
+
+ return i915_subtests(tests, NULL);
+}
+
+static int emit_ggtt_store_dw(struct i915_request *rq, u32 addr, u32 value)
+{
+ u32 *cs;
+
+ cs = intel_ring_begin(rq, 4);
+ if (IS_ERR(cs))
+ return PTR_ERR(cs);
+
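+ /* Command layout differs per gen: gen8+ takes a 64b (lo, hi) address */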
+ if (INTEL_GEN(rq->i915) >= 8) {
+ *cs++ = MI_STORE_DWORD_IMM_GEN4 | MI_USE_GGTT;
+ *cs++ = addr;
+ *cs++ = 0;
+ *cs++ = value;
+ } else if (INTEL_GEN(rq->i915) >= 4) {
+ *cs++ = MI_STORE_DWORD_IMM_GEN4 | MI_USE_GGTT;
+ *cs++ = 0;
+ *cs++ = addr;
+ *cs++ = value;
+ } else {
+ *cs++ = MI_STORE_DWORD_IMM | MI_MEM_VIRTUAL;
+ *cs++ = addr;
+ *cs++ = value;
+ *cs++ = MI_NOOP;
+ }
+
+ intel_ring_advance(rq, cs);
+
+ return 0;
+}
+
+static struct i915_request *
+tl_write(struct intel_timeline *tl, struct intel_engine_cs *engine, u32 value)
+{
+ struct i915_request *rq;
+ int err;
+
+ err = intel_timeline_pin(tl);
+ if (err) {
+ rq = ERR_PTR(err);
+ goto out;
+ }
+
+ rq = intel_engine_create_kernel_request(engine);
+ if (IS_ERR(rq))
+ goto out_unpin;
+
+ i915_request_get(rq);
+
+ err = emit_ggtt_store_dw(rq, tl->hwsp_offset, value);
+ i915_request_add(rq);
+ if (err) {
+ i915_request_put(rq);
+ rq = ERR_PTR(err);
+ }
+
+out_unpin:
+ intel_timeline_unpin(tl);
+out:
+ if (IS_ERR(rq))
+ pr_err("Failed to write to timeline!\n");
+ return rq;
+}
+
+static struct intel_timeline *
+checked_intel_timeline_create(struct intel_gt *gt)
+{
+ struct intel_timeline *tl;
+
+ tl = intel_timeline_create(gt, NULL);
+ if (IS_ERR(tl))
+ return tl;
+
+ if (*tl->hwsp_seqno != tl->seqno) {
+ pr_err("Timeline created with incorrect breadcrumb, found %x, expected %x\n",
+ *tl->hwsp_seqno, tl->seqno);
+ intel_timeline_put(tl);
+ return ERR_PTR(-EINVAL);
+ }
+
+ return tl;
+}
+
+static int live_hwsp_engine(void *arg)
+{
+#define NUM_TIMELINES 4096
+ struct intel_gt *gt = arg;
+ struct intel_timeline **timelines;
+ struct intel_engine_cs *engine;
+ enum intel_engine_id id;
+ unsigned long count, n;
+ int err = 0;
+
+ /*
+ * Create a bunch of timelines and check we can write
+ * independently to each of their breadcrumb slots.
+ */
+
+ timelines = kvmalloc_array(NUM_TIMELINES * I915_NUM_ENGINES,
+ sizeof(*timelines),
+ GFP_KERNEL);
+ if (!timelines)
+ return -ENOMEM;
+
+ count = 0;
+ for_each_engine(engine, gt, id) {
+ if (!intel_engine_can_store_dword(engine))
+ continue;
+
+ intel_engine_pm_get(engine);
+
+ for (n = 0; n < NUM_TIMELINES; n++) {
+ struct intel_timeline *tl;
+ struct i915_request *rq;
+
+ tl = checked_intel_timeline_create(gt);
+ if (IS_ERR(tl)) {
+ err = PTR_ERR(tl);
+ break;
+ }
+
+ rq = tl_write(tl, engine, count);
+ if (IS_ERR(rq)) {
+ intel_timeline_put(tl);
+ err = PTR_ERR(rq);
+ break;
+ }
+
+ timelines[count++] = tl;
+ i915_request_put(rq);
+ }
+
+ intel_engine_pm_put(engine);
+ if (err)
+ break;
+ }
+
+ if (igt_flush_test(gt->i915))
+ err = -EIO;
+
+ for (n = 0; n < count; n++) {
+ struct intel_timeline *tl = timelines[n];
+
+ if (!err && *tl->hwsp_seqno != n) {
+ pr_err("Invalid seqno stored in timeline %lu, found 0x%x\n",
+ n, *tl->hwsp_seqno);
+ err = -EINVAL;
+ }
+ intel_timeline_put(tl);
+ }
+
+ kvfree(timelines);
+ return err;
+#undef NUM_TIMELINES
+}
+
+static int live_hwsp_alternate(void *arg)
+{
+#define NUM_TIMELINES 4096
+ struct intel_gt *gt = arg;
+ struct intel_timeline **timelines;
+ struct intel_engine_cs *engine;
+ enum intel_engine_id id;
+ unsigned long count, n;
+ int err = 0;
+
+ /*
+ * Create a bunch of timelines and check we can write
+ * independently to each of their breadcrumb slots with adjacent
+ * engines.
+ */
+
+ timelines = kvmalloc_array(NUM_TIMELINES * I915_NUM_ENGINES,
+ sizeof(*timelines),
+ GFP_KERNEL);
+ if (!timelines)
+ return -ENOMEM;
+
+ count = 0;
+ for (n = 0; n < NUM_TIMELINES; n++) {
+ for_each_engine(engine, gt, id) {
+ struct intel_timeline *tl;
+ struct i915_request *rq;
+
+ if (!intel_engine_can_store_dword(engine))
+ continue;
+
+ tl = checked_intel_timeline_create(gt);
+ if (IS_ERR(tl)) {
+ err = PTR_ERR(tl);
+ goto out;
+ }
+
+ intel_engine_pm_get(engine);
+ rq = tl_write(tl, engine, count);
+ intel_engine_pm_put(engine);
+ if (IS_ERR(rq)) {
+ intel_timeline_put(tl);
+ err = PTR_ERR(rq);
+ goto out;
+ }
+
+ timelines[count++] = tl;
+ i915_request_put(rq);
+ }
+ }
+
+out:
+ if (igt_flush_test(gt->i915))
+ err = -EIO;
+
+ for (n = 0; n < count; n++) {
+ struct intel_timeline *tl = timelines[n];
+
+ if (!err && *tl->hwsp_seqno != n) {
+ pr_err("Invalid seqno stored in timeline %lu, found 0x%x\n",
+ n, *tl->hwsp_seqno);
+ err = -EINVAL;
+ }
+ intel_timeline_put(tl);
+ }
+
+ kvfree(timelines);
+ return err;
+#undef NUM_TIMELINES
+}
+
+static int live_hwsp_wrap(void *arg)
+{
+ struct intel_gt *gt = arg;
+ struct intel_engine_cs *engine;
+ struct intel_timeline *tl;
+ enum intel_engine_id id;
+ int err = 0;
+
+ /*
+ * Across a seqno wrap, we need to keep the old cacheline alive for
+ * foreign GPU references.
+ */
+
+ tl = intel_timeline_create(gt, NULL);
+ if (IS_ERR(tl))
+ return PTR_ERR(tl);
+
+ if (!tl->has_initial_breadcrumb || !tl->hwsp_cacheline)
+ goto out_free;
+
+ err = intel_timeline_pin(tl);
+ if (err)
+ goto out_free;
+
+ for_each_engine(engine, gt, id) {
+ const u32 *hwsp_seqno[2];
+ struct i915_request *rq;
+ u32 seqno[2];
+
+ if (!intel_engine_can_store_dword(engine))
+ continue;
+
+ rq = intel_engine_create_kernel_request(engine);
+ if (IS_ERR(rq)) {
+ err = PTR_ERR(rq);
+ goto out;
+ }
+
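+ /* Start the seqno near the top so the second allocation wraps */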
+ tl->seqno = -4u;
+
+ mutex_lock_nested(&tl->mutex, SINGLE_DEPTH_NESTING);
+ err = intel_timeline_get_seqno(tl, rq, &seqno[0]);
+ mutex_unlock(&tl->mutex);
+ if (err) {
+ i915_request_add(rq);
+ goto out;
+ }
+ pr_debug("seqno[0]:%08x, hwsp_offset:%08x\n",
+ seqno[0], tl->hwsp_offset);
+
+ err = emit_ggtt_store_dw(rq, tl->hwsp_offset, seqno[0]);
+ if (err) {
+ i915_request_add(rq);
+ goto out;
+ }
+ hwsp_seqno[0] = tl->hwsp_seqno;
+
+ mutex_lock_nested(&tl->mutex, SINGLE_DEPTH_NESTING);
+ err = intel_timeline_get_seqno(tl, rq, &seqno[1]);
+ mutex_unlock(&tl->mutex);
+ if (err) {
+ i915_request_add(rq);
+ goto out;
+ }
+ pr_debug("seqno[1]:%08x, hwsp_offset:%08x\n",
+ seqno[1], tl->hwsp_offset);
+
+ err = emit_ggtt_store_dw(rq, tl->hwsp_offset, seqno[1]);
+ if (err) {
+ i915_request_add(rq);
+ goto out;
+ }
+ hwsp_seqno[1] = tl->hwsp_seqno;
+
+ /* A seqno wrap should move us onto a new hwsp cacheline */
+ GEM_BUG_ON(seqno[1] >= seqno[0]);
+ GEM_BUG_ON(hwsp_seqno[0] == hwsp_seqno[1]);
+
+ i915_request_add(rq);
+
+ if (i915_request_wait(rq, 0, HZ / 5) < 0) {
+ pr_err("Wait for timeline writes timed out!\n");
+ err = -EIO;
+ goto out;
+ }
+
+ if (*hwsp_seqno[0] != seqno[0] || *hwsp_seqno[1] != seqno[1]) {
+ pr_err("Bad timeline values: found (%x, %x), expected (%x, %x)\n",
+ *hwsp_seqno[0], *hwsp_seqno[1],
+ seqno[0], seqno[1]);
+ err = -EINVAL;
+ goto out;
+ }
+
+ intel_gt_retire_requests(gt); /* recycle HWSP */
+ }
+
+out:
+ if (igt_flush_test(gt->i915))
+ err = -EIO;
+
+ intel_timeline_unpin(tl);
+out_free:
+ intel_timeline_put(tl);
+ return err;
+}
+
+static int live_hwsp_recycle(void *arg)
+{
+ struct intel_gt *gt = arg;
+ struct intel_engine_cs *engine;
+ enum intel_engine_id id;
+ unsigned long count;
+ int err = 0;
+
+ /*
+ * Check seqno writes into one timeline at a time. We expect to
+ * recycle the breadcrumb slot between iterations and want to
+ * confuse neither ourselves nor the GPU.
+ */
+
+ count = 0;
+ for_each_engine(engine, gt, id) {
+ IGT_TIMEOUT(end_time);
+
+ if (!intel_engine_can_store_dword(engine))
+ continue;
+
+ intel_engine_pm_get(engine);
+
+ do {
+ struct intel_timeline *tl;
+ struct i915_request *rq;
+
+ tl = checked_intel_timeline_create(gt);
+ if (IS_ERR(tl)) {
+ err = PTR_ERR(tl);
+ break;
+ }
+
+ rq = tl_write(tl, engine, count);
+ if (IS_ERR(rq)) {
+ intel_timeline_put(tl);
+ err = PTR_ERR(rq);
+ break;
+ }
+
+ if (i915_request_wait(rq, 0, HZ / 5) < 0) {
+ pr_err("Wait for timeline writes timed out!\n");
+ i915_request_put(rq);
+ intel_timeline_put(tl);
+ err = -EIO;
+ break;
+ }
+
+ if (*tl->hwsp_seqno != count) {
+ pr_err("Invalid seqno stored in timeline %lu, found 0x%x\n",
+ count, *tl->hwsp_seqno);
+ err = -EINVAL;
+ }
+
+ i915_request_put(rq);
+ intel_timeline_put(tl);
+ count++;
+
+ if (err)
+ break;
+ } while (!__igt_timeout(end_time, NULL));
+
+ intel_engine_pm_put(engine);
+ if (err)
+ break;
+ }
+
+ return err;
+}
+
+int intel_timeline_live_selftests(struct drm_i915_private *i915)
+{
+ static const struct i915_subtest tests[] = {
+ SUBTEST(live_hwsp_recycle),
+ SUBTEST(live_hwsp_engine),
+ SUBTEST(live_hwsp_alternate),
+ SUBTEST(live_hwsp_wrap),
+ };
+
+ if (intel_gt_is_wedged(&i915->gt))
+ return 0;
+
+ return intel_gt_live_subtests(tests, &i915->gt);
+}
diff --git a/drivers/gpu/drm/i915/gt/selftest_workarounds.c b/drivers/gpu/drm/i915/gt/selftest_workarounds.c
index 44becd9538be..ac1921854cbf 100644
--- a/drivers/gpu/drm/i915/gt/selftest_workarounds.c
+++ b/drivers/gpu/drm/i915/gt/selftest_workarounds.c
@@ -5,13 +5,14 @@
*/
#include "gem/i915_gem_pm.h"
+#include "gt/intel_engine_user.h"
+#include "gt/intel_gt.h"
#include "i915_selftest.h"
#include "intel_reset.h"
#include "selftests/igt_flush_test.h"
#include "selftests/igt_reset.h"
#include "selftests/igt_spinner.h"
-#include "selftests/igt_wedge_me.h"
#include "selftests/mock_drm.h"
#include "gem/selftests/igt_gem_utils.h"
@@ -24,53 +25,70 @@ static const struct wo_register {
{ INTEL_GEMINILAKE, 0x731c }
};
-#define REF_NAME_MAX (INTEL_ENGINE_CS_MAX_NAME + 8)
struct wa_lists {
struct i915_wa_list gt_wa_list;
struct {
- char name[REF_NAME_MAX];
struct i915_wa_list wa_list;
struct i915_wa_list ctx_wa_list;
} engine[I915_NUM_ENGINES];
};
+static int request_add_sync(struct i915_request *rq, int err)
+{
+ i915_request_get(rq);
+ i915_request_add(rq);
+ if (i915_request_wait(rq, 0, HZ / 5) < 0)
+ err = -EIO;
+ i915_request_put(rq);
+
+ return err;
+}
+
+static int request_add_spin(struct i915_request *rq, struct igt_spinner *spin)
+{
+ int err = 0;
+
+ i915_request_get(rq);
+ i915_request_add(rq);
+ if (spin && !igt_wait_for_spinner(spin, rq))
+ err = -ETIMEDOUT;
+ i915_request_put(rq);
+
+ return err;
+}
+
static void
-reference_lists_init(struct drm_i915_private *i915, struct wa_lists *lists)
+reference_lists_init(struct intel_gt *gt, struct wa_lists *lists)
{
struct intel_engine_cs *engine;
enum intel_engine_id id;
memset(lists, 0, sizeof(*lists));
- wa_init_start(&lists->gt_wa_list, "GT_REF");
- gt_init_workarounds(i915, &lists->gt_wa_list);
+ wa_init_start(&lists->gt_wa_list, "GT_REF", "global");
+ gt_init_workarounds(gt->i915, &lists->gt_wa_list);
wa_init_finish(&lists->gt_wa_list);
- for_each_engine(engine, i915, id) {
+ for_each_engine(engine, gt, id) {
struct i915_wa_list *wal = &lists->engine[id].wa_list;
- char *name = lists->engine[id].name;
-
- snprintf(name, REF_NAME_MAX, "%s_REF", engine->name);
- wa_init_start(wal, name);
+ wa_init_start(wal, "REF", engine->name);
engine_init_workarounds(engine, wal);
wa_init_finish(wal);
- snprintf(name, REF_NAME_MAX, "%s_CTX_REF", engine->name);
-
__intel_engine_init_ctx_wa(engine,
&lists->engine[id].ctx_wa_list,
- name);
+ "CTX_REF");
}
}
static void
-reference_lists_fini(struct drm_i915_private *i915, struct wa_lists *lists)
+reference_lists_fini(struct intel_gt *gt, struct wa_lists *lists)
{
struct intel_engine_cs *engine;
enum intel_engine_id id;
- for_each_engine(engine, i915, id)
+ for_each_engine(engine, gt, id)
intel_wa_list_free(&lists->engine[id].wa_list);
intel_wa_list_free(&lists->gt_wa_list);
@@ -102,7 +120,7 @@ read_nonprivs(struct i915_gem_context *ctx, struct intel_engine_cs *engine)
i915_gem_object_flush_map(result);
i915_gem_object_unpin_map(result);
- vma = i915_vma_instance(result, &engine->i915->ggtt.vm, NULL);
+ vma = i915_vma_instance(result, &engine->gt->ggtt->vm, NULL);
if (IS_ERR(vma)) {
err = PTR_ERR(vma);
goto err_obj;
@@ -119,7 +137,9 @@ read_nonprivs(struct i915_gem_context *ctx, struct intel_engine_cs *engine)
}
i915_vma_lock(vma);
- err = i915_vma_move_to_active(vma, rq, EXEC_OBJECT_WRITE);
+ err = i915_request_await_object(rq, vma->obj, true);
+ if (err == 0)
+ err = i915_vma_move_to_active(vma, rq, EXEC_OBJECT_WRITE);
i915_vma_unlock(vma);
if (err)
goto err_req;
@@ -184,7 +204,7 @@ static int check_whitelist(struct i915_gem_context *ctx,
struct intel_engine_cs *engine)
{
struct drm_i915_gem_object *results;
- struct igt_wedge_me wedge;
+ struct intel_wedge_me wedge;
u32 *vaddr;
int err;
int i;
@@ -195,10 +215,10 @@ static int check_whitelist(struct i915_gem_context *ctx,
err = 0;
i915_gem_object_lock(results);
- igt_wedge_on_timeout(&wedge, ctx->i915, HZ / 5) /* a safety net! */
+ intel_wedge_on_timeout(&wedge, engine->gt, HZ / 5) /* safety net! */
err = i915_gem_object_set_to_cpu_domain(results, false);
i915_gem_object_unlock(results);
- if (i915_terminally_wedged(ctx->i915))
+ if (intel_gt_is_wedged(engine->gt))
err = -EIO;
if (err)
goto out_put;
@@ -231,35 +251,29 @@ out_put:
static int do_device_reset(struct intel_engine_cs *engine)
{
- i915_reset(engine->i915, engine->mask, "live_workarounds");
+ intel_gt_reset(engine->gt, engine->mask, "live_workarounds");
return 0;
}
static int do_engine_reset(struct intel_engine_cs *engine)
{
- return i915_reset_engine(engine, "live_workarounds");
+ return intel_engine_reset(engine, "live_workarounds");
}
static int
switch_to_scratch_context(struct intel_engine_cs *engine,
struct igt_spinner *spin)
{
- struct i915_gem_context *ctx;
+ struct intel_context *ce;
struct i915_request *rq;
- intel_wakeref_t wakeref;
int err = 0;
- ctx = kernel_context(engine->i915);
- if (IS_ERR(ctx))
- return PTR_ERR(ctx);
-
- GEM_BUG_ON(i915_gem_context_is_bannable(ctx));
-
- rq = ERR_PTR(-ENODEV);
- with_intel_runtime_pm(&engine->i915->runtime_pm, wakeref)
- rq = igt_spinner_create_request(spin, ctx, engine, MI_NOOP);
+ ce = intel_context_create(engine);
+ if (IS_ERR(ce))
+ return PTR_ERR(ce);
- kernel_context_close(ctx);
+ rq = igt_spinner_create_request(spin, ce, MI_NOOP);
+ intel_context_put(ce);
if (IS_ERR(rq)) {
spin = NULL;
@@ -267,13 +281,7 @@ switch_to_scratch_context(struct intel_engine_cs *engine,
goto err;
}
- i915_request_add(rq);
-
- if (spin && !igt_wait_for_spinner(spin, rq)) {
- pr_err("Spinner failed to start\n");
- err = -ETIMEDOUT;
- }
-
+ err = request_add_spin(rq, spin);
err:
if (err && spin)
igt_spinner_end(spin);
@@ -286,79 +294,82 @@ static int check_whitelist_across_reset(struct intel_engine_cs *engine,
const char *name)
{
struct drm_i915_private *i915 = engine->i915;
- struct i915_gem_context *ctx;
+ struct i915_gem_context *ctx, *tmp;
struct igt_spinner spin;
intel_wakeref_t wakeref;
int err;
- pr_info("Checking %d whitelisted registers (RING_NONPRIV) [%s]\n",
- engine->whitelist.count, name);
-
- err = igt_spinner_init(&spin, i915);
- if (err)
- return err;
+ pr_info("Checking %d whitelisted registers on %s (RING_NONPRIV) [%s]\n",
+ engine->whitelist.count, engine->name, name);
ctx = kernel_context(i915);
if (IS_ERR(ctx))
return PTR_ERR(ctx);
+ err = igt_spinner_init(&spin, engine->gt);
+ if (err)
+ goto out_ctx;
+
err = check_whitelist(ctx, engine);
if (err) {
pr_err("Invalid whitelist *before* %s reset!\n", name);
- goto out;
+ goto out_spin;
}
err = switch_to_scratch_context(engine, &spin);
if (err)
- goto out;
+ goto out_spin;
- with_intel_runtime_pm(&i915->runtime_pm, wakeref)
+ with_intel_runtime_pm(engine->uncore->rpm, wakeref)
err = reset(engine);
igt_spinner_end(&spin);
- igt_spinner_fini(&spin);
if (err) {
pr_err("%s reset failed\n", name);
- goto out;
+ goto out_spin;
}
err = check_whitelist(ctx, engine);
if (err) {
pr_err("Whitelist not preserved in context across %s reset!\n",
name);
- goto out;
+ goto out_spin;
}
+ tmp = kernel_context(i915);
+ if (IS_ERR(tmp)) {
+ err = PTR_ERR(tmp);
+ goto out_spin;
+ }
kernel_context_close(ctx);
-
- ctx = kernel_context(i915);
- if (IS_ERR(ctx))
- return PTR_ERR(ctx);
+ ctx = tmp;
err = check_whitelist(ctx, engine);
if (err) {
pr_err("Invalid whitelist *after* %s reset in fresh context!\n",
name);
- goto out;
+ goto out_spin;
}
-out:
+out_spin:
+ igt_spinner_fini(&spin);
+out_ctx:
kernel_context_close(ctx);
return err;
}
-static struct i915_vma *create_batch(struct i915_gem_context *ctx)
+static struct i915_vma *create_batch(struct i915_address_space *vm)
{
struct drm_i915_gem_object *obj;
struct i915_vma *vma;
int err;
- obj = i915_gem_object_create_internal(ctx->i915, 16 * PAGE_SIZE);
+ obj = i915_gem_object_create_internal(vm->i915, 16 * PAGE_SIZE);
if (IS_ERR(obj))
return ERR_CAST(obj);
- vma = i915_vma_instance(obj, ctx->vm, NULL);
+ vma = i915_vma_instance(obj, vm, NULL);
if (IS_ERR(vma)) {
err = PTR_ERR(vma);
goto err_obj;
@@ -393,6 +404,10 @@ static bool wo_register(struct intel_engine_cs *engine, u32 reg)
enum intel_platform platform = INTEL_INFO(engine->i915)->platform;
int i;
+ if ((reg & RING_FORCE_TO_NONPRIV_ACCESS_MASK) ==
+ RING_FORCE_TO_NONPRIV_ACCESS_WR)
+ return true;
+
for (i = 0; i < ARRAY_SIZE(wo_registers); i++) {
if (wo_registers[i].platform == platform &&
wo_registers[i].reg == reg)
@@ -404,7 +419,8 @@ static bool wo_register(struct intel_engine_cs *engine, u32 reg)
static bool ro_register(u32 reg)
{
- if (reg & RING_FORCE_TO_NONPRIV_RD)
+ if ((reg & RING_FORCE_TO_NONPRIV_ACCESS_MASK) ==
+ RING_FORCE_TO_NONPRIV_ACCESS_RD)
return true;
return false;
@@ -425,8 +441,7 @@ static int whitelist_writable_count(struct intel_engine_cs *engine)
return count;
}
-static int check_dirty_whitelist(struct i915_gem_context *ctx,
- struct intel_engine_cs *engine)
+static int check_dirty_whitelist(struct intel_context *ce)
{
const u32 values[] = {
0x00000000,
@@ -454,16 +469,17 @@ static int check_dirty_whitelist(struct i915_gem_context *ctx,
0xffff00ff,
0xffffffff,
};
+ struct intel_engine_cs *engine = ce->engine;
struct i915_vma *scratch;
struct i915_vma *batch;
int err = 0, i, v;
u32 *cs, *results;
- scratch = create_scratch(ctx->vm, 2 * ARRAY_SIZE(values) + 1);
+ scratch = create_scratch(ce->vm, 2 * ARRAY_SIZE(values) + 1);
if (IS_ERR(scratch))
return PTR_ERR(scratch);
- batch = create_batch(ctx);
+ batch = create_batch(ce->vm);
if (IS_ERR(batch)) {
err = PTR_ERR(batch);
goto out_scratch;
@@ -476,16 +492,19 @@ static int check_dirty_whitelist(struct i915_gem_context *ctx,
u32 srm, lrm, rsvd;
u32 expect;
int idx;
+ bool ro_reg;
if (wo_register(engine, reg))
continue;
- if (ro_register(reg))
- continue;
+ ro_reg = ro_register(reg);
+
+ /* Clear non-priv flags */
+ reg &= RING_FORCE_TO_NONPRIV_ADDRESS_MASK;
srm = MI_STORE_REGISTER_MEM;
lrm = MI_LOAD_REGISTER_MEM;
- if (INTEL_GEN(ctx->i915) >= 8)
+ if (INTEL_GEN(engine->i915) >= 8)
lrm++, srm++;
pr_debug("%s: Writing garbage to %x\n",
@@ -542,9 +561,9 @@ static int check_dirty_whitelist(struct i915_gem_context *ctx,
i915_gem_object_flush_map(batch->obj);
i915_gem_object_unpin_map(batch->obj);
- i915_gem_chipset_flush(ctx->i915);
+ intel_gt_chipset_flush(engine->gt);
- rq = igt_request_alloc(ctx, engine);
+ rq = intel_context_create_request(ce);
if (IS_ERR(rq)) {
err = PTR_ERR(rq);
goto out_batch;
@@ -556,6 +575,14 @@ static int check_dirty_whitelist(struct i915_gem_context *ctx,
goto err_request;
}
+ i915_vma_lock(batch);
+ err = i915_request_await_object(rq, batch->obj, false);
+ if (err == 0)
+ err = i915_vma_move_to_active(batch, rq, 0);
+ i915_vma_unlock(batch);
+ if (err)
+ goto err_request;
+
err = engine->emit_bb_start(rq,
batch->node.start, PAGE_SIZE,
0);
@@ -563,15 +590,11 @@ static int check_dirty_whitelist(struct i915_gem_context *ctx,
goto err_request;
err_request:
- i915_request_add(rq);
- if (err)
- goto out_batch;
-
- if (i915_request_wait(rq, 0, HZ / 5) < 0) {
+ err = request_add_sync(rq, err);
+ if (err) {
pr_err("%s: Futzing %x timedout; cancelling test\n",
engine->name, reg);
- i915_gem_set_wedged(ctx->i915);
- err = -EIO;
+ intel_gt_set_wedged(engine->gt);
goto out_batch;
}
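
request_add_sync() is a new selftest helper that folds the old add/wait/wedge sequence into one call; a sketch of what it presumably does (the helper body is assumed, as it is not shown in this hunk)::

    static int request_add_sync(struct i915_request *rq, int err)
    {
            i915_request_get(rq);
            i915_request_add(rq);
            if (i915_request_wait(rq, 0, HZ / 5) < 0)
                    err = -EIO;     /* timeout: the caller then wedges the gt */
            i915_request_put(rq);

            return err;
    }
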
@@ -582,24 +605,35 @@ err_request:
}
GEM_BUG_ON(values[ARRAY_SIZE(values) - 1] != 0xffffffff);
- rsvd = results[ARRAY_SIZE(values)]; /* detect write masking */
- if (!rsvd) {
- pr_err("%s: Unable to write to whitelisted register %x\n",
- engine->name, reg);
- err = -EINVAL;
- goto out_unpin;
+ if (!ro_reg) {
+ /* detect write masking */
+ rsvd = results[ARRAY_SIZE(values)];
+ if (!rsvd) {
+ pr_err("%s: Unable to write to whitelisted register %x\n",
+ engine->name, reg);
+ err = -EINVAL;
+ goto out_unpin;
+ }
}
expect = results[0];
idx = 1;
for (v = 0; v < ARRAY_SIZE(values); v++) {
- expect = reg_write(expect, values[v], rsvd);
+ if (ro_reg)
+ expect = results[0];
+ else
+ expect = reg_write(expect, values[v], rsvd);
+
if (results[idx] != expect)
err++;
idx++;
}
for (v = 0; v < ARRAY_SIZE(values); v++) {
- expect = reg_write(expect, ~values[v], rsvd);
+ if (ro_reg)
+ expect = results[0];
+ else
+ expect = reg_write(expect, ~values[v], rsvd);
+
if (results[idx] != expect)
err++;
idx++;
@@ -608,15 +642,22 @@ err_request:
pr_err("%s: %d mismatch between values written to whitelisted register [%x], and values read back!\n",
engine->name, err, reg);
- pr_info("%s: Whitelisted register: %x, original value %08x, rsvd %08x\n",
- engine->name, reg, results[0], rsvd);
+ if (ro_reg)
+ pr_info("%s: Whitelisted read-only register: %x, original value %08x\n",
+ engine->name, reg, results[0]);
+ else
+ pr_info("%s: Whitelisted register: %x, original value %08x, rsvd %08x\n",
+ engine->name, reg, results[0], rsvd);
expect = results[0];
idx = 1;
for (v = 0; v < ARRAY_SIZE(values); v++) {
u32 w = values[v];
- expect = reg_write(expect, w, rsvd);
+ if (ro_reg)
+ expect = results[0];
+ else
+ expect = reg_write(expect, w, rsvd);
pr_info("Wrote %08x, read %08x, expect %08x\n",
w, results[idx], expect);
idx++;
@@ -624,7 +665,10 @@ err_request:
for (v = 0; v < ARRAY_SIZE(values); v++) {
u32 w = ~values[v];
- expect = reg_write(expect, w, rsvd);
+ if (ro_reg)
+ expect = results[0];
+ else
+ expect = reg_write(expect, w, rsvd);
pr_info("Wrote %08x, read %08x, expect %08x\n",
w, results[idx], expect);
idx++;
@@ -638,7 +682,7 @@ out_unpin:
break;
}
- if (igt_flush_test(ctx->i915, I915_WAIT_LOCKED))
+ if (igt_flush_test(engine->i915))
err = -EIO;
out_batch:
i915_vma_unpin_and_release(&batch, 0);
@@ -649,84 +693,68 @@ out_scratch:
static int live_dirty_whitelist(void *arg)
{
- struct drm_i915_private *i915 = arg;
+ struct intel_gt *gt = arg;
struct intel_engine_cs *engine;
- struct i915_gem_context *ctx;
enum intel_engine_id id;
- intel_wakeref_t wakeref;
- struct drm_file *file;
- int err = 0;
/* Can the user write to the whitelisted registers? */
- if (INTEL_GEN(i915) < 7) /* minimum requirement for LRI, SRM, LRM */
+ if (INTEL_GEN(gt->i915) < 7) /* minimum requirement for LRI, SRM, LRM */
return 0;
- wakeref = intel_runtime_pm_get(&i915->runtime_pm);
-
- mutex_unlock(&i915->drm.struct_mutex);
- file = mock_file(i915);
- mutex_lock(&i915->drm.struct_mutex);
- if (IS_ERR(file)) {
- err = PTR_ERR(file);
- goto out_rpm;
- }
-
- ctx = live_context(i915, file);
- if (IS_ERR(ctx)) {
- err = PTR_ERR(ctx);
- goto out_file;
- }
+ for_each_engine(engine, gt, id) {
+ struct intel_context *ce;
+ int err;
- for_each_engine(engine, i915, id) {
if (engine->whitelist.count == 0)
continue;
- err = check_dirty_whitelist(ctx, engine);
+ ce = intel_context_create(engine);
+ if (IS_ERR(ce))
+ return PTR_ERR(ce);
+
+ err = check_dirty_whitelist(ce);
+ intel_context_put(ce);
if (err)
- goto out_file;
+ return err;
}
-out_file:
- mutex_unlock(&i915->drm.struct_mutex);
- mock_file_free(i915, file);
- mutex_lock(&i915->drm.struct_mutex);
-out_rpm:
- intel_runtime_pm_put(&i915->runtime_pm, wakeref);
- return err;
+ return 0;
}
static int live_reset_whitelist(void *arg)
{
- struct drm_i915_private *i915 = arg;
- struct intel_engine_cs *engine = i915->engine[RCS0];
+ struct intel_gt *gt = arg;
+ struct intel_engine_cs *engine;
+ enum intel_engine_id id;
int err = 0;
/* If we reset the gpu, we should not lose the RING_NONPRIV */
+ igt_global_reset_lock(gt);
- if (!engine || engine->whitelist.count == 0)
- return 0;
-
- igt_global_reset_lock(i915);
+ for_each_engine(engine, gt, id) {
+ if (engine->whitelist.count == 0)
+ continue;
- if (intel_has_reset_engine(i915)) {
- err = check_whitelist_across_reset(engine,
- do_engine_reset,
- "engine");
- if (err)
- goto out;
- }
+ if (intel_has_reset_engine(gt)) {
+ err = check_whitelist_across_reset(engine,
+ do_engine_reset,
+ "engine");
+ if (err)
+ goto out;
+ }
- if (intel_has_gpu_reset(i915)) {
- err = check_whitelist_across_reset(engine,
- do_device_reset,
- "device");
- if (err)
- goto out;
+ if (intel_has_gpu_reset(gt)) {
+ err = check_whitelist_across_reset(engine,
+ do_device_reset,
+ "device");
+ if (err)
+ goto out;
+ }
}
out:
- igt_global_reset_unlock(i915);
+ igt_global_reset_unlock(gt);
return err;
}
@@ -742,6 +770,14 @@ static int read_whitelisted_registers(struct i915_gem_context *ctx,
if (IS_ERR(rq))
return PTR_ERR(rq);
+ i915_vma_lock(results);
+ err = i915_request_await_object(rq, results->obj, true);
+ if (err == 0)
+ err = i915_vma_move_to_active(results, rq, EXEC_OBJECT_WRITE);
+ i915_vma_unlock(results);
+ if (err)
+ goto err_req;
+
srm = MI_STORE_REGISTER_MEM;
if (INTEL_GEN(ctx->i915) >= 8)
srm++;
@@ -756,8 +792,8 @@ static int read_whitelisted_registers(struct i915_gem_context *ctx,
u64 offset = results->node.start + sizeof(u32) * i;
u32 reg = i915_mmio_reg_offset(engine->whitelist.list[i].reg);
- /* Clear RD only and WR only flags */
- reg &= ~(RING_FORCE_TO_NONPRIV_RD | RING_FORCE_TO_NONPRIV_WR);
+ /* Clear the non-priv flags */
+ reg &= RING_FORCE_TO_NONPRIV_ADDRESS_MASK;
*cs++ = srm;
*cs++ = reg;
@@ -767,23 +803,21 @@ static int read_whitelisted_registers(struct i915_gem_context *ctx,
intel_ring_advance(rq, cs);
err_req:
- i915_request_add(rq);
-
- if (i915_request_wait(rq, 0, HZ / 5) < 0)
- err = -EIO;
-
- return err;
+ return request_add_sync(rq, err);
}
static int scrub_whitelisted_registers(struct i915_gem_context *ctx,
struct intel_engine_cs *engine)
{
+ struct i915_address_space *vm;
struct i915_request *rq;
struct i915_vma *batch;
int i, err = 0;
u32 *cs;
- batch = create_batch(ctx);
+ vm = i915_gem_context_get_vm_rcu(ctx);
+ batch = create_batch(vm);
+ i915_vm_put(vm);
if (IS_ERR(batch))
return PTR_ERR(batch);
@@ -800,13 +834,16 @@ static int scrub_whitelisted_registers(struct i915_gem_context *ctx,
if (ro_register(reg))
continue;
+ /* Clear the non-priv flags */
+ reg &= RING_FORCE_TO_NONPRIV_ADDRESS_MASK;
+
*cs++ = reg;
*cs++ = 0xffffffff;
}
*cs++ = MI_BATCH_BUFFER_END;
i915_gem_object_flush_map(batch->obj);
- i915_gem_chipset_flush(ctx->i915);
+ intel_gt_chipset_flush(engine->gt);
rq = igt_request_alloc(ctx, engine);
if (IS_ERR(rq)) {
@@ -820,13 +857,19 @@ static int scrub_whitelisted_registers(struct i915_gem_context *ctx,
goto err_request;
}
+ i915_vma_lock(batch);
+ err = i915_request_await_object(rq, batch->obj, false);
+ if (err == 0)
+ err = i915_vma_move_to_active(batch, rq, 0);
+ i915_vma_unlock(batch);
+ if (err)
+ goto err_request;
+
/* Perform the writes from an unprivileged "user" batch */
err = engine->emit_bb_start(rq, batch->node.start, 0, 0);
err_request:
- i915_request_add(rq);
- if (i915_request_wait(rq, 0, HZ / 5) < 0)
- err = -EIO;
+ err = request_add_sync(rq, err);
err_unpin:
i915_gem_object_unpin_map(batch->obj);
@@ -927,7 +970,8 @@ check_whitelisted_registers(struct intel_engine_cs *engine,
for (i = 0; i < engine->whitelist.count; i++) {
const struct i915_wa *wa = &engine->whitelist.list[i];
- if (i915_mmio_reg_offset(wa->reg) & RING_FORCE_TO_NONPRIV_RD)
+ if (i915_mmio_reg_offset(wa->reg) &
+ RING_FORCE_TO_NONPRIV_ACCESS_RD)
continue;
if (!fn(engine, a[i], b[i], wa->reg))
@@ -942,7 +986,7 @@ err_a:
static int live_isolated_whitelist(void *arg)
{
- struct drm_i915_private *i915 = arg;
+ struct intel_gt *gt = arg;
struct {
struct i915_gem_context *ctx;
struct i915_vma *scratch[2];
@@ -956,40 +1000,46 @@ static int live_isolated_whitelist(void *arg)
* invisible to a second context.
*/
- if (!intel_engines_has_context_isolation(i915))
- return 0;
-
- if (!i915->kernel_context->vm)
+ if (!intel_engines_has_context_isolation(gt->i915))
return 0;
for (i = 0; i < ARRAY_SIZE(client); i++) {
+ struct i915_address_space *vm;
struct i915_gem_context *c;
- c = kernel_context(i915);
+ c = kernel_context(gt->i915);
if (IS_ERR(c)) {
err = PTR_ERR(c);
goto err;
}
- client[i].scratch[0] = create_scratch(c->vm, 1024);
+ vm = i915_gem_context_get_vm_rcu(c);
+
+ client[i].scratch[0] = create_scratch(vm, 1024);
if (IS_ERR(client[i].scratch[0])) {
err = PTR_ERR(client[i].scratch[0]);
+ i915_vm_put(vm);
kernel_context_close(c);
goto err;
}
- client[i].scratch[1] = create_scratch(c->vm, 1024);
+ client[i].scratch[1] = create_scratch(vm, 1024);
if (IS_ERR(client[i].scratch[1])) {
err = PTR_ERR(client[i].scratch[1]);
i915_vma_unpin_and_release(&client[i].scratch[0], 0);
+ i915_vm_put(vm);
kernel_context_close(c);
goto err;
}
client[i].ctx = c;
+ i915_vm_put(vm);
}
- for_each_engine(engine, i915, id) {
+ for_each_engine(engine, gt, id) {
+ if (!engine->kernel_context->vm)
+ continue;
+
if (!whitelist_writable_count(engine))
continue;
@@ -1043,7 +1093,7 @@ err:
kernel_context_close(client[i].ctx);
}
- if (igt_flush_test(i915, I915_WAIT_LOCKED))
+ if (igt_flush_test(gt->i915))
err = -EIO;
return err;
@@ -1060,7 +1110,7 @@ verify_wa_lists(struct i915_gem_context *ctx, struct wa_lists *lists,
ok &= wa_list_verify(&i915->uncore, &lists->gt_wa_list, str);
- for_each_gem_engine(ce, i915_gem_context_lock_engines(ctx), it) {
+ for_each_gem_engine(ce, i915_gem_context_engines(ctx), it) {
enum intel_engine_id id = ce->engine->id;
ok &= engine_wa_list_verify(ce,
@@ -1071,7 +1121,6 @@ verify_wa_lists(struct i915_gem_context *ctx, struct wa_lists *lists,
&lists->engine[id].ctx_wa_list,
str) == 0;
}
- i915_gem_context_unlock_engines(ctx);
return ok;
}
@@ -1079,39 +1128,42 @@ verify_wa_lists(struct i915_gem_context *ctx, struct wa_lists *lists,
static int
live_gpu_reset_workarounds(void *arg)
{
- struct drm_i915_private *i915 = arg;
+ struct intel_gt *gt = arg;
struct i915_gem_context *ctx;
intel_wakeref_t wakeref;
struct wa_lists lists;
bool ok;
- if (!intel_has_gpu_reset(i915))
+ if (!intel_has_gpu_reset(gt))
return 0;
- ctx = kernel_context(i915);
+ ctx = kernel_context(gt->i915);
if (IS_ERR(ctx))
return PTR_ERR(ctx);
+ i915_gem_context_lock_engines(ctx);
+
pr_info("Verifying after GPU reset...\n");
- igt_global_reset_lock(i915);
- wakeref = intel_runtime_pm_get(&i915->runtime_pm);
+ igt_global_reset_lock(gt);
+ wakeref = intel_runtime_pm_get(gt->uncore->rpm);
- reference_lists_init(i915, &lists);
+ reference_lists_init(gt, &lists);
ok = verify_wa_lists(ctx, &lists, "before reset");
if (!ok)
goto out;
- i915_reset(i915, ALL_ENGINES, "live_workarounds");
+ intel_gt_reset(gt, ALL_ENGINES, "live_workarounds");
ok = verify_wa_lists(ctx, &lists, "after reset");
out:
+ i915_gem_context_unlock_engines(ctx);
kernel_context_close(ctx);
- reference_lists_fini(i915, &lists);
- intel_runtime_pm_put(&i915->runtime_pm, wakeref);
- igt_global_reset_unlock(i915);
+ reference_lists_fini(gt, &lists);
+ intel_runtime_pm_put(gt->uncore->rpm, wakeref);
+ igt_global_reset_unlock(gt);
return ok ? 0 : -ESRCH;
}
@@ -1119,29 +1171,30 @@ out:
static int
live_engine_reset_workarounds(void *arg)
{
- struct drm_i915_private *i915 = arg;
- struct intel_engine_cs *engine;
+ struct intel_gt *gt = arg;
+ struct i915_gem_engines_iter it;
struct i915_gem_context *ctx;
+ struct intel_context *ce;
struct igt_spinner spin;
- enum intel_engine_id id;
struct i915_request *rq;
intel_wakeref_t wakeref;
struct wa_lists lists;
int ret = 0;
- if (!intel_has_reset_engine(i915))
+ if (!intel_has_reset_engine(gt))
return 0;
- ctx = kernel_context(i915);
+ ctx = kernel_context(gt->i915);
if (IS_ERR(ctx))
return PTR_ERR(ctx);
- igt_global_reset_lock(i915);
- wakeref = intel_runtime_pm_get(&i915->runtime_pm);
+ igt_global_reset_lock(gt);
+ wakeref = intel_runtime_pm_get(gt->uncore->rpm);
- reference_lists_init(i915, &lists);
+ reference_lists_init(gt, &lists);
- for_each_engine(engine, i915, id) {
+ for_each_gem_engine(ce, i915_gem_context_lock_engines(ctx), it) {
+ struct intel_engine_cs *engine = ce->engine;
bool ok;
pr_info("Verifying after %s reset...\n", engine->name);
@@ -1152,7 +1205,7 @@ live_engine_reset_workarounds(void *arg)
goto err;
}
- i915_reset_engine(engine, "live_workarounds");
+ intel_engine_reset(engine, "live_workarounds");
ok = verify_wa_lists(ctx, &lists, "after idle reset");
if (!ok) {
@@ -1160,27 +1213,25 @@ live_engine_reset_workarounds(void *arg)
goto err;
}
- ret = igt_spinner_init(&spin, i915);
+ ret = igt_spinner_init(&spin, engine->gt);
if (ret)
goto err;
- rq = igt_spinner_create_request(&spin, ctx, engine, MI_NOOP);
+ rq = igt_spinner_create_request(&spin, ce, MI_NOOP);
if (IS_ERR(rq)) {
ret = PTR_ERR(rq);
igt_spinner_fini(&spin);
goto err;
}
- i915_request_add(rq);
-
- if (!igt_wait_for_spinner(&spin, rq)) {
+ ret = request_add_spin(rq, &spin);
+ if (ret) {
pr_err("Spinner failed to start\n");
igt_spinner_fini(&spin);
- ret = -ETIMEDOUT;
goto err;
}
- i915_reset_engine(engine, "live_workarounds");
+ intel_engine_reset(engine, "live_workarounds");
igt_spinner_end(&spin);
igt_spinner_fini(&spin);
@@ -1191,14 +1242,14 @@ live_engine_reset_workarounds(void *arg)
goto err;
}
}
-
err:
- reference_lists_fini(i915, &lists);
- intel_runtime_pm_put(&i915->runtime_pm, wakeref);
- igt_global_reset_unlock(i915);
+ i915_gem_context_unlock_engines(ctx);
+ reference_lists_fini(gt, &lists);
+ intel_runtime_pm_put(gt->uncore->rpm, wakeref);
+ igt_global_reset_unlock(gt);
kernel_context_close(ctx);
- igt_flush_test(i915, I915_WAIT_LOCKED);
+ igt_flush_test(gt->i915);
return ret;
}
@@ -1212,14 +1263,9 @@ int intel_workarounds_live_selftests(struct drm_i915_private *i915)
SUBTEST(live_gpu_reset_workarounds),
SUBTEST(live_engine_reset_workarounds),
};
- int err;
- if (i915_terminally_wedged(i915))
+ if (intel_gt_is_wedged(&i915->gt))
return 0;
- mutex_lock(&i915->drm.struct_mutex);
- err = i915_subtests(tests, i915);
- mutex_unlock(&i915->drm.struct_mutex);
-
- return err;
+ return intel_gt_live_subtests(tests, &i915->gt);
}
diff --git a/drivers/gpu/drm/i915/gt/selftests/mock_timeline.c b/drivers/gpu/drm/i915/gt/selftests/mock_timeline.c
new file mode 100644
index 000000000000..aeb1d1f616e8
--- /dev/null
+++ b/drivers/gpu/drm/i915/gt/selftests/mock_timeline.c
@@ -0,0 +1,29 @@
+/*
+ * SPDX-License-Identifier: MIT
+ *
+ * Copyright © 2017-2018 Intel Corporation
+ */
+
+#include "../intel_timeline.h"
+
+#include "mock_timeline.h"
+
+void mock_timeline_init(struct intel_timeline *timeline, u64 context)
+{
+ timeline->gt = NULL;
+ timeline->fence_context = context;
+
+ mutex_init(&timeline->mutex);
+
+ INIT_ACTIVE_FENCE(&timeline->last_request);
+ INIT_LIST_HEAD(&timeline->requests);
+
+ i915_syncmap_init(&timeline->sync);
+
+ INIT_LIST_HEAD(&timeline->link);
+}
+
+void mock_timeline_fini(struct intel_timeline *timeline)
+{
+ i915_syncmap_free(&timeline->sync);
+}
diff --git a/drivers/gpu/drm/i915/gt/selftests/mock_timeline.h b/drivers/gpu/drm/i915/gt/selftests/mock_timeline.h
new file mode 100644
index 000000000000..d2bcc3df6183
--- /dev/null
+++ b/drivers/gpu/drm/i915/gt/selftests/mock_timeline.h
@@ -0,0 +1,17 @@
+/*
+ * SPDX-License-Identifier: MIT
+ *
+ * Copyright © 2017-2018 Intel Corporation
+ */
+
+#ifndef __MOCK_TIMELINE__
+#define __MOCK_TIMELINE__
+
+#include <linux/types.h>
+
+struct intel_timeline;
+
+void mock_timeline_init(struct intel_timeline *timeline, u64 context);
+void mock_timeline_fini(struct intel_timeline *timeline);
+
+#endif /* !__MOCK_TIMELINE__ */
diff --git a/drivers/gpu/drm/i915/gt/uc/intel_guc.c b/drivers/gpu/drm/i915/gt/uc/intel_guc.c
new file mode 100644
index 000000000000..5d00a3b2d914
--- /dev/null
+++ b/drivers/gpu/drm/i915/gt/uc/intel_guc.c
@@ -0,0 +1,727 @@
+// SPDX-License-Identifier: MIT
+/*
+ * Copyright © 2014-2019 Intel Corporation
+ */
+
+#include "gt/intel_gt.h"
+#include "gt/intel_gt_irq.h"
+#include "gt/intel_gt_pm_irq.h"
+#include "intel_guc.h"
+#include "intel_guc_ads.h"
+#include "intel_guc_submission.h"
+#include "i915_drv.h"
+
+/**
+ * DOC: GuC
+ *
+ * The GuC is a microcontroller inside the GT HW, introduced in gen9. The GuC is
+ * designed to offload some of the functionality usually performed by the host
+ * driver; currently the main operations it can take care of are:
+ *
+ * - Authentication of the HuC, which is required to fully enable HuC usage.
+ * - Low latency graphics context scheduling (a.k.a. GuC submission).
+ * - GT Power management.
+ *
+ * The enable_guc module parameter can be used to select which of those
+ * operations to enable within GuC. Note that not all the operations are
+ * supported on all gen9+ platforms.
+ *
+ * Enabling the GuC is not mandatory and therefore the firmware is only loaded
+ * if at least one of the operations is selected. However, not loading the GuC
+ * might result in the loss of some features that do require the GuC (currently
+ * just the HuC, but more are expected to land in the future).
+ */
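
Which operations actually get enabled is driven by the enable_guc bits; a sketch, assuming the ENABLE_GUC_SUBMISSION/ENABLE_GUC_LOAD_HUC bit definitions from intel_uc.h and the i915_modparams layout of this era::

    /* Sketch: modparam bits select GuC features (bit names assumed). */
    if (i915_modparams.enable_guc & ENABLE_GUC_SUBMISSION)
            /* request GuC submission */;
    if (i915_modparams.enable_guc & ENABLE_GUC_LOAD_HUC)
            /* request HuC load (HuC authentication needs the GuC) */;
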
+
+void intel_guc_notify(struct intel_guc *guc)
+{
+ struct intel_gt *gt = guc_to_gt(guc);
+
+ /*
+ * On Gen11+, the value written to the register is passed as a payload
+ * to the FW. However, the FW currently treats all values the same way
+ * (H2G interrupt), so we can just write the value that the HW expects
+ * on older gens.
+ */
+ intel_uncore_write(gt->uncore, guc->notify_reg, GUC_SEND_TRIGGER);
+}
+
+static inline i915_reg_t guc_send_reg(struct intel_guc *guc, u32 i)
+{
+ GEM_BUG_ON(!guc->send_regs.base);
+ GEM_BUG_ON(!guc->send_regs.count);
+ GEM_BUG_ON(i >= guc->send_regs.count);
+
+ return _MMIO(guc->send_regs.base + 4 * i);
+}
+
+void intel_guc_init_send_regs(struct intel_guc *guc)
+{
+ struct intel_gt *gt = guc_to_gt(guc);
+ enum forcewake_domains fw_domains = 0;
+ unsigned int i;
+
+ if (INTEL_GEN(gt->i915) >= 11) {
+ guc->send_regs.base =
+ i915_mmio_reg_offset(GEN11_SOFT_SCRATCH(0));
+ guc->send_regs.count = GEN11_SOFT_SCRATCH_COUNT;
+ } else {
+ guc->send_regs.base = i915_mmio_reg_offset(SOFT_SCRATCH(0));
+ guc->send_regs.count = GUC_MAX_MMIO_MSG_LEN;
+ BUILD_BUG_ON(GUC_MAX_MMIO_MSG_LEN > SOFT_SCRATCH_COUNT);
+ }
+
+ for (i = 0; i < guc->send_regs.count; i++) {
+ fw_domains |= intel_uncore_forcewake_for_reg(gt->uncore,
+ guc_send_reg(guc, i),
+ FW_REG_READ | FW_REG_WRITE);
+ }
+ guc->send_regs.fw_domains = fw_domains;
+}
+
+static void gen9_reset_guc_interrupts(struct intel_guc *guc)
+{
+ struct intel_gt *gt = guc_to_gt(guc);
+
+ assert_rpm_wakelock_held(&gt->i915->runtime_pm);
+
+ spin_lock_irq(&gt->irq_lock);
+ gen6_gt_pm_reset_iir(gt, gt->pm_guc_events);
+ spin_unlock_irq(&gt->irq_lock);
+}
+
+static void gen9_enable_guc_interrupts(struct intel_guc *guc)
+{
+ struct intel_gt *gt = guc_to_gt(guc);
+
+ assert_rpm_wakelock_held(&gt->i915->runtime_pm);
+
+ spin_lock_irq(&gt->irq_lock);
+ if (!guc->interrupts.enabled) {
+ WARN_ON_ONCE(intel_uncore_read(gt->uncore, GEN8_GT_IIR(2)) &
+ gt->pm_guc_events);
+ guc->interrupts.enabled = true;
+ gen6_gt_pm_enable_irq(gt, gt->pm_guc_events);
+ }
+ spin_unlock_irq(&gt->irq_lock);
+}
+
+static void gen9_disable_guc_interrupts(struct intel_guc *guc)
+{
+ struct intel_gt *gt = guc_to_gt(guc);
+
+ assert_rpm_wakelock_held(&gt->i915->runtime_pm);
+
+ spin_lock_irq(&gt->irq_lock);
+ guc->interrupts.enabled = false;
+
+ gen6_gt_pm_disable_irq(gt, gt->pm_guc_events);
+
+ spin_unlock_irq(&gt->irq_lock);
+ intel_synchronize_irq(gt->i915);
+
+ gen9_reset_guc_interrupts(guc);
+}
+
+static void gen11_reset_guc_interrupts(struct intel_guc *guc)
+{
+ struct intel_gt *gt = guc_to_gt(guc);
+
+ spin_lock_irq(&gt->irq_lock);
+ gen11_gt_reset_one_iir(gt, 0, GEN11_GUC);
+ spin_unlock_irq(&gt->irq_lock);
+}
+
+static void gen11_enable_guc_interrupts(struct intel_guc *guc)
+{
+ struct intel_gt *gt = guc_to_gt(guc);
+
+ spin_lock_irq(&gt->irq_lock);
+ if (!guc->interrupts.enabled) {
+ u32 events = REG_FIELD_PREP(ENGINE1_MASK, GUC_INTR_GUC2HOST);
+
+ WARN_ON_ONCE(gen11_gt_reset_one_iir(gt, 0, GEN11_GUC));
+ intel_uncore_write(gt->uncore,
+ GEN11_GUC_SG_INTR_ENABLE, events);
+ intel_uncore_write(gt->uncore,
+ GEN11_GUC_SG_INTR_MASK, ~events);
+ guc->interrupts.enabled = true;
+ }
+ spin_unlock_irq(&gt->irq_lock);
+}
+
+static void gen11_disable_guc_interrupts(struct intel_guc *guc)
+{
+ struct intel_gt *gt = guc_to_gt(guc);
+
+ spin_lock_irq(&gt->irq_lock);
+ guc->interrupts.enabled = false;
+
+ intel_uncore_write(gt->uncore, GEN11_GUC_SG_INTR_MASK, ~0);
+ intel_uncore_write(gt->uncore, GEN11_GUC_SG_INTR_ENABLE, 0);
+
+ spin_unlock_irq(&gt->irq_lock);
+ intel_synchronize_irq(gt->i915);
+
+ gen11_reset_guc_interrupts(guc);
+}
+
+void intel_guc_init_early(struct intel_guc *guc)
+{
+ struct drm_i915_private *i915 = guc_to_gt(guc)->i915;
+
+ intel_guc_fw_init_early(guc);
+ intel_guc_ct_init_early(&guc->ct);
+ intel_guc_log_init_early(&guc->log);
+ intel_guc_submission_init_early(guc);
+
+ mutex_init(&guc->send_mutex);
+ spin_lock_init(&guc->irq_lock);
+ if (INTEL_GEN(i915) >= 11) {
+ guc->notify_reg = GEN11_GUC_HOST_INTERRUPT;
+ guc->interrupts.reset = gen11_reset_guc_interrupts;
+ guc->interrupts.enable = gen11_enable_guc_interrupts;
+ guc->interrupts.disable = gen11_disable_guc_interrupts;
+ } else {
+ guc->notify_reg = GUC_SEND_INTERRUPT;
+ guc->interrupts.reset = gen9_reset_guc_interrupts;
+ guc->interrupts.enable = gen9_enable_guc_interrupts;
+ guc->interrupts.disable = gen9_disable_guc_interrupts;
+ }
+}
+
+static u32 guc_ctl_debug_flags(struct intel_guc *guc)
+{
+ u32 level = intel_guc_log_get_level(&guc->log);
+ u32 flags = 0;
+
+ if (!GUC_LOG_LEVEL_IS_VERBOSE(level))
+ flags |= GUC_LOG_DISABLED;
+ else
+ flags |= GUC_LOG_LEVEL_TO_VERBOSITY(level) <<
+ GUC_LOG_VERBOSITY_SHIFT;
+
+ return flags;
+}
+
+static u32 guc_ctl_feature_flags(struct intel_guc *guc)
+{
+ u32 flags = 0;
+
+ if (!intel_guc_is_submission_supported(guc))
+ flags |= GUC_CTL_DISABLE_SCHEDULER;
+
+ return flags;
+}
+
+static u32 guc_ctl_ctxinfo_flags(struct intel_guc *guc)
+{
+ u32 flags = 0;
+
+ if (intel_guc_is_submission_supported(guc)) {
+ u32 ctxnum, base;
+
+ base = intel_guc_ggtt_offset(guc, guc->stage_desc_pool);
+ ctxnum = GUC_MAX_STAGE_DESCRIPTORS / 16;
+
+ base >>= PAGE_SHIFT;
+ flags |= (base << GUC_CTL_BASE_ADDR_SHIFT) |
+ (ctxnum << GUC_CTL_CTXNUM_IN16_SHIFT);
+ }
+ return flags;
+}
+
+static u32 guc_ctl_log_params_flags(struct intel_guc *guc)
+{
+ u32 offset = intel_guc_ggtt_offset(guc, guc->log.vma) >> PAGE_SHIFT;
+ u32 flags;
+
+ #if (((CRASH_BUFFER_SIZE) % SZ_1M) == 0)
+ #define UNIT SZ_1M
+ #define FLAG GUC_LOG_ALLOC_IN_MEGABYTE
+ #else
+ #define UNIT SZ_4K
+ #define FLAG 0
+ #endif
+
+ BUILD_BUG_ON(!CRASH_BUFFER_SIZE);
+ BUILD_BUG_ON(!IS_ALIGNED(CRASH_BUFFER_SIZE, UNIT));
+ BUILD_BUG_ON(!DPC_BUFFER_SIZE);
+ BUILD_BUG_ON(!IS_ALIGNED(DPC_BUFFER_SIZE, UNIT));
+ BUILD_BUG_ON(!ISR_BUFFER_SIZE);
+ BUILD_BUG_ON(!IS_ALIGNED(ISR_BUFFER_SIZE, UNIT));
+
+ BUILD_BUG_ON((CRASH_BUFFER_SIZE / UNIT - 1) >
+ (GUC_LOG_CRASH_MASK >> GUC_LOG_CRASH_SHIFT));
+ BUILD_BUG_ON((DPC_BUFFER_SIZE / UNIT - 1) >
+ (GUC_LOG_DPC_MASK >> GUC_LOG_DPC_SHIFT));
+ BUILD_BUG_ON((ISR_BUFFER_SIZE / UNIT - 1) >
+ (GUC_LOG_ISR_MASK >> GUC_LOG_ISR_SHIFT));
+
+ flags = GUC_LOG_VALID |
+ GUC_LOG_NOTIFY_ON_HALF_FULL |
+ FLAG |
+ ((CRASH_BUFFER_SIZE / UNIT - 1) << GUC_LOG_CRASH_SHIFT) |
+ ((DPC_BUFFER_SIZE / UNIT - 1) << GUC_LOG_DPC_SHIFT) |
+ ((ISR_BUFFER_SIZE / UNIT - 1) << GUC_LOG_ISR_SHIFT) |
+ (offset << GUC_LOG_BUF_ADDR_SHIFT);
+
+ #undef UNIT
+ #undef FLAG
+
+ return flags;
+}
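
The three log-section sizes are encoded as (size / UNIT - 1), with the unit picked at build time from CRASH_BUFFER_SIZE's alignment; a worked example with purely illustrative sizes::

    /*
     * CRASH_BUFFER_SIZE == SZ_1M  -> UNIT = SZ_1M, FLAG set,
     *                                crash field = 1M/1M - 1 = 0
     * CRASH_BUFFER_SIZE == SZ_16K -> UNIT = SZ_4K, FLAG clear,
     *                                crash field = 16K/4K - 1 = 3
     */
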
+
+static u32 guc_ctl_ads_flags(struct intel_guc *guc)
+{
+ u32 ads = intel_guc_ggtt_offset(guc, guc->ads_vma) >> PAGE_SHIFT;
+ u32 flags = ads << GUC_ADS_ADDR_SHIFT;
+
+ return flags;
+}
+
+/*
+ * Initialise the GuC parameter block before starting the firmware
+ * transfer. These parameters are read by the firmware on startup
+ * and cannot be changed thereafter.
+ */
+static void guc_init_params(struct intel_guc *guc)
+{
+ u32 *params = guc->params;
+ int i;
+
+ BUILD_BUG_ON(sizeof(guc->params) != GUC_CTL_MAX_DWORDS * sizeof(u32));
+
+ params[GUC_CTL_CTXINFO] = guc_ctl_ctxinfo_flags(guc);
+ params[GUC_CTL_LOG_PARAMS] = guc_ctl_log_params_flags(guc);
+ params[GUC_CTL_FEATURE] = guc_ctl_feature_flags(guc);
+ params[GUC_CTL_DEBUG] = guc_ctl_debug_flags(guc);
+ params[GUC_CTL_ADS] = guc_ctl_ads_flags(guc);
+
+ for (i = 0; i < GUC_CTL_MAX_DWORDS; i++)
+ DRM_DEBUG_DRIVER("param[%2d] = %#x\n", i, params[i]);
+}
+
+/*
+ * Write the GuC parameter block to the SOFT_SCRATCH registers ahead of the
+ * firmware transfer; the firmware reads it from there on startup and the
+ * values cannot be changed thereafter.
+ */
+void intel_guc_write_params(struct intel_guc *guc)
+{
+ struct intel_uncore *uncore = guc_to_gt(guc)->uncore;
+ int i;
+
+ /*
+ * All SOFT_SCRATCH registers are in FORCEWAKE_BLITTER domain and
+ * they are power context saved so it's ok to release forcewake
+ * when we are done here and take it again at xfer time.
+ */
+ intel_uncore_forcewake_get(uncore, FORCEWAKE_BLITTER);
+
+ intel_uncore_write(uncore, SOFT_SCRATCH(0), 0);
+
+ for (i = 0; i < GUC_CTL_MAX_DWORDS; i++)
+ intel_uncore_write(uncore, SOFT_SCRATCH(1 + i), guc->params[i]);
+
+ intel_uncore_forcewake_put(uncore, FORCEWAKE_BLITTER);
+}
+
+int intel_guc_init(struct intel_guc *guc)
+{
+ struct intel_gt *gt = guc_to_gt(guc);
+ int ret;
+
+ ret = intel_uc_fw_init(&guc->fw);
+ if (ret)
+ goto err_fetch;
+
+ ret = intel_guc_log_create(&guc->log);
+ if (ret)
+ goto err_fw;
+
+ ret = intel_guc_ads_create(guc);
+ if (ret)
+ goto err_log;
+ GEM_BUG_ON(!guc->ads_vma);
+
+ ret = intel_guc_ct_init(&guc->ct);
+ if (ret)
+ goto err_ads;
+
+ if (intel_guc_is_submission_supported(guc)) {
+ /*
+ * This is stuff we need to have available at fw load time
+ * if we are planning to enable submission later
+ */
+ ret = intel_guc_submission_init(guc);
+ if (ret)
+ goto err_ct;
+ }
+
+ /* now that everything is perma-pinned, initialize the parameters */
+ guc_init_params(guc);
+
+ /* We need to notify the guc whenever we change the GGTT */
+ i915_ggtt_enable_guc(gt->ggtt);
+
+ return 0;
+
+err_ct:
+ intel_guc_ct_fini(&guc->ct);
+err_ads:
+ intel_guc_ads_destroy(guc);
+err_log:
+ intel_guc_log_destroy(&guc->log);
+err_fw:
+ intel_uc_fw_fini(&guc->fw);
+err_fetch:
+ intel_uc_fw_cleanup_fetch(&guc->fw);
+ DRM_DEV_DEBUG_DRIVER(gt->i915->drm.dev, "failed with %d\n", ret);
+ return ret;
+}
+
+void intel_guc_fini(struct intel_guc *guc)
+{
+ struct intel_gt *gt = guc_to_gt(guc);
+
+ if (!intel_uc_fw_is_available(&guc->fw))
+ return;
+
+ i915_ggtt_disable_guc(gt->ggtt);
+
+ if (intel_guc_is_submission_supported(guc))
+ intel_guc_submission_fini(guc);
+
+ intel_guc_ct_fini(&guc->ct);
+
+ intel_guc_ads_destroy(guc);
+ intel_guc_log_destroy(&guc->log);
+ intel_uc_fw_fini(&guc->fw);
+ intel_uc_fw_cleanup_fetch(&guc->fw);
+
+ intel_uc_fw_change_status(&guc->fw, INTEL_UC_FIRMWARE_DISABLED);
+}
+
+/*
+ * This function implements the MMIO based host to GuC interface.
+ */
+int intel_guc_send_mmio(struct intel_guc *guc, const u32 *action, u32 len,
+ u32 *response_buf, u32 response_buf_size)
+{
+ struct intel_uncore *uncore = guc_to_gt(guc)->uncore;
+ u32 status;
+ int i;
+ int ret;
+
+ GEM_BUG_ON(!len);
+ GEM_BUG_ON(len > guc->send_regs.count);
+
+ /* We expect only action code */
+ GEM_BUG_ON(*action & ~INTEL_GUC_MSG_CODE_MASK);
+
+ /* If CT is available, we expect to use MMIO only during init/fini */
+ GEM_BUG_ON(*action != INTEL_GUC_ACTION_REGISTER_COMMAND_TRANSPORT_BUFFER &&
+ *action != INTEL_GUC_ACTION_DEREGISTER_COMMAND_TRANSPORT_BUFFER);
+
+ mutex_lock(&guc->send_mutex);
+ intel_uncore_forcewake_get(uncore, guc->send_regs.fw_domains);
+
+ for (i = 0; i < len; i++)
+ intel_uncore_write(uncore, guc_send_reg(guc, i), action[i]);
+
+ intel_uncore_posting_read(uncore, guc_send_reg(guc, i - 1));
+
+ intel_guc_notify(guc);
+
+ /*
+ * No GuC command should ever take longer than 10ms.
+ * Fast commands should still complete in 10us.
+ */
+ ret = __intel_wait_for_register_fw(uncore,
+ guc_send_reg(guc, 0),
+ INTEL_GUC_MSG_TYPE_MASK,
+ INTEL_GUC_MSG_TYPE_RESPONSE <<
+ INTEL_GUC_MSG_TYPE_SHIFT,
+ 10, 10, &status);
+ /* If GuC explicitly returned an error, convert it to -EIO */
+ if (!ret && !INTEL_GUC_MSG_IS_RESPONSE_SUCCESS(status))
+ ret = -EIO;
+
+ if (ret) {
+ DRM_ERROR("MMIO: GuC action %#x failed with error %d %#x\n",
+ action[0], ret, status);
+ goto out;
+ }
+
+ if (response_buf) {
+ int count = min(response_buf_size, guc->send_regs.count - 1);
+
+ for (i = 0; i < count; i++)
+ response_buf[i] = intel_uncore_read(uncore,
+ guc_send_reg(guc, i + 1));
+ }
+
+ /* Use data from the GuC response as our return value */
+ ret = INTEL_GUC_MSG_TO_DATA(status);
+
+out:
+ intel_uncore_forcewake_put(uncore, guc->send_regs.fw_domains);
+ mutex_unlock(&guc->send_mutex);
+
+ return ret;
+}
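
With CT available, the GEM_BUG_ON above restricts MMIO sends to CT buffer (de)registration; the caller side, as implemented by guc_action_register_ct_buffer() in intel_guc_ct.c later in this patch, looks like::

    u32 action[] = {
            INTEL_GUC_ACTION_REGISTER_COMMAND_TRANSPORT_BUFFER,
            desc_addr,      /* GGTT address of the CT buffer descriptor */
            sizeof(struct guc_ct_buffer_desc),
            type            /* INTEL_GUC_CT_BUFFER_TYPE_SEND or _RECV */
    };

    err = intel_guc_send_mmio(guc, action, ARRAY_SIZE(action), NULL, 0);
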
+
+int intel_guc_to_host_process_recv_msg(struct intel_guc *guc,
+ const u32 *payload, u32 len)
+{
+ u32 msg;
+
+ if (unlikely(!len))
+ return -EPROTO;
+
+ /* Make sure to handle only enabled messages */
+ msg = payload[0] & guc->msg_enabled_mask;
+
+ if (msg & (INTEL_GUC_RECV_MSG_FLUSH_LOG_BUFFER |
+ INTEL_GUC_RECV_MSG_CRASH_DUMP_POSTED))
+ intel_guc_log_handle_flush_event(&guc->log);
+
+ return 0;
+}
+
+int intel_guc_sample_forcewake(struct intel_guc *guc)
+{
+ struct drm_i915_private *dev_priv = guc_to_gt(guc)->i915;
+ u32 action[2];
+
+ action[0] = INTEL_GUC_ACTION_SAMPLE_FORCEWAKE;
+ /* WaRsDisableCoarsePowerGating:skl,cnl */
+ if (!HAS_RC6(dev_priv) || NEEDS_WaRsDisableCoarsePowerGating(dev_priv))
+ action[1] = 0;
+ else
+ /* bit 0 and 1 are for Render and Media domain separately */
+ action[1] = GUC_FORCEWAKE_RENDER | GUC_FORCEWAKE_MEDIA;
+
+ return intel_guc_send(guc, action, ARRAY_SIZE(action));
+}
+
+/**
+ * intel_guc_auth_huc() - Send action to GuC to authenticate HuC ucode
+ * @guc: intel_guc structure
+ * @rsa_offset: rsa offset w.r.t ggtt base of huc vma
+ *
+ * Triggers a HuC firmware authentication request to the GuC via intel_guc_send
+ * INTEL_GUC_ACTION_AUTHENTICATE_HUC interface. This function is invoked by
+ * intel_huc_auth().
+ *
+ * Return: non-zero code on error
+ */
+int intel_guc_auth_huc(struct intel_guc *guc, u32 rsa_offset)
+{
+ u32 action[] = {
+ INTEL_GUC_ACTION_AUTHENTICATE_HUC,
+ rsa_offset
+ };
+
+ return intel_guc_send(guc, action, ARRAY_SIZE(action));
+}
+
+/**
+ * intel_guc_suspend() - notify GuC entering suspend state
+ * @guc: the guc
+ */
+int intel_guc_suspend(struct intel_guc *guc)
+{
+ struct intel_uncore *uncore = guc_to_gt(guc)->uncore;
+ int ret;
+ u32 status;
+ u32 action[] = {
+ INTEL_GUC_ACTION_ENTER_S_STATE,
+ GUC_POWER_D1, /* any value greater than GUC_POWER_D0 */
+ };
+
+ /*
+ * If GuC communication is enabled but submission is not supported,
+ * we do not need to suspend the GuC.
+ */
+ if (!intel_guc_submission_is_enabled(guc))
+ return 0;
+
+ /*
+ * The ENTER_S_STATE action queues the save/restore operation in GuC FW
+ * and then returns, so waiting on the H2G is not enough to guarantee
+ * GuC is done. When all the processing is done, GuC writes
+ * INTEL_GUC_SLEEP_STATE_SUCCESS to scratch register 14, so we can poll
+ * on that. Note that GuC does not ensure that the value in the register
+ * is different from INTEL_GUC_SLEEP_STATE_SUCCESS while the action is
+ * in progress, so we need to take care of that ourselves as well.
+ */
+
+ intel_uncore_write(uncore, SOFT_SCRATCH(14),
+ INTEL_GUC_SLEEP_STATE_INVALID_MASK);
+
+ ret = intel_guc_send(guc, action, ARRAY_SIZE(action));
+ if (ret)
+ return ret;
+
+ ret = __intel_wait_for_register(uncore, SOFT_SCRATCH(14),
+ INTEL_GUC_SLEEP_STATE_INVALID_MASK,
+ 0, 0, 10, &status);
+ if (ret)
+ return ret;
+
+ if (status != INTEL_GUC_SLEEP_STATE_SUCCESS) {
+ DRM_ERROR("GuC failed to change sleep state. "
+ "action=0x%x, err=%u\n",
+ action[0], status);
+ return -EIO;
+ }
+
+ return 0;
+}
+
+/**
+ * intel_guc_reset_engine() - ask GuC to reset an engine
+ * @guc: intel_guc structure
+ * @engine: engine to be reset
+ */
+int intel_guc_reset_engine(struct intel_guc *guc,
+ struct intel_engine_cs *engine)
+{
+ /* XXX: to be implemented with submission interface rework */
+
+ return -ENODEV;
+}
+
+/**
+ * intel_guc_resume() - notify GuC resuming from suspend state
+ * @guc: the guc
+ */
+int intel_guc_resume(struct intel_guc *guc)
+{
+ u32 action[] = {
+ INTEL_GUC_ACTION_EXIT_S_STATE,
+ GUC_POWER_D0,
+ };
+
+ /*
+ * If GuC communication is enabled but submission is not supported,
+ * we do not need to resume the GuC but we do need to enable the
+ * GuC communication on resume (above).
+ */
+ if (!intel_guc_submission_is_enabled(guc))
+ return 0;
+
+ return intel_guc_send(guc, action, ARRAY_SIZE(action));
+}
+
+/**
+ * DOC: GuC Memory Management
+ *
+ * GuC can't allocate any memory for its own usage, so all the allocations must
+ * be handled by the host driver. GuC accesses the memory via the GGTT, with the
+ * exception of the top and bottom parts of the 4GB address space, which are
+ * instead re-mapped by the GuC HW to the memory location of the FW itself (WOPCM)
+ * or other parts of the HW. The driver must take care not to place objects that
+ * the GuC is going to access in these reserved ranges. The layout of the GuC
+ * address space is shown below:
+ *
+ * ::
+ *
+ * +===========> +====================+ <== FFFF_FFFF
+ * ^ | Reserved |
+ * | +====================+ <== GUC_GGTT_TOP
+ * | | |
+ * | | DRAM |
+ * GuC | |
+ * Address +===> +====================+ <== GuC ggtt_pin_bias
+ * Space ^ | |
+ * | | | |
+ * | GuC | GuC |
+ * | WOPCM | WOPCM |
+ * | Size | |
+ * | | | |
+ * v v | |
+ * +=======+===> +====================+ <== 0000_0000
+ *
+ * The lower part of GuC Address Space [0, ggtt_pin_bias) is mapped to GuC WOPCM
+ * while upper part of GuC Address Space [ggtt_pin_bias, GUC_GGTT_TOP) is mapped
+ * to DRAM. The value of the GuC ggtt_pin_bias is the GuC WOPCM size.
+ */
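
Any GGTT address handed to the firmware must therefore land in [ggtt_pin_bias, GUC_GGTT_TOP); a minimal sketch of that bounds check (the in-tree validation is the pair of GEM_BUG_ONs in intel_guc_ggtt_offset() in intel_guc.h, later in this patch)::

    /* Sketch: is a pinned [offset, offset + size) range GuC-visible? */
    static bool guc_range_is_visible(u32 offset, u64 size, u32 pin_bias)
    {
            return offset >= pin_bias &&
                   offset + size <= GUC_GGTT_TOP;
    }
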
+
+/**
+ * intel_guc_allocate_vma() - Allocate a GGTT VMA for GuC usage
+ * @guc: the guc
+ * @size: size of area to allocate (both virtual space and memory)
+ *
+ * This is a wrapper to create an object for use with the GuC. In order to
+ * use it inside the GuC, an object needs to be pinned for its whole
+ * lifetime, so we allocate both some backing storage and a range inside
+ * the Global GTT. We must pin it in the GGTT somewhere other than
+ * [0, GUC ggtt_pin_bias) because that range is reserved inside GuC.
+ *
+ * Return: A i915_vma if successful, otherwise an ERR_PTR.
+ */
+struct i915_vma *intel_guc_allocate_vma(struct intel_guc *guc, u32 size)
+{
+ struct intel_gt *gt = guc_to_gt(guc);
+ struct drm_i915_gem_object *obj;
+ struct i915_vma *vma;
+ u64 flags;
+ int ret;
+
+ obj = i915_gem_object_create_shmem(gt->i915, size);
+ if (IS_ERR(obj))
+ return ERR_CAST(obj);
+
+ vma = i915_vma_instance(obj, &gt->ggtt->vm, NULL);
+ if (IS_ERR(vma))
+ goto err;
+
+ flags = PIN_GLOBAL | PIN_OFFSET_BIAS | i915_ggtt_pin_bias(vma);
+ ret = i915_vma_pin(vma, 0, 0, flags);
+ if (ret) {
+ vma = ERR_PTR(ret);
+ goto err;
+ }
+
+ return i915_vma_make_unshrinkable(vma);
+
+err:
+ i915_gem_object_put(obj);
+ return vma;
+}
+
+/**
+ * intel_guc_allocate_and_map_vma() - Allocate and map VMA for GuC usage
+ * @guc: the guc
+ * @size: size of area to allocate (both virtual space and memory)
+ * @out_vma: return variable for the allocated vma pointer
+ * @out_vaddr: return variable for the obj mapping
+ *
+ * This wrapper calls intel_guc_allocate_vma() and then maps the allocated
+ * object with I915_MAP_WB.
+ *
+ * Return: 0 if successful, a negative errno code otherwise.
+ */
+int intel_guc_allocate_and_map_vma(struct intel_guc *guc, u32 size,
+ struct i915_vma **out_vma, void **out_vaddr)
+{
+ struct i915_vma *vma;
+ void *vaddr;
+
+ vma = intel_guc_allocate_vma(guc, size);
+ if (IS_ERR(vma))
+ return PTR_ERR(vma);
+
+ vaddr = i915_gem_object_pin_map(vma->obj, I915_MAP_WB);
+ if (IS_ERR(vaddr)) {
+ i915_vma_unpin_and_release(&vma, 0);
+ return PTR_ERR(vaddr);
+ }
+
+ *out_vma = vma;
+ *out_vaddr = vaddr;
+
+ return 0;
+}
diff --git a/drivers/gpu/drm/i915/gt/uc/intel_guc.h b/drivers/gpu/drm/i915/gt/uc/intel_guc.h
new file mode 100644
index 000000000000..910d49590068
--- /dev/null
+++ b/drivers/gpu/drm/i915/gt/uc/intel_guc.h
@@ -0,0 +1,186 @@
+/* SPDX-License-Identifier: MIT */
+/*
+ * Copyright © 2014-2019 Intel Corporation
+ */
+
+#ifndef _INTEL_GUC_H_
+#define _INTEL_GUC_H_
+
+#include "intel_uncore.h"
+#include "intel_guc_fw.h"
+#include "intel_guc_fwif.h"
+#include "intel_guc_ct.h"
+#include "intel_guc_log.h"
+#include "intel_guc_reg.h"
+#include "intel_uc_fw.h"
+#include "i915_utils.h"
+#include "i915_vma.h"
+
+struct __guc_ads_blob;
+
+/*
+ * Top level structure of GuC. It handles firmware loading and manages the
+ * client pool. intel_guc owns an intel_guc_client to replace the legacy
+ * ExecLists submission.
+ */
+struct intel_guc {
+ struct intel_uc_fw fw;
+ struct intel_guc_log log;
+ struct intel_guc_ct ct;
+
+ /* intel_guc_recv interrupt related state */
+ spinlock_t irq_lock;
+ unsigned int msg_enabled_mask;
+
+ struct {
+ bool enabled;
+ void (*reset)(struct intel_guc *guc);
+ void (*enable)(struct intel_guc *guc);
+ void (*disable)(struct intel_guc *guc);
+ } interrupts;
+
+ bool submission_supported;
+
+ struct i915_vma *ads_vma;
+ struct __guc_ads_blob *ads_blob;
+
+ struct i915_vma *stage_desc_pool;
+ void *stage_desc_pool_vaddr;
+
+ struct i915_vma *workqueue;
+ void *workqueue_vaddr;
+ spinlock_t wq_lock;
+
+ struct i915_vma *proc_desc;
+ void *proc_desc_vaddr;
+
+ /* Control params for fw initialization */
+ u32 params[GUC_CTL_MAX_DWORDS];
+
+ /* GuC's FW specific registers used in MMIO send */
+ struct {
+ u32 base;
+ unsigned int count;
+ enum forcewake_domains fw_domains;
+ } send_regs;
+
+ /* register used to send interrupts to the GuC FW */
+ i915_reg_t notify_reg;
+
+ /* Store msg (e.g. log flush) that we see while CTBs are disabled */
+ u32 mmio_msg;
+
+ /* To serialize the intel_guc_send actions */
+ struct mutex send_mutex;
+};
+
+static inline int
+intel_guc_send(struct intel_guc *guc, const u32 *action, u32 len)
+{
+ return intel_guc_ct_send(&guc->ct, action, len, NULL, 0);
+}
+
+static inline int
+intel_guc_send_and_receive(struct intel_guc *guc, const u32 *action, u32 len,
+ u32 *response_buf, u32 response_buf_size)
+{
+ return intel_guc_ct_send(&guc->ct, action, len,
+ response_buf, response_buf_size);
+}
+
+static inline void intel_guc_to_host_event_handler(struct intel_guc *guc)
+{
+ intel_guc_ct_event_handler(&guc->ct);
+}
+
+/* GuC addresses above GUC_GGTT_TOP also don't map through the GTT */
+#define GUC_GGTT_TOP 0xFEE00000
+
+/**
+ * intel_guc_ggtt_offset() - Get and validate the GGTT offset of @vma
+ * @guc: intel_guc structure.
+ * @vma: i915 graphics virtual memory area.
+ *
+ * GuC does not allow any gfx GGTT address that falls into range
+ * [0, ggtt.pin_bias), which is reserved for Boot ROM, SRAM and WOPCM.
+ * Currently, in order to exclude [0, ggtt.pin_bias) address space from
+ * GGTT, all gfx objects used by GuC are allocated with intel_guc_allocate_vma()
+ * and pinned with PIN_OFFSET_BIAS along with the value of ggtt.pin_bias.
+ *
+ * Return: GGTT offset of the @vma.
+ */
+static inline u32 intel_guc_ggtt_offset(struct intel_guc *guc,
+ struct i915_vma *vma)
+{
+ u32 offset = i915_ggtt_offset(vma);
+
+ GEM_BUG_ON(offset < i915_ggtt_pin_bias(vma));
+ GEM_BUG_ON(range_overflows_t(u64, offset, vma->size, GUC_GGTT_TOP));
+
+ return offset;
+}
+
+void intel_guc_init_early(struct intel_guc *guc);
+void intel_guc_init_send_regs(struct intel_guc *guc);
+void intel_guc_write_params(struct intel_guc *guc);
+int intel_guc_init(struct intel_guc *guc);
+void intel_guc_fini(struct intel_guc *guc);
+void intel_guc_notify(struct intel_guc *guc);
+int intel_guc_send_mmio(struct intel_guc *guc, const u32 *action, u32 len,
+ u32 *response_buf, u32 response_buf_size);
+int intel_guc_to_host_process_recv_msg(struct intel_guc *guc,
+ const u32 *payload, u32 len);
+int intel_guc_sample_forcewake(struct intel_guc *guc);
+int intel_guc_auth_huc(struct intel_guc *guc, u32 rsa_offset);
+int intel_guc_suspend(struct intel_guc *guc);
+int intel_guc_resume(struct intel_guc *guc);
+struct i915_vma *intel_guc_allocate_vma(struct intel_guc *guc, u32 size);
+int intel_guc_allocate_and_map_vma(struct intel_guc *guc, u32 size,
+ struct i915_vma **out_vma, void **out_vaddr);
+
+static inline bool intel_guc_is_supported(struct intel_guc *guc)
+{
+ return intel_uc_fw_is_supported(&guc->fw);
+}
+
+static inline bool intel_guc_is_enabled(struct intel_guc *guc)
+{
+ return intel_uc_fw_is_enabled(&guc->fw);
+}
+
+static inline bool intel_guc_is_running(struct intel_guc *guc)
+{
+ return intel_uc_fw_is_running(&guc->fw);
+}
+
+static inline int intel_guc_sanitize(struct intel_guc *guc)
+{
+ intel_uc_fw_sanitize(&guc->fw);
+ guc->mmio_msg = 0;
+
+ return 0;
+}
+
+static inline bool intel_guc_is_submission_supported(struct intel_guc *guc)
+{
+ return guc->submission_supported;
+}
+
+static inline void intel_guc_enable_msg(struct intel_guc *guc, u32 mask)
+{
+ spin_lock_irq(&guc->irq_lock);
+ guc->msg_enabled_mask |= mask;
+ spin_unlock_irq(&guc->irq_lock);
+}
+
+static inline void intel_guc_disable_msg(struct intel_guc *guc, u32 mask)
+{
+ spin_lock_irq(&guc->irq_lock);
+ guc->msg_enabled_mask &= ~mask;
+ spin_unlock_irq(&guc->irq_lock);
+}
+
+int intel_guc_reset_engine(struct intel_guc *guc,
+ struct intel_engine_cs *engine);
+
+#endif
diff --git a/drivers/gpu/drm/i915/gt/uc/intel_guc_ads.c b/drivers/gpu/drm/i915/gt/uc/intel_guc_ads.c
new file mode 100644
index 000000000000..101728006ae9
--- /dev/null
+++ b/drivers/gpu/drm/i915/gt/uc/intel_guc_ads.c
@@ -0,0 +1,172 @@
+// SPDX-License-Identifier: MIT
+/*
+ * Copyright © 2014-2019 Intel Corporation
+ */
+
+#include "gt/intel_gt.h"
+#include "intel_guc_ads.h"
+#include "intel_uc.h"
+#include "i915_drv.h"
+
+/*
+ * The Additional Data Struct (ADS) has pointers for different buffers used by
+ * the GuC. One single gem object contains the ADS struct itself (guc_ads), the
+ * scheduling policies (guc_policies), a structure describing a collection of
+ * register sets (guc_mmio_reg_state) and some extra pages for the GuC to save
+ * its internal state for sleep.
+ */
+
+static void guc_policy_init(struct guc_policy *policy)
+{
+ policy->execution_quantum = POLICY_DEFAULT_EXECUTION_QUANTUM_US;
+ policy->preemption_time = POLICY_DEFAULT_PREEMPTION_TIME_US;
+ policy->fault_time = POLICY_DEFAULT_FAULT_TIME_US;
+ policy->policy_flags = 0;
+}
+
+static void guc_policies_init(struct guc_policies *policies)
+{
+ struct guc_policy *policy;
+ u32 p, i;
+
+ policies->dpc_promote_time = POLICY_DEFAULT_DPC_PROMOTE_TIME_US;
+ policies->max_num_work_items = POLICY_MAX_NUM_WI;
+
+ for (p = 0; p < GUC_CLIENT_PRIORITY_NUM; p++) {
+ for (i = 0; i < GUC_MAX_ENGINE_CLASSES; i++) {
+ policy = &policies->policy[p][i];
+
+ guc_policy_init(policy);
+ }
+ }
+
+ policies->is_valid = 1;
+}
+
+static void guc_ct_pool_entries_init(struct guc_ct_pool_entry *pool, u32 num)
+{
+ memset(pool, 0, num * sizeof(*pool));
+}
+
+/*
+ * The first 80 dwords of the register state context, containing the
+ * execlists and ppgtt registers.
+ */
+#define LR_HW_CONTEXT_SIZE (80 * sizeof(u32))
+
+/* The ads obj includes the struct itself and buffers passed to GuC */
+struct __guc_ads_blob {
+ struct guc_ads ads;
+ struct guc_policies policies;
+ struct guc_mmio_reg_state reg_state;
+ struct guc_gt_system_info system_info;
+ struct guc_clients_info clients_info;
+ struct guc_ct_pool_entry ct_pool[GUC_CT_POOL_SIZE];
+ u8 reg_state_buffer[GUC_S3_SAVE_SPACE_PAGES * PAGE_SIZE];
+} __packed;
+
+static void __guc_ads_init(struct intel_guc *guc)
+{
+ struct drm_i915_private *dev_priv = guc_to_gt(guc)->i915;
+ struct __guc_ads_blob *blob = guc->ads_blob;
+ const u32 skipped_size = LRC_PPHWSP_SZ * PAGE_SIZE + LR_HW_CONTEXT_SIZE;
+ u32 base;
+ u8 engine_class;
+
+ /* GuC scheduling policies */
+ guc_policies_init(&blob->policies);
+
+ /*
+ * GuC expects a per-engine-class context image and size
+ * (minus hwsp and ring context). The context image will be
+ * used to reinitialize engines after a reset. It must exist
+ * and be pinned in the GGTT, so that the address won't change after
+ * we have told GuC where to find it. The context size will be used
+ * to validate that the LRC base + size falls within the allowed GGTT range.
+ */
+ for (engine_class = 0; engine_class <= MAX_ENGINE_CLASS; ++engine_class) {
+ if (engine_class == OTHER_CLASS)
+ continue;
+ /*
+ * TODO: Set context pointer to default state to allow
+ * GuC to re-init guilty contexts after internal reset.
+ */
+ blob->ads.golden_context_lrca[engine_class] = 0;
+ blob->ads.eng_state_size[engine_class] =
+ intel_engine_context_size(guc_to_gt(guc),
+ engine_class) -
+ skipped_size;
+ }
+
+ /* System info */
+ blob->system_info.slice_enabled = hweight8(RUNTIME_INFO(dev_priv)->sseu.slice_mask);
+ blob->system_info.rcs_enabled = 1;
+ blob->system_info.bcs_enabled = 1;
+
+ blob->system_info.vdbox_enable_mask = VDBOX_MASK(dev_priv);
+ blob->system_info.vebox_enable_mask = VEBOX_MASK(dev_priv);
+ blob->system_info.vdbox_sfc_support_mask = RUNTIME_INFO(dev_priv)->vdbox_sfc_access;
+
+ base = intel_guc_ggtt_offset(guc, guc->ads_vma);
+
+ /* Clients info */
+ guc_ct_pool_entries_init(blob->ct_pool, ARRAY_SIZE(blob->ct_pool));
+
+ blob->clients_info.clients_num = 1;
+ blob->clients_info.ct_pool_addr = base + ptr_offset(blob, ct_pool);
+ blob->clients_info.ct_pool_count = ARRAY_SIZE(blob->ct_pool);
+
+ /* ADS */
+ blob->ads.scheduler_policies = base + ptr_offset(blob, policies);
+ blob->ads.reg_state_buffer = base + ptr_offset(blob, reg_state_buffer);
+ blob->ads.reg_state_addr = base + ptr_offset(blob, reg_state);
+ blob->ads.gt_system_info = base + ptr_offset(blob, system_info);
+ blob->ads.clients_info = base + ptr_offset(blob, clients_info);
+
+ i915_gem_object_flush_map(guc->ads_vma->obj);
+}
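
Each pointer published in the ADS is just the blob's GGTT base plus the CPU offset of the member within struct __guc_ads_blob; assuming ptr_offset() is the usual offsetof-style helper from i915_utils.h, one of the assignments above expands to::

    blob->ads.scheduler_policies =
            base + offsetof(struct __guc_ads_blob, policies);
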
+
+/**
+ * intel_guc_ads_create() - allocates and initializes GuC ADS.
+ * @guc: intel_guc struct
+ *
+ * GuC needs a memory block (the Additional Data Struct), where it will store
+ * some data. Allocate and initialize such a memory block for GuC use.
+ */
+int intel_guc_ads_create(struct intel_guc *guc)
+{
+ const u32 size = PAGE_ALIGN(sizeof(struct __guc_ads_blob));
+ int ret;
+
+ GEM_BUG_ON(guc->ads_vma);
+
+ ret = intel_guc_allocate_and_map_vma(guc, size, &guc->ads_vma,
+ (void **)&guc->ads_blob);
+
+ if (ret)
+ return ret;
+
+ __guc_ads_init(guc);
+
+ return 0;
+}
+
+void intel_guc_ads_destroy(struct intel_guc *guc)
+{
+ i915_vma_unpin_and_release(&guc->ads_vma, I915_VMA_RELEASE_MAP);
+}
+
+/**
+ * intel_guc_ads_reset() - prepares GuC Additional Data Struct for reuse
+ * @guc: intel_guc struct
+ *
+ * GuC stores some data in ADS, which might be stale after a reset.
+ * Reinitialize the whole ADS in case any part of it was corrupted during
+ * the previous GuC run.
+ */
+void intel_guc_ads_reset(struct intel_guc *guc)
+{
+ if (!guc->ads_vma)
+ return;
+ __guc_ads_init(guc);
+}
diff --git a/drivers/gpu/drm/i915/gt/uc/intel_guc_ads.h b/drivers/gpu/drm/i915/gt/uc/intel_guc_ads.h
new file mode 100644
index 000000000000..b00d3ae1113a
--- /dev/null
+++ b/drivers/gpu/drm/i915/gt/uc/intel_guc_ads.h
@@ -0,0 +1,15 @@
+/* SPDX-License-Identifier: MIT */
+/*
+ * Copyright © 2014-2019 Intel Corporation
+ */
+
+#ifndef _INTEL_GUC_ADS_H_
+#define _INTEL_GUC_ADS_H_
+
+struct intel_guc;
+
+int intel_guc_ads_create(struct intel_guc *guc);
+void intel_guc_ads_destroy(struct intel_guc *guc);
+void intel_guc_ads_reset(struct intel_guc *guc);
+
+#endif
diff --git a/drivers/gpu/drm/i915/gt/uc/intel_guc_ct.c b/drivers/gpu/drm/i915/gt/uc/intel_guc_ct.c
new file mode 100644
index 000000000000..c6f971a049f9
--- /dev/null
+++ b/drivers/gpu/drm/i915/gt/uc/intel_guc_ct.c
@@ -0,0 +1,822 @@
+// SPDX-License-Identifier: MIT
+/*
+ * Copyright © 2016-2019 Intel Corporation
+ */
+
+#include "i915_drv.h"
+#include "intel_guc_ct.h"
+
+#ifdef CONFIG_DRM_I915_DEBUG_GUC
+#define CT_DEBUG_DRIVER(...) DRM_DEBUG_DRIVER(__VA_ARGS__)
+#else
+#define CT_DEBUG_DRIVER(...) do { } while (0)
+#endif
+
+struct ct_request {
+ struct list_head link;
+ u32 fence;
+ u32 status;
+ u32 response_len;
+ u32 *response_buf;
+};
+
+struct ct_incoming_request {
+ struct list_head link;
+ u32 msg[];
+};
+
+enum { CTB_SEND = 0, CTB_RECV = 1 };
+
+enum { CTB_OWNER_HOST = 0 };
+
+static void ct_incoming_request_worker_func(struct work_struct *w);
+
+/**
+ * intel_guc_ct_init_early - Initialize CT state without requiring device access
+ * @ct: pointer to CT struct
+ */
+void intel_guc_ct_init_early(struct intel_guc_ct *ct)
+{
+ spin_lock_init(&ct->requests.lock);
+ INIT_LIST_HEAD(&ct->requests.pending);
+ INIT_LIST_HEAD(&ct->requests.incoming);
+ INIT_WORK(&ct->requests.worker, ct_incoming_request_worker_func);
+}
+
+static inline struct intel_guc *ct_to_guc(struct intel_guc_ct *ct)
+{
+ return container_of(ct, struct intel_guc, ct);
+}
+
+static inline const char *guc_ct_buffer_type_to_str(u32 type)
+{
+ switch (type) {
+ case INTEL_GUC_CT_BUFFER_TYPE_SEND:
+ return "SEND";
+ case INTEL_GUC_CT_BUFFER_TYPE_RECV:
+ return "RECV";
+ default:
+ return "<invalid>";
+ }
+}
+
+static void guc_ct_buffer_desc_init(struct guc_ct_buffer_desc *desc,
+ u32 cmds_addr, u32 size)
+{
+ CT_DEBUG_DRIVER("CT: init addr=%#x size=%u\n", cmds_addr, size);
+ memset(desc, 0, sizeof(*desc));
+ desc->addr = cmds_addr;
+ desc->size = size;
+ desc->owner = CTB_OWNER_HOST;
+}
+
+static void guc_ct_buffer_desc_reset(struct guc_ct_buffer_desc *desc)
+{
+ CT_DEBUG_DRIVER("CT: desc %p reset head=%u tail=%u\n",
+ desc, desc->head, desc->tail);
+ desc->head = 0;
+ desc->tail = 0;
+ desc->is_in_error = 0;
+}
+
+static int guc_action_register_ct_buffer(struct intel_guc *guc,
+ u32 desc_addr,
+ u32 type)
+{
+ u32 action[] = {
+ INTEL_GUC_ACTION_REGISTER_COMMAND_TRANSPORT_BUFFER,
+ desc_addr,
+ sizeof(struct guc_ct_buffer_desc),
+ type
+ };
+ int err;
+
+ /* Can't use generic send(), CT registration must go over MMIO */
+ err = intel_guc_send_mmio(guc, action, ARRAY_SIZE(action), NULL, 0);
+ if (err)
+ DRM_ERROR("CT: register %s buffer failed; err=%d\n",
+ guc_ct_buffer_type_to_str(type), err);
+ return err;
+}
+
+static int guc_action_deregister_ct_buffer(struct intel_guc *guc,
+ u32 type)
+{
+ u32 action[] = {
+ INTEL_GUC_ACTION_DEREGISTER_COMMAND_TRANSPORT_BUFFER,
+ CTB_OWNER_HOST,
+ type
+ };
+ int err;
+
+ /* Can't use generic send(), CT deregistration must go over MMIO */
+ err = intel_guc_send_mmio(guc, action, ARRAY_SIZE(action), NULL, 0);
+ if (err)
+ DRM_ERROR("CT: deregister %s buffer failed; err=%d\n",
+ guc_ct_buffer_type_to_str(type), err);
+ return err;
+}
+
+/**
+ * intel_guc_ct_init - Init buffer-based communication
+ * @ct: pointer to CT struct
+ *
+ * Allocate memory required for buffer-based communication.
+ *
+ * Return: 0 on success, a negative errno code on failure.
+ */
+int intel_guc_ct_init(struct intel_guc_ct *ct)
+{
+ struct intel_guc *guc = ct_to_guc(ct);
+ void *blob;
+ int err;
+ int i;
+
+ GEM_BUG_ON(ct->vma);
+
+ /* We allocate 1 page to hold both descriptors and both buffers.
+ * ___________.....................
+ * |desc (SEND)| :
+ * |___________| PAGE/4
+ * :___________....................:
+ * |desc (RECV)| :
+ * |___________| PAGE/4
+ * :_______________________________:
+ * |cmds (SEND) |
+ * | PAGE/4
+ * |_______________________________|
+ * |cmds (RECV) |
+ * | PAGE/4
+ * |_______________________________|
+ *
+ * Each message can use a maximum of 32 dwords and we don't expect to
+ * have more than 1 in flight at any time, so we have enough space.
+ * Some logic further ahead will rely on the fact that there is only 1
+ * page and that it is always mapped, so if the size is changed the
+ * other code will need updating as well.
+ */
+
+ err = intel_guc_allocate_and_map_vma(guc, PAGE_SIZE, &ct->vma, &blob);
+ if (err) {
+ DRM_ERROR("CT: channel allocation failed; err=%d\n", err);
+ return err;
+ }
+
+ CT_DEBUG_DRIVER("CT: vma base=%#x\n",
+ intel_guc_ggtt_offset(guc, ct->vma));
+
+ /* store pointers to desc and cmds */
+ for (i = 0; i < ARRAY_SIZE(ct->ctbs); i++) {
+ GEM_BUG_ON((i != CTB_SEND) && (i != CTB_RECV));
+ ct->ctbs[i].desc = blob + PAGE_SIZE/4 * i;
+ ct->ctbs[i].cmds = blob + PAGE_SIZE/4 * i + PAGE_SIZE/2;
+ }
+
+ return 0;
+}
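
With the usual 4KiB page, the pointer arithmetic above fixes the four regions at quarter-page offsets, matching the diagram::

    /*
     * PAGE_SIZE == 4096:
     *   ctbs[CTB_SEND].desc = blob + 0      (SEND descriptor)
     *   ctbs[CTB_RECV].desc = blob + 1024   (RECV descriptor)
     *   ctbs[CTB_SEND].cmds = blob + 2048   (SEND commands)
     *   ctbs[CTB_RECV].cmds = blob + 3072   (RECV commands)
     */
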
+
+/**
+ * intel_guc_ct_fini - Fini buffer-based communication
+ * @ct: pointer to CT struct
+ *
+ * Deallocate memory required for buffer-based communication.
+ */
+void intel_guc_ct_fini(struct intel_guc_ct *ct)
+{
+ GEM_BUG_ON(ct->enabled);
+
+ i915_vma_unpin_and_release(&ct->vma, I915_VMA_RELEASE_MAP);
+}
+
+/**
+ * intel_guc_ct_enable - Enable buffer based command transport.
+ * @ct: pointer to CT struct
+ *
+ * Return: 0 on success, a negative errno code on failure.
+ */
+int intel_guc_ct_enable(struct intel_guc_ct *ct)
+{
+ struct intel_guc *guc = ct_to_guc(ct);
+ u32 base;
+ int err;
+ int i;
+
+ GEM_BUG_ON(ct->enabled);
+
+ /* The vma should already be allocated and mapped */
+ GEM_BUG_ON(!ct->vma);
+ base = intel_guc_ggtt_offset(guc, ct->vma);
+
+ /* (re)initialize descriptors
+ * cmds buffers are in the second half of the blob page
+ */
+ for (i = 0; i < ARRAY_SIZE(ct->ctbs); i++) {
+ GEM_BUG_ON((i != CTB_SEND) && (i != CTB_RECV));
+ guc_ct_buffer_desc_init(ct->ctbs[i].desc,
+ base + PAGE_SIZE/4 * i + PAGE_SIZE/2,
+ PAGE_SIZE/4);
+ }
+
+ /* register buffers, starting with the RECV buffer;
+ * the descriptors are in the first half of the blob
+ */
+ err = guc_action_register_ct_buffer(guc,
+ base + PAGE_SIZE/4 * CTB_RECV,
+ INTEL_GUC_CT_BUFFER_TYPE_RECV);
+ if (unlikely(err))
+ goto err_out;
+
+ err = guc_action_register_ct_buffer(guc,
+ base + PAGE_SIZE/4 * CTB_SEND,
+ INTEL_GUC_CT_BUFFER_TYPE_SEND);
+ if (unlikely(err))
+ goto err_deregister;
+
+ ct->enabled = true;
+
+ return 0;
+
+err_deregister:
+ guc_action_deregister_ct_buffer(guc,
+ INTEL_GUC_CT_BUFFER_TYPE_RECV);
+err_out:
+ DRM_ERROR("CT: can't open channel; err=%d\n", err);
+ return err;
+}
+
+/**
+ * intel_guc_ct_disable - Disable buffer based command transport.
+ * @ct: pointer to CT struct
+ */
+void intel_guc_ct_disable(struct intel_guc_ct *ct)
+{
+ struct intel_guc *guc = ct_to_guc(ct);
+
+ GEM_BUG_ON(!ct->enabled);
+
+ ct->enabled = false;
+
+ if (intel_guc_is_running(guc)) {
+ guc_action_deregister_ct_buffer(guc,
+ INTEL_GUC_CT_BUFFER_TYPE_SEND);
+ guc_action_deregister_ct_buffer(guc,
+ INTEL_GUC_CT_BUFFER_TYPE_RECV);
+ }
+}
+
+static u32 ct_get_next_fence(struct intel_guc_ct *ct)
+{
+ /* For now it's trivial */
+ return ++ct->requests.next_fence;
+}
+
+/**
+ * DOC: CTB Host to GuC request
+ *
+ * Format of the CTB Host to GuC request message is as follows::
+ *
+ * +------------+---------+---------+---------+---------+
+ * | msg[0] | [1] | [2] | ... | [n-1] |
+ * +------------+---------+---------+---------+---------+
+ * | MESSAGE | MESSAGE PAYLOAD |
+ * + HEADER +---------+---------+---------+---------+
+ * | | 0 | 1 | ... | n |
+ * +============+=========+=========+=========+=========+
+ * | len >= 1 | FENCE | request specific data |
+ * +------+-----+---------+---------+---------+---------+
+ *
+ * ^-----------------len-------------------^
+ */
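
As a concrete sketch, the two-dword ENTER_S_STATE request sent by intel_guc_suspend() would be laid out by ctb_write() below as follows (buffer wraparound ignored; GUC_CT_MSG_SEND_STATUS is not set since no response buffer is passed)::

    cmds[tail + 0] = (2 << GUC_CT_MSG_LEN_SHIFT) |  /* len = fence + 1 data dw */
                     GUC_CT_MSG_WRITE_FENCE_TO_DESC |
                     (INTEL_GUC_ACTION_ENTER_S_STATE << GUC_CT_MSG_ACTION_SHIFT);
    cmds[tail + 1] = fence;
    cmds[tail + 2] = GUC_POWER_D1;                  /* request specific data */
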
+
+static int ctb_write(struct intel_guc_ct_buffer *ctb,
+ const u32 *action,
+ u32 len /* in dwords */,
+ u32 fence,
+ bool want_response)
+{
+ struct guc_ct_buffer_desc *desc = ctb->desc;
+ u32 head = desc->head / 4; /* in dwords */
+ u32 tail = desc->tail / 4; /* in dwords */
+ u32 size = desc->size / 4; /* in dwords */
+ u32 used; /* in dwords */
+ u32 header;
+ u32 *cmds = ctb->cmds;
+ unsigned int i;
+
+ GEM_BUG_ON(desc->size % 4);
+ GEM_BUG_ON(desc->head % 4);
+ GEM_BUG_ON(desc->tail % 4);
+ GEM_BUG_ON(tail >= size);
+
+ /*
+ * tail == head condition indicates empty. GuC FW does not support
+ * using up the entire buffer to get tail == head meaning full.
+ */
+ if (tail < head)
+ used = (size - head) + tail;
+ else
+ used = tail - head;
+
+ /* make sure there is space, including the extra dword for the fence */
+ if (unlikely(used + len + 1 >= size))
+ return -ENOSPC;
+
+ /*
+ * Write the message. The format is the following:
+ * DW0: header (including action code)
+ * DW1: fence
+ * DW2+: action data
+ */
+ header = (len << GUC_CT_MSG_LEN_SHIFT) |
+ (GUC_CT_MSG_WRITE_FENCE_TO_DESC) |
+ (want_response ? GUC_CT_MSG_SEND_STATUS : 0) |
+ (action[0] << GUC_CT_MSG_ACTION_SHIFT);
+
+ CT_DEBUG_DRIVER("CT: writing %*ph %*ph %*ph\n",
+ 4, &header, 4, &fence,
+ 4 * (len - 1), &action[1]);
+
+ cmds[tail] = header;
+ tail = (tail + 1) % size;
+
+ cmds[tail] = fence;
+ tail = (tail + 1) % size;
+
+ for (i = 1; i < len; i++) {
+ cmds[tail] = action[i];
+ tail = (tail + 1) % size;
+ }
+
+ /* now update desc tail (back in bytes) */
+ desc->tail = tail * 4;
+ GEM_BUG_ON(desc->tail > desc->size);
+
+ return 0;
+}
+
+/**
+ * wait_for_ctb_desc_update - Wait for the CT buffer descriptor update.
+ * @desc: buffer descriptor
+ * @fence: response fence
+ * @status: placeholder for status
+ *
+ * GuC will update the CT buffer descriptor with a new fence and status
+ * after processing the command identified by the fence. Wait for the
+ * specified fence and then read the command's status from the
+ * descriptor.
+ *
+ * Return:
+ * * 0 response received (status is valid)
+ * * -ETIMEDOUT no response within hardcoded timeout
+ * * -EPROTO no response, CT buffer is in error
+ */
+static int wait_for_ctb_desc_update(struct guc_ct_buffer_desc *desc,
+ u32 fence,
+ u32 *status)
+{
+ int err;
+
+ /*
+ * Fast commands should complete in less than 10us, so sample quickly
+ * up to that length of time, then switch to a slower sleep-wait loop.
+ * No GuC command should ever take longer than 10ms.
+ */
+#define done (READ_ONCE(desc->fence) == fence)
+ err = wait_for_us(done, 10);
+ if (err)
+ err = wait_for(done, 10);
+#undef done
+
+ if (unlikely(err)) {
+ DRM_ERROR("CT: fence %u failed; reported fence=%u\n",
+ fence, desc->fence);
+
+ if (WARN_ON(desc->is_in_error)) {
+ /* Something went wrong with the messaging, try to reset
+ * the buffer and hope for the best
+ */
+ guc_ct_buffer_desc_reset(desc);
+ err = -EPROTO;
+ }
+ }
+
+ *status = desc->status;
+ return err;
+}
+
+/**
+ * wait_for_ct_request_update - Wait for CT request state update.
+ * @req: pointer to pending request
+ * @status: placeholder for status
+ *
+ * For each request sent, GuC shall send back a CT response message.
+ * Our message handler will update the status of the tracked request
+ * once a response message with the matching fence is received. Wait
+ * here and check for a valid response status value.
+ *
+ * Return:
+ * * 0 response received (status is valid)
+ * * -ETIMEDOUT no response within hardcoded timeout
+ */
+static int wait_for_ct_request_update(struct ct_request *req, u32 *status)
+{
+ int err;
+
+ /*
+ * Fast commands should complete in less than 10us, so sample quickly
+ * up to that length of time, then switch to a slower sleep-wait loop.
+ * No GuC command should ever take longer than 10ms.
+ */
+#define done INTEL_GUC_MSG_IS_RESPONSE(READ_ONCE(req->status))
+ err = wait_for_us(done, 10);
+ if (err)
+ err = wait_for(done, 10);
+#undef done
+
+ if (unlikely(err))
+ DRM_ERROR("CT: fence %u err %d\n", req->fence, err);
+
+ *status = req->status;
+ return err;
+}
+
+static int ct_send(struct intel_guc_ct *ct,
+ const u32 *action,
+ u32 len,
+ u32 *response_buf,
+ u32 response_buf_size,
+ u32 *status)
+{
+ struct intel_guc_ct_buffer *ctb = &ct->ctbs[CTB_SEND];
+ struct guc_ct_buffer_desc *desc = ctb->desc;
+ struct ct_request request;
+ unsigned long flags;
+ u32 fence;
+ int err;
+
+ GEM_BUG_ON(!ct->enabled);
+ GEM_BUG_ON(!len);
+ GEM_BUG_ON(len & ~GUC_CT_MSG_LEN_MASK);
+ GEM_BUG_ON(!response_buf && response_buf_size);
+
+ fence = ct_get_next_fence(ct);
+ request.fence = fence;
+ request.status = 0;
+ request.response_len = response_buf_size;
+ request.response_buf = response_buf;
+
+ spin_lock_irqsave(&ct->requests.lock, flags);
+ list_add_tail(&request.link, &ct->requests.pending);
+ spin_unlock_irqrestore(&ct->requests.lock, flags);
+
+ err = ctb_write(ctb, action, len, fence, !!response_buf);
+ if (unlikely(err))
+ goto unlink;
+
+ intel_guc_notify(ct_to_guc(ct));
+
+ if (response_buf)
+ err = wait_for_ct_request_update(&request, status);
+ else
+ err = wait_for_ctb_desc_update(desc, fence, status);
+ if (unlikely(err))
+ goto unlink;
+
+ if (!INTEL_GUC_MSG_IS_RESPONSE_SUCCESS(*status)) {
+ err = -EIO;
+ goto unlink;
+ }
+
+ if (response_buf) {
+ /* There shall be no data in the status */
+ WARN_ON(INTEL_GUC_MSG_TO_DATA(request.status));
+ /* Return actual response len */
+ err = request.response_len;
+ } else {
+ /* There shall be no response payload */
+ WARN_ON(request.response_len);
+ /* Return data decoded from the status dword */
+ err = INTEL_GUC_MSG_TO_DATA(*status);
+ }
+
+unlink:
+ spin_lock_irqsave(&ct->requests.lock, flags);
+ list_del(&request.link);
+ spin_unlock_irqrestore(&ct->requests.lock, flags);
+
+ return err;
+}
+
+/*
+ * Command Transport (CT) buffer based GuC send function.
+ */
+int intel_guc_ct_send(struct intel_guc_ct *ct, const u32 *action, u32 len,
+ u32 *response_buf, u32 response_buf_size)
+{
+ struct intel_guc *guc = ct_to_guc(ct);
+ u32 status = ~0; /* undefined */
+ int ret;
+
+ if (unlikely(!ct->enabled)) {
+ WARN(1, "Unexpected send: action=%#x\n", *action);
+ return -ENODEV;
+ }
+
+ mutex_lock(&guc->send_mutex);
+
+ ret = ct_send(ct, action, len, response_buf, response_buf_size, &status);
+ if (unlikely(ret < 0)) {
+ DRM_ERROR("CT: send action %#X failed; err=%d status=%#X\n",
+ action[0], ret, status);
+ } else if (unlikely(ret)) {
+ CT_DEBUG_DRIVER("CT: send action %#x returned %d (%#x)\n",
+ action[0], ret, ret);
+ }
+
+ mutex_unlock(&guc->send_mutex);
+ return ret;
+}
+
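+/*
+ * Usage sketch (hypothetical caller, for exposition only): actions are
+ * plain dword arrays with the GuC action code in action[0], as in the
+ * guc_action_*() helpers in this file. A send that expects no response
+ * payload passes a NULL response buffer:
+ */
+static inline int example_ct_send_simple(struct intel_guc_ct *ct)
+{
+ u32 action[] = {
+ INTEL_GUC_ACTION_SAMPLE_FORCEWAKE,
+ 0, /* action specific parameter (illustrative) */
+ };
+
+ return intel_guc_ct_send(ct, action, ARRAY_SIZE(action), NULL, 0);
+}
+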
+static inline unsigned int ct_header_get_len(u32 header)
+{
+ return (header >> GUC_CT_MSG_LEN_SHIFT) & GUC_CT_MSG_LEN_MASK;
+}
+
+static inline unsigned int ct_header_get_action(u32 header)
+{
+ return (header >> GUC_CT_MSG_ACTION_SHIFT) & GUC_CT_MSG_ACTION_MASK;
+}
+
+static inline bool ct_header_is_response(u32 header)
+{
+ return !!(header & GUC_CT_MSG_IS_RESPONSE);
+}
+
+static int ctb_read(struct intel_guc_ct_buffer *ctb, u32 *data)
+{
+ struct guc_ct_buffer_desc *desc = ctb->desc;
+ u32 head = desc->head / 4; /* in dwords */
+ u32 tail = desc->tail / 4; /* in dwords */
+ u32 size = desc->size / 4; /* in dwords */
+ u32 *cmds = ctb->cmds;
+ s32 available; /* in dwords */
+ unsigned int len;
+ unsigned int i;
+
+ GEM_BUG_ON(desc->size % 4);
+ GEM_BUG_ON(desc->head % 4);
+ GEM_BUG_ON(desc->tail % 4);
+ GEM_BUG_ON(tail >= size);
+ GEM_BUG_ON(head >= size);
+
+ /* tail == head condition indicates empty */
+ available = tail - head;
+ if (unlikely(available == 0))
+ return -ENODATA;
+
+ /* beware of buffer wrap case */
+ if (unlikely(available < 0))
+ available += size;
+ CT_DEBUG_DRIVER("CT: available %d (%u:%u)\n", available, head, tail);
+ GEM_BUG_ON(available < 0);
+
+ data[0] = cmds[head];
+ head = (head + 1) % size;
+
+ /* message len with header */
+ len = ct_header_get_len(data[0]) + 1;
+ if (unlikely(len > (u32)available)) {
+ DRM_ERROR("CT: incomplete message %*ph %*ph %*ph\n",
+ 4, data,
+ 4 * (head + available - 1 > size ?
+ size - head : available - 1), &cmds[head],
+ 4 * (head + available - 1 > size ?
+ available - 1 - size + head : 0), &cmds[0]);
+ return -EPROTO;
+ }
+
+ for (i = 1; i < len; i++) {
+ data[i] = cmds[head];
+ head = (head + 1) % size;
+ }
+ CT_DEBUG_DRIVER("CT: received %*ph\n", 4 * len, data);
+
+ desc->head = head * 4;
+ return 0;
+}
+
+/**
+ * DOC: CTB GuC to Host response
+ *
+ * Format of the CTB GuC to Host response message is as follows::
+ *
+ * +------------+---------+---------+---------+---------+---------+
+ * | msg[0] | [1] | [2] | [3] | ... | [n-1] |
+ * +------------+---------+---------+---------+---------+---------+
+ * | MESSAGE | MESSAGE PAYLOAD |
+ * + HEADER +---------+---------+---------+---------+---------+
+ * | | 0 | 1 | 2 | ... | n |
+ * +============+=========+=========+=========+=========+=========+
+ * | len >= 2 | FENCE | STATUS | response specific data |
+ * +------+-----+---------+---------+---------+---------+---------+
+ *
+ * ^-----------------------len-----------------------^
+ */
+
+static int ct_handle_response(struct intel_guc_ct *ct, const u32 *msg)
+{
+ u32 header = msg[0];
+ u32 len = ct_header_get_len(header);
+ u32 msglen = len + 1; /* total message length including header */
+ u32 fence;
+ u32 status;
+ u32 datalen;
+ struct ct_request *req;
+ bool found = false;
+
+ GEM_BUG_ON(!ct_header_is_response(header));
+ GEM_BUG_ON(!in_irq());
+
+ /* Response payload shall at least include fence and status */
+ if (unlikely(len < 2)) {
+ DRM_ERROR("CT: corrupted response %*ph\n", 4 * msglen, msg);
+ return -EPROTO;
+ }
+
+ fence = msg[1];
+ status = msg[2];
+ datalen = len - 2;
+
+ /* Format of the status follows RESPONSE message */
+ if (unlikely(!INTEL_GUC_MSG_IS_RESPONSE(status))) {
+ DRM_ERROR("CT: corrupted response %*ph\n", 4 * msglen, msg);
+ return -EPROTO;
+ }
+
+ CT_DEBUG_DRIVER("CT: response fence %u status %#x\n", fence, status);
+
+ spin_lock(&ct->requests.lock);
+ list_for_each_entry(req, &ct->requests.pending, link) {
+ if (unlikely(fence != req->fence)) {
+ CT_DEBUG_DRIVER("CT: request %u awaits response\n",
+ req->fence);
+ continue;
+ }
+ if (unlikely(datalen > req->response_len)) {
+ DRM_ERROR("CT: response %u too long %*ph\n",
+ req->fence, 4 * msglen, msg);
+ datalen = 0;
+ }
+ if (datalen)
+ memcpy(req->response_buf, msg + 3, 4 * datalen);
+ req->response_len = datalen;
+ WRITE_ONCE(req->status, status);
+ found = true;
+ break;
+ }
+ spin_unlock(&ct->requests.lock);
+
+ if (!found)
+ DRM_ERROR("CT: unsolicited response %*ph\n", 4 * msglen, msg);
+ return 0;
+}
+
+static void ct_process_request(struct intel_guc_ct *ct,
+ u32 action, u32 len, const u32 *payload)
+{
+ struct intel_guc *guc = ct_to_guc(ct);
+ int ret;
+
+ CT_DEBUG_DRIVER("CT: request %x %*ph\n", action, 4 * len, payload);
+
+ switch (action) {
+ case INTEL_GUC_ACTION_DEFAULT:
+ ret = intel_guc_to_host_process_recv_msg(guc, payload, len);
+ if (unlikely(ret))
+ goto fail_unexpected;
+ break;
+
+ default:
+fail_unexpected:
+ DRM_ERROR("CT: unexpected request %x %*ph\n",
+ action, 4 * len, payload);
+ break;
+ }
+}
+
+static bool ct_process_incoming_requests(struct intel_guc_ct *ct)
+{
+ unsigned long flags;
+ struct ct_incoming_request *request;
+ u32 header;
+ u32 *payload;
+ bool done;
+
+ spin_lock_irqsave(&ct->requests.lock, flags);
+ request = list_first_entry_or_null(&ct->requests.incoming,
+ struct ct_incoming_request, link);
+ if (request)
+ list_del(&request->link);
+ done = !!list_empty(&ct->requests.incoming);
+ spin_unlock_irqrestore(&ct->requests.lock, flags);
+
+ if (!request)
+ return true;
+
+ header = request->msg[0];
+ payload = &request->msg[1];
+ ct_process_request(ct,
+ ct_header_get_action(header),
+ ct_header_get_len(header),
+ payload);
+
+ kfree(request);
+ return done;
+}
+
+static void ct_incoming_request_worker_func(struct work_struct *w)
+{
+ struct intel_guc_ct *ct =
+ container_of(w, struct intel_guc_ct, requests.worker);
+ bool done;
+
+ done = ct_process_incoming_requests(ct);
+ if (!done)
+ queue_work(system_unbound_wq, &ct->requests.worker);
+}
+
+/**
+ * DOC: CTB GuC to Host request
+ *
+ * Format of the CTB GuC to Host request message is as follows::
+ *
+ * +------------+---------+---------+---------+---------+---------+
+ * | msg[0] | [1] | [2] | [3] | ... | [n-1] |
+ * +------------+---------+---------+---------+---------+---------+
+ * | MESSAGE | MESSAGE PAYLOAD |
+ * + HEADER +---------+---------+---------+---------+---------+
+ * | | 0 | 1 | 2 | ... | n |
+ * +============+=========+=========+=========+=========+=========+
+ * | len | request specific data |
+ * +------+-----+---------+---------+---------+---------+---------+
+ *
+ * ^-----------------------len-----------------------^
+ */
+
+static int ct_handle_request(struct intel_guc_ct *ct, const u32 *msg)
+{
+ u32 header = msg[0];
+ u32 len = ct_header_get_len(header);
+ u32 msglen = len + 1; /* total message length including header */
+ struct ct_incoming_request *request;
+ unsigned long flags;
+
+ GEM_BUG_ON(ct_header_is_response(header));
+
+ request = kmalloc(sizeof(*request) + 4 * msglen, GFP_ATOMIC);
+ if (unlikely(!request)) {
+ DRM_ERROR("CT: dropping request %*ph\n", 4 * msglen, msg);
+ return 0; /* XXX: -ENOMEM ? */
+ }
+ memcpy(request->msg, msg, 4 * msglen);
+
+ spin_lock_irqsave(&ct->requests.lock, flags);
+ list_add_tail(&request->link, &ct->requests.incoming);
+ spin_unlock_irqrestore(&ct->requests.lock, flags);
+
+ queue_work(system_unbound_wq, &ct->requests.worker);
+ return 0;
+}
+
+/*
+ * When we're communicating with the GuC over CT, GuC uses events
+ * to notify us about new messages being posted on the RECV buffer.
+ */
+void intel_guc_ct_event_handler(struct intel_guc_ct *ct)
+{
+ struct intel_guc_ct_buffer *ctb = &ct->ctbs[CTB_RECV];
+ u32 msg[GUC_CT_MSG_LEN_MASK + 1]; /* one extra dw for the header */
+ int err = 0;
+
+ if (unlikely(!ct->enabled)) {
+ WARN(1, "Unexpected GuC event received while CT disabled!\n");
+ return;
+ }
+
+ do {
+ err = ctb_read(ctb, msg);
+ if (err)
+ break;
+
+ if (ct_header_is_response(msg[0]))
+ err = ct_handle_response(ct, msg);
+ else
+ err = ct_handle_request(ct, msg);
+ } while (!err);
+
+ if (GEM_WARN_ON(err == -EPROTO)) {
+ DRM_ERROR("CT: corrupted message detected!\n");
+ ctb->desc->is_in_error = 1;
+ }
+}
+
diff --git a/drivers/gpu/drm/i915/gt/uc/intel_guc_ct.h b/drivers/gpu/drm/i915/gt/uc/intel_guc_ct.h
new file mode 100644
index 000000000000..3e7fe237cfa5
--- /dev/null
+++ b/drivers/gpu/drm/i915/gt/uc/intel_guc_ct.h
@@ -0,0 +1,77 @@
+/* SPDX-License-Identifier: MIT */
+/*
+ * Copyright © 2016-2019 Intel Corporation
+ */
+
+#ifndef _INTEL_GUC_CT_H_
+#define _INTEL_GUC_CT_H_
+
+#include <linux/spinlock.h>
+#include <linux/workqueue.h>
+
+#include "intel_guc_fwif.h"
+
+struct i915_vma;
+struct intel_guc;
+
+/**
+ * DOC: Command Transport (CT).
+ *
+ * Buffer based command transport is a replacement for the MMIO based
+ * mechanism. It can be used to perform both host-to-GuC and GuC-to-host
+ * communication.
+ */
+
+/** Represents a single command transport buffer.
+ *
+ * A single command transport buffer consists of two parts, the header
+ * record (command transport buffer descriptor) and the actual buffer which
+ * holds the commands.
+ *
+ * @desc: pointer to the buffer descriptor
+ * @cmds: pointer to the commands buffer
+ */
+struct intel_guc_ct_buffer {
+ struct guc_ct_buffer_desc *desc;
+ u32 *cmds;
+};
+
+/** Top-level structure for Command Transport related data
+ *
+ * Includes a pair of CT buffers for bi-directional communication and tracking
+ * for the H2G and G2H requests sent and received through the buffers.
+ */
+struct intel_guc_ct {
+ struct i915_vma *vma;
+ bool enabled;
+
+ /* buffers for sending(0) and receiving(1) commands */
+ struct intel_guc_ct_buffer ctbs[2];
+
+ struct {
+ u32 next_fence; /* fence to be used with next request to send */
+
+ spinlock_t lock; /* protects pending requests list */
+ struct list_head pending; /* requests waiting for response */
+
+ struct list_head incoming; /* incoming requests */
+ struct work_struct worker; /* handler for incoming requests */
+ } requests;
+};
+
+void intel_guc_ct_init_early(struct intel_guc_ct *ct);
+int intel_guc_ct_init(struct intel_guc_ct *ct);
+void intel_guc_ct_fini(struct intel_guc_ct *ct);
+int intel_guc_ct_enable(struct intel_guc_ct *ct);
+void intel_guc_ct_disable(struct intel_guc_ct *ct);
+
+static inline bool intel_guc_ct_enabled(struct intel_guc_ct *ct)
+{
+ return ct->enabled;
+}
+
+int intel_guc_ct_send(struct intel_guc_ct *ct, const u32 *action, u32 len,
+ u32 *response_buf, u32 response_buf_size);
+void intel_guc_ct_event_handler(struct intel_guc_ct *ct);
+
+#endif /* _INTEL_GUC_CT_H_ */
diff --git a/drivers/gpu/drm/i915/gt/uc/intel_guc_fw.c b/drivers/gpu/drm/i915/gt/uc/intel_guc_fw.c
new file mode 100644
index 000000000000..3a1c47d600ea
--- /dev/null
+++ b/drivers/gpu/drm/i915/gt/uc/intel_guc_fw.c
@@ -0,0 +1,166 @@
+// SPDX-License-Identifier: MIT
+/*
+ * Copyright © 2014-2019 Intel Corporation
+ *
+ * Authors:
+ * Vinit Azad <vinit.azad@intel.com>
+ * Ben Widawsky <ben@bwidawsk.net>
+ * Dave Gordon <david.s.gordon@intel.com>
+ * Alex Dai <yu.dai@intel.com>
+ */
+
+#include "gt/intel_gt.h"
+#include "intel_guc_fw.h"
+#include "i915_drv.h"
+
+/**
+ * intel_guc_fw_init_early() - initializes GuC firmware struct
+ * @guc: intel_guc struct
+ *
+ * On platforms with GuC selects firmware for uploading
+ */
+void intel_guc_fw_init_early(struct intel_guc *guc)
+{
+ struct drm_i915_private *i915 = guc_to_gt(guc)->i915;
+
+ intel_uc_fw_init_early(&guc->fw, INTEL_UC_FW_TYPE_GUC, HAS_GT_UC(i915),
+ INTEL_INFO(i915)->platform, INTEL_REVID(i915));
+}
+
+static void guc_prepare_xfer(struct intel_uncore *uncore)
+{
+ u32 shim_flags = GUC_DISABLE_SRAM_INIT_TO_ZEROES |
+ GUC_ENABLE_READ_CACHE_LOGIC |
+ GUC_ENABLE_MIA_CACHING |
+ GUC_ENABLE_READ_CACHE_FOR_SRAM_DATA |
+ GUC_ENABLE_READ_CACHE_FOR_WOPCM_DATA |
+ GUC_ENABLE_MIA_CLOCK_GATING;
+
+ /* Must program this register before loading the ucode with DMA */
+ intel_uncore_write(uncore, GUC_SHIM_CONTROL, shim_flags);
+
+ if (IS_GEN9_LP(uncore->i915))
+ intel_uncore_write(uncore, GEN9LP_GT_PM_CONFIG, GT_DOORBELL_ENABLE);
+ else
+ intel_uncore_write(uncore, GEN9_GT_PM_CONFIG, GT_DOORBELL_ENABLE);
+
+ if (IS_GEN(uncore->i915, 9)) {
+ /* DOP Clock Gating Enable for GuC clocks */
+ intel_uncore_rmw(uncore, GEN7_MISCCPCTL,
+ 0, GEN8_DOP_CLOCK_GATE_GUC_ENABLE);
+
+ /* allows for 5us (in 10ns units) before GT can go to RC6 */
+ intel_uncore_write(uncore, GUC_ARAT_C6DIS, 0x1FF);
+ }
+}
+
+/* Copy RSA signature from the fw image to HW for verification */
+static void guc_xfer_rsa(struct intel_uc_fw *guc_fw,
+ struct intel_uncore *uncore)
+{
+ u32 rsa[UOS_RSA_SCRATCH_COUNT];
+ size_t copied;
+ int i;
+
+ copied = intel_uc_fw_copy_rsa(guc_fw, rsa, sizeof(rsa));
+ GEM_BUG_ON(copied < sizeof(rsa));
+
+ for (i = 0; i < UOS_RSA_SCRATCH_COUNT; i++)
+ intel_uncore_write(uncore, UOS_RSA_SCRATCH(i), rsa[i]);
+}
+
+/*
+ * Read the GuC status register (GUC_STATUS) and store it in the
+ * specified location; then return a boolean indicating whether
+ * the value matches either of two values representing completion
+ * of the GuC boot process.
+ *
+ * This is used for polling the GuC status in a wait_for()
+ * loop below.
+ */
+static inline bool guc_ready(struct intel_uncore *uncore, u32 *status)
+{
+ u32 val = intel_uncore_read(uncore, GUC_STATUS);
+ u32 uk_val = val & GS_UKERNEL_MASK;
+
+ *status = val;
+ return (uk_val == GS_UKERNEL_READY) ||
+ ((val & GS_MIA_CORE_STATE) && (uk_val == GS_UKERNEL_LAPIC_DONE));
+}
+
+static int guc_wait_ucode(struct intel_uncore *uncore)
+{
+ u32 status;
+ int ret;
+
+ /*
+ * Wait for the GuC to start up.
+ * NB: Docs recommend not using the interrupt for completion.
+ * Measurements indicate this should take no more than 20ms, so a
+ * timeout here indicates that the GuC has failed and is unusable.
+ * (Higher levels of the driver may decide to reset the GuC and
+ * attempt the ucode load again if this happens.)
+ */
+ ret = wait_for(guc_ready(uncore, &status), 100);
+ DRM_DEBUG_DRIVER("GuC status %#x\n", status);
+
+ if ((status & GS_BOOTROM_MASK) == GS_BOOTROM_RSA_FAILED) {
+ DRM_ERROR("GuC firmware signature verification failed\n");
+ ret = -ENOEXEC;
+ }
+
+ if ((status & GS_UKERNEL_MASK) == GS_UKERNEL_EXCEPTION) {
+ DRM_ERROR("GuC firmware exception. EIP: %#x\n",
+ intel_uncore_read(uncore, SOFT_SCRATCH(13)));
+ ret = -ENXIO;
+ }
+
+ return ret;
+}
+
+/**
+ * intel_guc_fw_upload() - load GuC uCode to device
+ * @guc: intel_guc structure
+ *
+ * Called from intel_uc_init_hw() during driver load, resume from sleep and
+ * after a GPU reset.
+ *
+ * The firmware image should have already been fetched into memory, so only
+ * check that fetch succeeded, and then transfer the image to the h/w.
+ *
+ * Return: non-zero code on error
+ */
+int intel_guc_fw_upload(struct intel_guc *guc)
+{
+ struct intel_gt *gt = guc_to_gt(guc);
+ struct intel_uncore *uncore = gt->uncore;
+ int ret;
+
+ guc_prepare_xfer(uncore);
+
+ /*
+ * Note that GuC needs the CSS header plus uKernel code to be copied
+ * by the DMA engine in one operation, whereas the RSA signature is
+ * loaded via MMIO.
+ */
+ guc_xfer_rsa(&guc->fw, uncore);
+
+ /*
+ * Current uCode expects the code to be loaded at 8k; locations below
+ * this are used for the stack.
+ */
+ ret = intel_uc_fw_upload(&guc->fw, 0x2000, UOS_MOVE);
+ if (ret)
+ goto out;
+
+ ret = guc_wait_ucode(uncore);
+ if (ret)
+ goto out;
+
+ intel_uc_fw_change_status(&guc->fw, INTEL_UC_FIRMWARE_RUNNING);
+ return 0;
+
+out:
+ intel_uc_fw_change_status(&guc->fw, INTEL_UC_FIRMWARE_FAIL);
+ return ret;
+}
diff --git a/drivers/gpu/drm/i915/gt/uc/intel_guc_fw.h b/drivers/gpu/drm/i915/gt/uc/intel_guc_fw.h
new file mode 100644
index 000000000000..b5ab639d7259
--- /dev/null
+++ b/drivers/gpu/drm/i915/gt/uc/intel_guc_fw.h
@@ -0,0 +1,14 @@
+/* SPDX-License-Identifier: MIT */
+/*
+ * Copyright © 2017-2019 Intel Corporation
+ */
+
+#ifndef _INTEL_GUC_FW_H_
+#define _INTEL_GUC_FW_H_
+
+struct intel_guc;
+
+void intel_guc_fw_init_early(struct intel_guc *guc);
+int intel_guc_fw_upload(struct intel_guc *guc);
+
+#endif
diff --git a/drivers/gpu/drm/i915/gt/uc/intel_guc_fwif.h b/drivers/gpu/drm/i915/gt/uc/intel_guc_fwif.h
new file mode 100644
index 000000000000..a6b733c146c9
--- /dev/null
+++ b/drivers/gpu/drm/i915/gt/uc/intel_guc_fwif.h
@@ -0,0 +1,603 @@
+/* SPDX-License-Identifier: MIT */
+/*
+ * Copyright © 2014-2019 Intel Corporation
+ */
+
+#ifndef _INTEL_GUC_FWIF_H
+#define _INTEL_GUC_FWIF_H
+
+#include <linux/bits.h>
+#include <linux/compiler.h>
+#include <linux/types.h>
+
+#define GUC_CLIENT_PRIORITY_KMD_HIGH 0
+#define GUC_CLIENT_PRIORITY_HIGH 1
+#define GUC_CLIENT_PRIORITY_KMD_NORMAL 2
+#define GUC_CLIENT_PRIORITY_NORMAL 3
+#define GUC_CLIENT_PRIORITY_NUM 4
+
+#define GUC_MAX_STAGE_DESCRIPTORS 1024
+#define GUC_INVALID_STAGE_ID GUC_MAX_STAGE_DESCRIPTORS
+
+#define GUC_RENDER_ENGINE 0
+#define GUC_VIDEO_ENGINE 1
+#define GUC_BLITTER_ENGINE 2
+#define GUC_VIDEOENHANCE_ENGINE 3
+#define GUC_VIDEO_ENGINE2 4
+#define GUC_MAX_ENGINES_NUM (GUC_VIDEO_ENGINE2 + 1)
+
+#define GUC_MAX_ENGINE_CLASSES 5
+#define GUC_MAX_INSTANCES_PER_CLASS 16
+
+#define GUC_DOORBELL_INVALID 256
+
+#define GUC_WQ_SIZE (PAGE_SIZE * 2)
+
+/* Work queue item header definitions */
+#define WQ_STATUS_ACTIVE 1
+#define WQ_STATUS_SUSPENDED 2
+#define WQ_STATUS_CMD_ERROR 3
+#define WQ_STATUS_ENGINE_ID_NOT_USED 4
+#define WQ_STATUS_SUSPENDED_FROM_RESET 5
+#define WQ_TYPE_SHIFT 0
+#define WQ_TYPE_BATCH_BUF (0x1 << WQ_TYPE_SHIFT)
+#define WQ_TYPE_PSEUDO (0x2 << WQ_TYPE_SHIFT)
+#define WQ_TYPE_INORDER (0x3 << WQ_TYPE_SHIFT)
+#define WQ_TYPE_NOOP (0x4 << WQ_TYPE_SHIFT)
+#define WQ_TARGET_SHIFT 10
+#define WQ_LEN_SHIFT 16
+#define WQ_NO_WCFLUSH_WAIT (1 << 27)
+#define WQ_PRESENT_WORKLOAD (1 << 28)
+
+#define WQ_RING_TAIL_SHIFT 20
+#define WQ_RING_TAIL_MAX 0x7FF /* 2^11 QWords */
+#define WQ_RING_TAIL_MASK (WQ_RING_TAIL_MAX << WQ_RING_TAIL_SHIFT)
+
+#define GUC_STAGE_DESC_ATTR_ACTIVE BIT(0)
+#define GUC_STAGE_DESC_ATTR_PENDING_DB BIT(1)
+#define GUC_STAGE_DESC_ATTR_KERNEL BIT(2)
+#define GUC_STAGE_DESC_ATTR_PREEMPT BIT(3)
+#define GUC_STAGE_DESC_ATTR_RESET BIT(4)
+#define GUC_STAGE_DESC_ATTR_WQLOCKED BIT(5)
+#define GUC_STAGE_DESC_ATTR_PCH BIT(6)
+#define GUC_STAGE_DESC_ATTR_TERMINATED BIT(7)
+
+/* New GuC control data */
+#define GUC_CTL_CTXINFO 0
+#define GUC_CTL_CTXNUM_IN16_SHIFT 0
+#define GUC_CTL_BASE_ADDR_SHIFT 12
+
+#define GUC_CTL_LOG_PARAMS 1
+#define GUC_LOG_VALID (1 << 0)
+#define GUC_LOG_NOTIFY_ON_HALF_FULL (1 << 1)
+#define GUC_LOG_ALLOC_IN_MEGABYTE (1 << 3)
+#define GUC_LOG_CRASH_SHIFT 4
+#define GUC_LOG_CRASH_MASK (0x3 << GUC_LOG_CRASH_SHIFT)
+#define GUC_LOG_DPC_SHIFT 6
+#define GUC_LOG_DPC_MASK (0x7 << GUC_LOG_DPC_SHIFT)
+#define GUC_LOG_ISR_SHIFT 9
+#define GUC_LOG_ISR_MASK (0x7 << GUC_LOG_ISR_SHIFT)
+#define GUC_LOG_BUF_ADDR_SHIFT 12
+
+#define GUC_CTL_WA 2
+#define GUC_CTL_FEATURE 3
+#define GUC_CTL_DISABLE_SCHEDULER (1 << 14)
+
+#define GUC_CTL_DEBUG 4
+#define GUC_LOG_VERBOSITY_SHIFT 0
+#define GUC_LOG_VERBOSITY_LOW (0 << GUC_LOG_VERBOSITY_SHIFT)
+#define GUC_LOG_VERBOSITY_MED (1 << GUC_LOG_VERBOSITY_SHIFT)
+#define GUC_LOG_VERBOSITY_HIGH (2 << GUC_LOG_VERBOSITY_SHIFT)
+#define GUC_LOG_VERBOSITY_ULTRA (3 << GUC_LOG_VERBOSITY_SHIFT)
+/* Verbosity range-check limits, without the shift */
+#define GUC_LOG_VERBOSITY_MIN 0
+#define GUC_LOG_VERBOSITY_MAX 3
+#define GUC_LOG_VERBOSITY_MASK 0x0000000f
+#define GUC_LOG_DESTINATION_MASK (3 << 4)
+#define GUC_LOG_DISABLED (1 << 6)
+#define GUC_PROFILE_ENABLED (1 << 7)
+
+#define GUC_CTL_ADS 5
+#define GUC_ADS_ADDR_SHIFT 1
+#define GUC_ADS_ADDR_MASK (0xFFFFF << GUC_ADS_ADDR_SHIFT)
+
+#define GUC_CTL_MAX_DWORDS (SOFT_SCRATCH_COUNT - 2) /* [1..14] */
+
+/* Work item for submitting workloads into the work queue of GuC. */
+struct guc_wq_item {
+ u32 header;
+ u32 context_desc;
+ u32 submit_element_info;
+ u32 fence_id;
+} __packed;
+
+struct guc_process_desc {
+ u32 stage_id;
+ u64 db_base_addr;
+ u32 head;
+ u32 tail;
+ u32 error_offset;
+ u64 wq_base_addr;
+ u32 wq_size_bytes;
+ u32 wq_status;
+ u32 engine_presence;
+ u32 priority;
+ u32 reserved[30];
+} __packed;
+
+/* engine id and context id are packed into guc_execlist_context.context_id */
+#define GUC_ELC_CTXID_OFFSET 0
+#define GUC_ELC_ENGINE_OFFSET 29
+
+/* The execlist context including software and HW information */
+struct guc_execlist_context {
+ u32 context_desc;
+ u32 context_id;
+ u32 ring_status;
+ u32 ring_lrca;
+ u32 ring_begin;
+ u32 ring_end;
+ u32 ring_next_free_location;
+ u32 ring_current_tail_pointer_value;
+ u8 engine_state_submit_value;
+ u8 engine_state_wait_value;
+ u16 pagefault_count;
+ u16 engine_submit_queue_count;
+} __packed;
+
+/*
+ * This structure describes a stage set arranged for a particular communication
+ * between uKernel (GuC) and Driver (KMD). Technically, this is known as a
+ * "GuC Context descriptor" in the specs, but we use the term "stage descriptor"
+ * to avoid confusion with all the other things already named "context" in the
+ * driver. A static pool of these descriptors is stored inside a GEM object
+ * (stage_desc_pool), which is held for the entire lifetime of our interaction
+ * with the GuC, being allocated before the GuC is loaded with its firmware.
+ */
+struct guc_stage_desc {
+ u32 sched_common_area;
+ u32 stage_id;
+ u32 pas_id;
+ u8 engines_used;
+ u64 db_trigger_cpu;
+ u32 db_trigger_uk;
+ u64 db_trigger_phy;
+ u16 db_id;
+
+ struct guc_execlist_context lrc[GUC_MAX_ENGINES_NUM];
+
+ u8 attribute;
+
+ u32 priority;
+
+ u32 wq_sampled_tail_offset;
+ u32 wq_total_submit_enqueues;
+
+ u32 process_desc;
+ u32 wq_addr;
+ u32 wq_size;
+
+ u32 engine_presence;
+
+ u8 engine_suspended;
+
+ u8 reserved0[3];
+ u64 reserved1[1];
+
+ u64 desc_private;
+} __packed;
+
+/**
+ * DOC: CTB based communication
+ *
+ * The CTB (command transport buffer) communication between Host and GuC
+ * is based on u32 data stream written to the shared buffer. One buffer can
+ * be used to transmit data only in one direction (one-directional channel).
+ *
+ * The current status of each buffer is stored in the buffer descriptor.
+ * The buffer descriptor holds tail and head fields that represent the
+ * active data stream. The tail field is updated by the data producer
+ * (sender), and the head field is updated by the data consumer (receiver)::
+ *
+ * +------------+
+ * | DESCRIPTOR | +=================+============+========+
+ * +============+ | | MESSAGE(s) | |
+ * | address |--------->+=================+============+========+
+ * +------------+
+ * | head | ^-----head--------^
+ * +------------+
+ * | tail | ^---------tail-----------------^
+ * +------------+
+ * | size | ^---------------size--------------------^
+ * +------------+
+ *
+ * Each message in the data stream starts with a single u32 treated as a
+ * header, followed by an optional set of u32 data making up the message
+ * specific payload::
+ *
+ * +------------+---------+---------+---------+
+ * | MESSAGE |
+ * +------------+---------+---------+---------+
+ * | msg[0] | [1] | ... | [n-1] |
+ * +------------+---------+---------+---------+
+ * | MESSAGE | MESSAGE PAYLOAD |
+ * + HEADER +---------+---------+---------+
+ * | | 0 | ... | n |
+ * +======+=====+=========+=========+=========+
+ * | 31:16| code| | | |
+ * +------+-----+ | | |
+ * | 15:5|flags| | | |
+ * +------+-----+ | | |
+ * | 4:0| len| | | |
+ * +------+-----+---------+---------+---------+
+ *
+ * ^-------------len-------------^
+ *
+ * The message header consists of:
+ *
+ * - **len**, indicates length of the message payload (in u32)
+ * - **code**, indicates message code
+ * - **flags**, holds various bits to control message handling
+ */
+
+/*
+ * Describes a single command transport buffer.
+ * Used by both guc-master and clients.
+ */
+struct guc_ct_buffer_desc {
+ u32 addr; /* gfx address */
+ u64 host_private; /* host private data */
+ u32 size; /* size in bytes */
+ u32 head; /* offset updated by GuC */
+ u32 tail; /* offset updated by owner */
+ u32 is_in_error; /* error indicator */
+ u32 fence; /* fence updated by GuC */
+ u32 status; /* status updated by GuC */
+ u32 owner; /* id of the channel owner */
+ u32 owner_sub_id; /* owner-defined field for extra tracking */
+ u32 reserved[5];
+} __packed;
+
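+/*
+ * Sketch (hypothetical helper, for exposition only): the circular
+ * head/tail scheme documented above implies the following used-space
+ * computation; ctb_write() in intel_guc_ct.c does the same in dwords.
+ */
+static inline u32 example_ctb_used_bytes(const struct guc_ct_buffer_desc *desc)
+{
+ /* tail == head means empty; tail may have wrapped behind head */
+ return desc->tail >= desc->head ?
+ desc->tail - desc->head :
+ desc->size - desc->head + desc->tail;
+}
+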
+/* Type of command transport buffer */
+#define INTEL_GUC_CT_BUFFER_TYPE_SEND 0x0u
+#define INTEL_GUC_CT_BUFFER_TYPE_RECV 0x1u
+
+/*
+ * Definition of the command transport message header (DW0)
+ *
+ * bit[4..0] message len (in dwords)
+ * bit[7..5] reserved
+ * bit[8] response (G2H only)
+ * bit[8] write fence to desc (H2G only)
+ * bit[9] write status to H2G buff (H2G only)
+ * bit[10] send status back via G2H (H2G only)
+ * bit[15..11] reserved
+ * bit[31..16] action code
+ */
+#define GUC_CT_MSG_LEN_SHIFT 0
+#define GUC_CT_MSG_LEN_MASK 0x1F
+#define GUC_CT_MSG_IS_RESPONSE (1 << 8)
+#define GUC_CT_MSG_WRITE_FENCE_TO_DESC (1 << 8)
+#define GUC_CT_MSG_WRITE_STATUS_TO_BUFF (1 << 9)
+#define GUC_CT_MSG_SEND_STATUS (1 << 10)
+#define GUC_CT_MSG_ACTION_SHIFT 16
+#define GUC_CT_MSG_ACTION_MASK 0xFFFF
+
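+/*
+ * Decode sketch (mirrors the ct_header_get_*() helpers in
+ * intel_guc_ct.c): unpacking the header dword laid out above.
+ */
+static inline u32 example_ct_msg_action(u32 header)
+{
+ return (header >> GUC_CT_MSG_ACTION_SHIFT) & GUC_CT_MSG_ACTION_MASK;
+}
+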
+#define GUC_FORCEWAKE_RENDER (1 << 0)
+#define GUC_FORCEWAKE_MEDIA (1 << 1)
+
+#define GUC_POWER_UNSPECIFIED 0
+#define GUC_POWER_D0 1
+#define GUC_POWER_D1 2
+#define GUC_POWER_D2 3
+#define GUC_POWER_D3 4
+
+/* Scheduling policy settings */
+
+/* Reset engine upon preempt failure */
+#define POLICY_RESET_ENGINE (1<<0)
+/* Preempt to idle on quantum expiry */
+#define POLICY_PREEMPT_TO_IDLE (1<<1)
+
+#define POLICY_MAX_NUM_WI 15
+#define POLICY_DEFAULT_DPC_PROMOTE_TIME_US 500000
+#define POLICY_DEFAULT_EXECUTION_QUANTUM_US 1000000
+#define POLICY_DEFAULT_PREEMPTION_TIME_US 500000
+#define POLICY_DEFAULT_FAULT_TIME_US 250000
+
+struct guc_policy {
+ /* Time for one workload to execute (in micro seconds) */
+ u32 execution_quantum;
+ /* Time to wait for a preemption request to complete before issuing a
+ * reset (in micro seconds). */
+ u32 preemption_time;
+ /* How much time to allow the workload to run after the first fault
+ * is observed before preempting it (in micro seconds). */
+ u32 fault_time;
+ u32 policy_flags;
+ u32 reserved[8];
+} __packed;
+
+struct guc_policies {
+ struct guc_policy policy[GUC_CLIENT_PRIORITY_NUM][GUC_MAX_ENGINE_CLASSES];
+ u32 submission_queue_depth[GUC_MAX_ENGINE_CLASSES];
+ /* How much time to allow before DPC processing is called back via
+ * interrupt (to prevent starving the DPC queue drain), in micro
+ * seconds. Typically thousands of micro seconds (example only, not
+ * granularity). */
+ u32 dpc_promote_time;
+
+ /* Must be set to take these new values. */
+ u32 is_valid;
+
+ /* Max number of WIs to process per call. A large value may keep CS
+ * idle. */
+ u32 max_num_work_items;
+
+ u32 reserved[4];
+} __packed;
+
+/* GuC MMIO reg state struct */
+
+#define GUC_REGSET_MAX_REGISTERS 64
+#define GUC_S3_SAVE_SPACE_PAGES 10
+
+struct guc_mmio_reg {
+ u32 offset;
+ u32 value;
+ u32 flags;
+#define GUC_REGSET_MASKED (1 << 0)
+} __packed;
+
+struct guc_mmio_regset {
+ struct guc_mmio_reg registers[GUC_REGSET_MAX_REGISTERS];
+ u32 values_valid;
+ u32 number_of_registers;
+} __packed;
+
+/* GuC register sets */
+struct guc_mmio_reg_state {
+ struct guc_mmio_regset engine_reg[GUC_MAX_ENGINE_CLASSES][GUC_MAX_INSTANCES_PER_CLASS];
+ u32 reserved[98];
+} __packed;
+
+/* HW info */
+struct guc_gt_system_info {
+ u32 slice_enabled;
+ u32 rcs_enabled;
+ u32 reserved0;
+ u32 bcs_enabled;
+ u32 vdbox_enable_mask;
+ u32 vdbox_sfc_support_mask;
+ u32 vebox_enable_mask;
+ u32 reserved[9];
+} __packed;
+
+/* Clients info */
+struct guc_ct_pool_entry {
+ struct guc_ct_buffer_desc desc;
+ u32 reserved[7];
+} __packed;
+
+#define GUC_CT_POOL_SIZE 2
+
+struct guc_clients_info {
+ u32 clients_num;
+ u32 reserved0[13];
+ u32 ct_pool_addr;
+ u32 ct_pool_count;
+ u32 reserved[4];
+} __packed;
+
+/* GuC Additional Data Struct */
+struct guc_ads {
+ u32 reg_state_addr;
+ u32 reg_state_buffer;
+ u32 scheduler_policies;
+ u32 gt_system_info;
+ u32 clients_info;
+ u32 control_data;
+ u32 golden_context_lrca[GUC_MAX_ENGINE_CLASSES];
+ u32 eng_state_size[GUC_MAX_ENGINE_CLASSES];
+ u32 reserved[16];
+} __packed;
+
+/* GuC logging structures */
+
+enum guc_log_buffer_type {
+ GUC_ISR_LOG_BUFFER,
+ GUC_DPC_LOG_BUFFER,
+ GUC_CRASH_DUMP_LOG_BUFFER,
+ GUC_MAX_LOG_BUFFER
+};
+
+/**
+ * struct guc_log_buffer_state - GuC log buffer state
+ *
+ * The state structure below is used to coordinate retrieval of GuC firmware
+ * logs. Separate state is maintained for each log buffer type.
+ * read_ptr points to the location in the log buffer that i915 last read
+ * from, and is read-only for the GuC firmware. write_ptr is incremented by
+ * GuC by the number of bytes written for each log entry, and is read-only
+ * for i915.
+ * When any type of log buffer becomes half full, GuC sends a flush interrupt.
+ * The GuC firmware expects that while it writes to the 2nd half of the
+ * buffer, the first half is consumed by the Host and a flush completed
+ * acknowledgment is received from the Host, so that it never overwrites
+ * data and loses logs. So when a buffer becomes half full and i915 has
+ * requested an interrupt, GuC sets the flush_to_file field, sets
+ * sampled_write_ptr to the value of write_ptr, and raises the interrupt.
+ * On receiving the interrupt, i915 should read the buffer, clear the
+ * flush_to_file field and update read_ptr with the value of
+ * sampled_write_ptr, before sending an acknowledgment to GuC. The marker
+ * and version fields are for internal use by GuC and opaque to i915. The
+ * buffer_full_cnt field is incremented every time GuC detects a log buffer
+ * overflow.
+ */
+struct guc_log_buffer_state {
+ u32 marker[2];
+ u32 read_ptr;
+ u32 write_ptr;
+ u32 size;
+ u32 sampled_write_ptr;
+ union {
+ struct {
+ u32 flush_to_file:1;
+ u32 buffer_full_cnt:4;
+ u32 reserved:27;
+ };
+ u32 flags;
+ };
+ u32 version;
+} __packed;
+
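+/*
+ * Host-side flush handling sketch (hypothetical helper, for exposition
+ * only; guc_read_update_log_buffer() in intel_guc_log.c is the real
+ * implementation of the protocol described above).
+ */
+static inline void example_ack_log_flush(struct guc_log_buffer_state *state)
+{
+ u32 sampled = READ_ONCE(state->sampled_write_ptr);
+
+ /* ... copy log data up to 'sampled' out of the shared buffer ... */
+
+ state->read_ptr = sampled; /* tell GuC how far we consumed */
+ state->flush_to_file = 0; /* clear before acknowledging the flush */
+}
+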
+struct guc_ctx_report {
+ u32 report_return_status;
+ u32 reserved1[64];
+ u32 affected_count;
+ u32 reserved2[2];
+} __packed;
+
+/* GuC Shared Context Data Struct */
+struct guc_shared_ctx_data {
+ u32 addr_of_last_preempted_data_low;
+ u32 addr_of_last_preempted_data_high;
+ u32 addr_of_last_preempted_data_high_tmp;
+ u32 padding;
+ u32 is_mapped_to_proxy;
+ u32 proxy_ctx_id;
+ u32 engine_reset_ctx_id;
+ u32 media_reset_count;
+ u32 reserved1[8];
+ u32 uk_last_ctx_switch_reason;
+ u32 was_reset;
+ u32 lrca_gpu_addr;
+ u64 execlist_ctx;
+ u32 reserved2[66];
+ struct guc_ctx_report preempt_ctx_report[GUC_MAX_ENGINES_NUM];
+} __packed;
+
+/**
+ * DOC: MMIO based communication
+ *
+ * The MMIO based communication between Host and GuC uses software scratch
+ * registers, where the first register holds data treated as the message
+ * header, and the remaining registers hold the message payload.
+ *
+ * For Gen9+, GuC uses software scratch registers 0xC180-0xC1B8,
+ * but no H2G command takes more than 8 parameters and the GuC FW
+ * itself uses an 8-element array to store the H2G message.
+ *
+ * +-----------+---------+---------+---------+
+ * | MMIO[0] | MMIO[1] | ... | MMIO[n] |
+ * +-----------+---------+---------+---------+
+ * | header | optional payload |
+ * +======+====+=========+=========+=========+
+ * | 31:28|type| | | |
+ * +------+----+ | | |
+ * | 27:16|data| | | |
+ * +------+----+ | | |
+ * | 15:0|code| | | |
+ * +------+----+---------+---------+---------+
+ *
+ * The message header consists of:
+ *
+ * - **type**, indicates message type
+ * - **code**, indicates message code, specific to **type**
+ * - **data**, indicates message data, optional, depends on **code**
+ *
+ * The following message **types** are supported:
+ *
+ * - **REQUEST**, indicates Host-to-GuC request, requested GuC action code
+ * must be provided in the **code** field. Optional action specific
+ * parameters can be provided in the remaining payload registers or the
+ * **data** field.
+ *
+ * - **RESPONSE**, indicates GuC-to-Host response from earlier GuC request,
+ * action response status will be provided in **code** field. Optional
+ * response data can be returned in remaining payload registers or **data**
+ * field.
+ */
+
+#define GUC_MAX_MMIO_MSG_LEN 8
+
+#define INTEL_GUC_MSG_TYPE_SHIFT 28
+#define INTEL_GUC_MSG_TYPE_MASK (0xF << INTEL_GUC_MSG_TYPE_SHIFT)
+#define INTEL_GUC_MSG_DATA_SHIFT 16
+#define INTEL_GUC_MSG_DATA_MASK (0xFFF << INTEL_GUC_MSG_DATA_SHIFT)
+#define INTEL_GUC_MSG_CODE_SHIFT 0
+#define INTEL_GUC_MSG_CODE_MASK (0xFFFF << INTEL_GUC_MSG_CODE_SHIFT)
+
+#define __INTEL_GUC_MSG_GET(T, m) \
+ (((m) & INTEL_GUC_MSG_ ## T ## _MASK) >> INTEL_GUC_MSG_ ## T ## _SHIFT)
+#define INTEL_GUC_MSG_TO_TYPE(m) __INTEL_GUC_MSG_GET(TYPE, m)
+#define INTEL_GUC_MSG_TO_DATA(m) __INTEL_GUC_MSG_GET(DATA, m)
+#define INTEL_GUC_MSG_TO_CODE(m) __INTEL_GUC_MSG_GET(CODE, m)
+
+enum intel_guc_msg_type {
+ INTEL_GUC_MSG_TYPE_REQUEST = 0x0,
+ INTEL_GUC_MSG_TYPE_RESPONSE = 0xF,
+};
+
+#define __INTEL_GUC_MSG_TYPE_IS(T, m) \
+ (INTEL_GUC_MSG_TO_TYPE(m) == INTEL_GUC_MSG_TYPE_ ## T)
+#define INTEL_GUC_MSG_IS_REQUEST(m) __INTEL_GUC_MSG_TYPE_IS(REQUEST, m)
+#define INTEL_GUC_MSG_IS_RESPONSE(m) __INTEL_GUC_MSG_TYPE_IS(RESPONSE, m)
+
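+/*
+ * Sketch (hypothetical macro, for exposition only): composing a
+ * host-to-GuC MMIO request header from the fields documented above.
+ */
+#define EXAMPLE_GUC_MSG_REQUEST(code, data) \
+ ((INTEL_GUC_MSG_TYPE_REQUEST << INTEL_GUC_MSG_TYPE_SHIFT) | \
+ (((data) << INTEL_GUC_MSG_DATA_SHIFT) & INTEL_GUC_MSG_DATA_MASK) | \
+ (((code) << INTEL_GUC_MSG_CODE_SHIFT) & INTEL_GUC_MSG_CODE_MASK))
+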
+enum intel_guc_action {
+ INTEL_GUC_ACTION_DEFAULT = 0x0,
+ INTEL_GUC_ACTION_REQUEST_PREEMPTION = 0x2,
+ INTEL_GUC_ACTION_REQUEST_ENGINE_RESET = 0x3,
+ INTEL_GUC_ACTION_ALLOCATE_DOORBELL = 0x10,
+ INTEL_GUC_ACTION_DEALLOCATE_DOORBELL = 0x20,
+ INTEL_GUC_ACTION_LOG_BUFFER_FILE_FLUSH_COMPLETE = 0x30,
+ INTEL_GUC_ACTION_UK_LOG_ENABLE_LOGGING = 0x40,
+ INTEL_GUC_ACTION_FORCE_LOG_BUFFER_FLUSH = 0x302,
+ INTEL_GUC_ACTION_ENTER_S_STATE = 0x501,
+ INTEL_GUC_ACTION_EXIT_S_STATE = 0x502,
+ INTEL_GUC_ACTION_SLPC_REQUEST = 0x3003,
+ INTEL_GUC_ACTION_SAMPLE_FORCEWAKE = 0x3005,
+ INTEL_GUC_ACTION_AUTHENTICATE_HUC = 0x4000,
+ INTEL_GUC_ACTION_REGISTER_COMMAND_TRANSPORT_BUFFER = 0x4505,
+ INTEL_GUC_ACTION_DEREGISTER_COMMAND_TRANSPORT_BUFFER = 0x4506,
+ INTEL_GUC_ACTION_LIMIT
+};
+
+enum intel_guc_preempt_options {
+ INTEL_GUC_PREEMPT_OPTION_DROP_WORK_Q = 0x4,
+ INTEL_GUC_PREEMPT_OPTION_DROP_SUBMIT_Q = 0x8,
+};
+
+enum intel_guc_report_status {
+ INTEL_GUC_REPORT_STATUS_UNKNOWN = 0x0,
+ INTEL_GUC_REPORT_STATUS_ACKED = 0x1,
+ INTEL_GUC_REPORT_STATUS_ERROR = 0x2,
+ INTEL_GUC_REPORT_STATUS_COMPLETE = 0x4,
+};
+
+enum intel_guc_sleep_state_status {
+ INTEL_GUC_SLEEP_STATE_SUCCESS = 0x1,
+ INTEL_GUC_SLEEP_STATE_PREEMPT_TO_IDLE_FAILED = 0x2,
+ INTEL_GUC_SLEEP_STATE_ENGINE_RESET_FAILED = 0x3
+#define INTEL_GUC_SLEEP_STATE_INVALID_MASK 0x80000000
+};
+
+#define GUC_LOG_CONTROL_LOGGING_ENABLED (1 << 0)
+#define GUC_LOG_CONTROL_VERBOSITY_SHIFT 4
+#define GUC_LOG_CONTROL_VERBOSITY_MASK (0xF << GUC_LOG_CONTROL_VERBOSITY_SHIFT)
+#define GUC_LOG_CONTROL_DEFAULT_LOGGING (1 << 8)
+
+enum intel_guc_response_status {
+ INTEL_GUC_RESPONSE_STATUS_SUCCESS = 0x0,
+ INTEL_GUC_RESPONSE_STATUS_GENERIC_FAIL = 0xF000,
+};
+
+#define INTEL_GUC_MSG_IS_RESPONSE_SUCCESS(m) \
+ (typecheck(u32, (m)) && \
+ ((m) & (INTEL_GUC_MSG_TYPE_MASK | INTEL_GUC_MSG_CODE_MASK)) == \
+ ((INTEL_GUC_MSG_TYPE_RESPONSE << INTEL_GUC_MSG_TYPE_SHIFT) | \
+ (INTEL_GUC_RESPONSE_STATUS_SUCCESS << INTEL_GUC_MSG_CODE_SHIFT)))
+
+/* This action will be programmed in C1BC - SOFT_SCRATCH_15_REG */
+enum intel_guc_recv_message {
+ INTEL_GUC_RECV_MSG_CRASH_DUMP_POSTED = BIT(1),
+ INTEL_GUC_RECV_MSG_FLUSH_LOG_BUFFER = BIT(3)
+};
+
+#endif
diff --git a/drivers/gpu/drm/i915/gt/uc/intel_guc_log.c b/drivers/gpu/drm/i915/gt/uc/intel_guc_log.c
new file mode 100644
index 000000000000..caed0d57e704
--- /dev/null
+++ b/drivers/gpu/drm/i915/gt/uc/intel_guc_log.c
@@ -0,0 +1,674 @@
+// SPDX-License-Identifier: MIT
+/*
+ * Copyright © 2014-2019 Intel Corporation
+ */
+
+#include <linux/debugfs.h>
+
+#include "gt/intel_gt.h"
+#include "i915_drv.h"
+#include "i915_memcpy.h"
+#include "intel_guc_log.h"
+
+static void guc_log_capture_logs(struct intel_guc_log *log);
+
+/**
+ * DOC: GuC firmware log
+ *
+ * The firmware log is enabled by setting i915.guc_log_level to a positive
+ * level. Log data is dumped by reading the debugfs file i915_guc_log_dump.
+ * Reading from i915_guc_load_status will print out the firmware loading
+ * status and scratch register values.
+ */
+
+static int guc_action_flush_log_complete(struct intel_guc *guc)
+{
+ u32 action[] = {
+ INTEL_GUC_ACTION_LOG_BUFFER_FILE_FLUSH_COMPLETE
+ };
+
+ return intel_guc_send(guc, action, ARRAY_SIZE(action));
+}
+
+static int guc_action_flush_log(struct intel_guc *guc)
+{
+ u32 action[] = {
+ INTEL_GUC_ACTION_FORCE_LOG_BUFFER_FLUSH,
+ 0
+ };
+
+ return intel_guc_send(guc, action, ARRAY_SIZE(action));
+}
+
+static int guc_action_control_log(struct intel_guc *guc, bool enable,
+ bool default_logging, u32 verbosity)
+{
+ u32 action[] = {
+ INTEL_GUC_ACTION_UK_LOG_ENABLE_LOGGING,
+ (enable ? GUC_LOG_CONTROL_LOGGING_ENABLED : 0) |
+ (verbosity << GUC_LOG_CONTROL_VERBOSITY_SHIFT) |
+ (default_logging ? GUC_LOG_CONTROL_DEFAULT_LOGGING : 0)
+ };
+
+ GEM_BUG_ON(verbosity > GUC_LOG_VERBOSITY_MAX);
+
+ return intel_guc_send(guc, action, ARRAY_SIZE(action));
+}
+
+static inline struct intel_guc *log_to_guc(struct intel_guc_log *log)
+{
+ return container_of(log, struct intel_guc, log);
+}
+
+static void guc_log_enable_flush_events(struct intel_guc_log *log)
+{
+ intel_guc_enable_msg(log_to_guc(log),
+ INTEL_GUC_RECV_MSG_FLUSH_LOG_BUFFER |
+ INTEL_GUC_RECV_MSG_CRASH_DUMP_POSTED);
+}
+
+static void guc_log_disable_flush_events(struct intel_guc_log *log)
+{
+ intel_guc_disable_msg(log_to_guc(log),
+ INTEL_GUC_RECV_MSG_FLUSH_LOG_BUFFER |
+ INTEL_GUC_RECV_MSG_CRASH_DUMP_POSTED);
+}
+
+/*
+ * Sub buffer switch callback. Called whenever relay has to switch to a new
+ * sub buffer, relay stays on the same sub buffer if 0 is returned.
+ */
+static int subbuf_start_callback(struct rchan_buf *buf,
+ void *subbuf,
+ void *prev_subbuf,
+ size_t prev_padding)
+{
+ /*
+ * Use no-overwrite mode by default, where relay will stop accepting
+ * new data if there are no empty sub buffers left.
+ * There is no strict synchronization enforced by relay between Consumer
+ * and Producer. In overwrite mode, there is a possibility of getting
+ * inconsistent/garbled data, the producer could be writing on to the
+ * same sub buffer from which Consumer is reading. This can't be avoided
+ * unless Consumer is fast enough and can always run in tandem with
+ * Producer.
+ */
+ if (relay_buf_full(buf))
+ return 0;
+
+ return 1;
+}
+
+/*
+ * file_create() callback. Creates relay file in debugfs.
+ */
+static struct dentry *create_buf_file_callback(const char *filename,
+ struct dentry *parent,
+ umode_t mode,
+ struct rchan_buf *buf,
+ int *is_global)
+{
+ struct dentry *buf_file;
+
+ /*
+ * This is to enable the use of a single buffer for the relay channel and
+ * correspondingly have a single file exposed to User, through which
+ * it can collect the logs in order without any post-processing.
+ * Need to set 'is_global' even if parent is NULL for early logging.
+ */
+ *is_global = 1;
+
+ if (!parent)
+ return NULL;
+
+ buf_file = debugfs_create_file(filename, mode,
+ parent, buf, &relay_file_operations);
+ if (IS_ERR(buf_file))
+ return NULL;
+
+ return buf_file;
+}
+
+/*
+ * file_remove() default callback. Removes relay file in debugfs.
+ */
+static int remove_buf_file_callback(struct dentry *dentry)
+{
+ debugfs_remove(dentry);
+ return 0;
+}
+
+/* relay channel callbacks */
+static struct rchan_callbacks relay_callbacks = {
+ .subbuf_start = subbuf_start_callback,
+ .create_buf_file = create_buf_file_callback,
+ .remove_buf_file = remove_buf_file_callback,
+};
+
+static void guc_move_to_next_buf(struct intel_guc_log *log)
+{
+ /*
+ * Make sure the updates made in the sub buffer are visible when
+ * Consumer sees the following update to offset inside the sub buffer.
+ */
+ smp_wmb();
+
+ /* All data has been written, so now move the offset of sub buffer. */
+ relay_reserve(log->relay.channel, log->vma->obj->base.size);
+
+ /* Switch to the next sub buffer */
+ relay_flush(log->relay.channel);
+}
+
+static void *guc_get_write_buffer(struct intel_guc_log *log)
+{
+ /*
+ * Just get the base address of a new sub buffer and copy data into it
+ * ourselves. NULL will be returned in no-overwrite mode, if all sub
+ * buffers are full. Could have used the relay_write() to indirectly
+ * copy the data, but that would have been a bit convoluted, as we need to
+ * write only to certain locations inside a sub buffer, which cannot be
+ * done without using relay_reserve() along with relay_write(). So it's
+ * better to use relay_reserve() alone.
+ */
+ return relay_reserve(log->relay.channel, 0);
+}
+
+static bool guc_check_log_buf_overflow(struct intel_guc_log *log,
+ enum guc_log_buffer_type type,
+ unsigned int full_cnt)
+{
+ unsigned int prev_full_cnt = log->stats[type].sampled_overflow;
+ bool overflow = false;
+
+ if (full_cnt != prev_full_cnt) {
+ overflow = true;
+
+ log->stats[type].overflow = full_cnt;
+ log->stats[type].sampled_overflow += full_cnt - prev_full_cnt;
+
+ if (full_cnt < prev_full_cnt) {
+ /* buffer_full_cnt is a 4 bit counter */
+ log->stats[type].sampled_overflow += 16;
+ }
+
+ dev_notice_ratelimited(guc_to_gt(log_to_guc(log))->i915->drm.dev,
+ "GuC log buffer overflow\n");
+ }
+
+ return overflow;
+}
+
+static unsigned int guc_get_log_buffer_size(enum guc_log_buffer_type type)
+{
+ switch (type) {
+ case GUC_ISR_LOG_BUFFER:
+ return ISR_BUFFER_SIZE;
+ case GUC_DPC_LOG_BUFFER:
+ return DPC_BUFFER_SIZE;
+ case GUC_CRASH_DUMP_LOG_BUFFER:
+ return CRASH_BUFFER_SIZE;
+ default:
+ MISSING_CASE(type);
+ }
+
+ return 0;
+}
+
+static void guc_read_update_log_buffer(struct intel_guc_log *log)
+{
+ unsigned int buffer_size, read_offset, write_offset, bytes_to_copy, full_cnt;
+ struct guc_log_buffer_state *log_buf_state, *log_buf_snapshot_state;
+ struct guc_log_buffer_state log_buf_state_local;
+ enum guc_log_buffer_type type;
+ void *src_data, *dst_data;
+ bool new_overflow;
+
+ mutex_lock(&log->relay.lock);
+
+ if (WARN_ON(!intel_guc_log_relay_created(log)))
+ goto out_unlock;
+
+ /* Get the pointer to shared GuC log buffer */
+ log_buf_state = src_data = log->relay.buf_addr;
+
+ /* Get the pointer to local buffer to store the logs */
+ log_buf_snapshot_state = dst_data = guc_get_write_buffer(log);
+
+ if (unlikely(!log_buf_snapshot_state)) {
+ /*
+ * Use a rate-limited error to avoid a deluge of messages; logs might
+ * be getting consumed by the User at a slow rate.
+ */
+ DRM_ERROR_RATELIMITED("no sub-buffer to capture logs\n");
+ log->relay.full_count++;
+
+ goto out_unlock;
+ }
+
+ /* Actual logs are present from the 2nd page */
+ src_data += PAGE_SIZE;
+ dst_data += PAGE_SIZE;
+
+ for (type = GUC_ISR_LOG_BUFFER; type < GUC_MAX_LOG_BUFFER; type++) {
+ /*
+ * Make a copy of the state structure, inside GuC log buffer
+ * (which is uncached mapped), on the stack to avoid reading
+ * from it multiple times.
+ */
+ memcpy(&log_buf_state_local, log_buf_state,
+ sizeof(struct guc_log_buffer_state));
+ buffer_size = guc_get_log_buffer_size(type);
+ read_offset = log_buf_state_local.read_ptr;
+ write_offset = log_buf_state_local.sampled_write_ptr;
+ full_cnt = log_buf_state_local.buffer_full_cnt;
+
+ /* Bookkeeping stuff */
+ log->stats[type].flush += log_buf_state_local.flush_to_file;
+ new_overflow = guc_check_log_buf_overflow(log, type, full_cnt);
+
+ /* Update the state of shared log buffer */
+ log_buf_state->read_ptr = write_offset;
+ log_buf_state->flush_to_file = 0;
+ log_buf_state++;
+
+ /* First copy the state structure in snapshot buffer */
+ memcpy(log_buf_snapshot_state, &log_buf_state_local,
+ sizeof(struct guc_log_buffer_state));
+
+ /*
+ * The write pointer could have been updated by the GuC firmware
+ * after sending the flush interrupt to the Host; for consistency,
+ * set the write pointer value to the same value as sampled_write_ptr
+ * in the snapshot buffer.
+ */
+ log_buf_snapshot_state->write_ptr = write_offset;
+ log_buf_snapshot_state++;
+
+ /* Now copy the actual logs. */
+ if (unlikely(new_overflow)) {
+ /* copy the whole buffer in case of overflow */
+ read_offset = 0;
+ write_offset = buffer_size;
+ } else if (unlikely((read_offset > buffer_size) ||
+ (write_offset > buffer_size))) {
+ DRM_ERROR("invalid log buffer state\n");
+ /* copy whole buffer as offsets are unreliable */
+ read_offset = 0;
+ write_offset = buffer_size;
+ }
+
+ /* Just copy the newly written data */
+ if (read_offset > write_offset) {
+ i915_memcpy_from_wc(dst_data, src_data, write_offset);
+ bytes_to_copy = buffer_size - read_offset;
+ } else {
+ bytes_to_copy = write_offset - read_offset;
+ }
+ i915_memcpy_from_wc(dst_data + read_offset,
+ src_data + read_offset, bytes_to_copy);
+
+ src_data += buffer_size;
+ dst_data += buffer_size;
+ }
+
+ guc_move_to_next_buf(log);
+
+out_unlock:
+ mutex_unlock(&log->relay.lock);
+}
+
+static void capture_logs_work(struct work_struct *work)
+{
+ struct intel_guc_log *log =
+ container_of(work, struct intel_guc_log, relay.flush_work);
+
+ guc_log_capture_logs(log);
+}
+
+static int guc_log_map(struct intel_guc_log *log)
+{
+ void *vaddr;
+
+ lockdep_assert_held(&log->relay.lock);
+
+ if (!log->vma)
+ return -ENODEV;
+
+ /*
+ * Create a WC (Uncached for read) vmalloc mapping of log
+ * buffer pages, so that we can directly get the data
+ * (up-to-date) from memory.
+ */
+ vaddr = i915_gem_object_pin_map(log->vma->obj, I915_MAP_WC);
+ if (IS_ERR(vaddr))
+ return PTR_ERR(vaddr);
+
+ log->relay.buf_addr = vaddr;
+
+ return 0;
+}
+
+static void guc_log_unmap(struct intel_guc_log *log)
+{
+ lockdep_assert_held(&log->relay.lock);
+
+ i915_gem_object_unpin_map(log->vma->obj);
+ log->relay.buf_addr = NULL;
+}
+
+void intel_guc_log_init_early(struct intel_guc_log *log)
+{
+ mutex_init(&log->relay.lock);
+ INIT_WORK(&log->relay.flush_work, capture_logs_work);
+ log->relay.started = false;
+}
+
+static int guc_log_relay_create(struct intel_guc_log *log)
+{
+ struct intel_guc *guc = log_to_guc(log);
+ struct drm_i915_private *dev_priv = guc_to_gt(guc)->i915;
+ struct rchan *guc_log_relay_chan;
+ size_t n_subbufs, subbuf_size;
+ int ret;
+
+ lockdep_assert_held(&log->relay.lock);
+ GEM_BUG_ON(!log->vma);
+
+ /* Keep the size of sub buffers same as shared log buffer */
+ subbuf_size = log->vma->size;
+
+ /*
+ * Store up to 8 snapshots, which is large enough to buffer sufficient
+ * boot time logs and gives the User enough leeway, in terms of
+ * latency, to consume the logs from relay. It also doesn't take
+ * up too much memory.
+ */
+ n_subbufs = 8;
+
+ guc_log_relay_chan = relay_open("guc_log",
+ dev_priv->drm.primary->debugfs_root,
+ subbuf_size, n_subbufs,
+ &relay_callbacks, dev_priv);
+ if (!guc_log_relay_chan) {
+ DRM_ERROR("Couldn't create relay chan for GuC logging\n");
+
+ ret = -ENOMEM;
+ return ret;
+ }
+
+ GEM_BUG_ON(guc_log_relay_chan->subbuf_size < subbuf_size);
+ log->relay.channel = guc_log_relay_chan;
+
+ return 0;
+}
+
+static void guc_log_relay_destroy(struct intel_guc_log *log)
+{
+ lockdep_assert_held(&log->relay.lock);
+
+ relay_close(log->relay.channel);
+ log->relay.channel = NULL;
+}
+
+static void guc_log_capture_logs(struct intel_guc_log *log)
+{
+ struct intel_guc *guc = log_to_guc(log);
+ struct drm_i915_private *dev_priv = guc_to_gt(guc)->i915;
+ intel_wakeref_t wakeref;
+
+ guc_read_update_log_buffer(log);
+
+ /*
+ * Generally the device is expected to be active only at this
+ * time, so get/put should be really quick.
+ */
+ with_intel_runtime_pm(&dev_priv->runtime_pm, wakeref)
+ guc_action_flush_log_complete(guc);
+}
+
+static u32 __get_default_log_level(struct intel_guc_log *log)
+{
+ /* A negative value means "use platform/config default" */
+ if (i915_modparams.guc_log_level < 0) {
+ return (IS_ENABLED(CONFIG_DRM_I915_DEBUG) ||
+ IS_ENABLED(CONFIG_DRM_I915_DEBUG_GEM)) ?
+ GUC_LOG_LEVEL_MAX : GUC_LOG_LEVEL_NON_VERBOSE;
+ }
+
+ if (i915_modparams.guc_log_level > GUC_LOG_LEVEL_MAX) {
+ DRM_WARN("Incompatible option detected: %s=%d, %s!\n",
+ "guc_log_level", i915_modparams.guc_log_level,
+ "verbosity too high");
+ return (IS_ENABLED(CONFIG_DRM_I915_DEBUG) ||
+ IS_ENABLED(CONFIG_DRM_I915_DEBUG_GEM)) ?
+ GUC_LOG_LEVEL_MAX : GUC_LOG_LEVEL_DISABLED;
+ }
+
+ GEM_BUG_ON(i915_modparams.guc_log_level < GUC_LOG_LEVEL_DISABLED);
+ GEM_BUG_ON(i915_modparams.guc_log_level > GUC_LOG_LEVEL_MAX);
+ return i915_modparams.guc_log_level;
+}
+
+int intel_guc_log_create(struct intel_guc_log *log)
+{
+ struct intel_guc *guc = log_to_guc(log);
+ struct i915_vma *vma;
+ u32 guc_log_size;
+ int ret;
+
+ GEM_BUG_ON(log->vma);
+
+ /*
+ * GuC Log buffer Layout
+ *
+ * +===============================+ 00B
+ * | Crash dump state header |
+ * +-------------------------------+ 32B
+ * | DPC state header |
+ * +-------------------------------+ 64B
+ * | ISR state header |
+ * +-------------------------------+ 96B
+ * | |
+ * +===============================+ PAGE_SIZE (4KB)
+ * | Crash Dump logs |
+ * +===============================+ + CRASH_SIZE
+ * | DPC logs |
+ * +===============================+ + DPC_SIZE
+ * | ISR logs |
+ * +===============================+ + ISR_SIZE
+ */
+ guc_log_size = PAGE_SIZE + CRASH_BUFFER_SIZE + DPC_BUFFER_SIZE +
+ ISR_BUFFER_SIZE;
+
+ vma = intel_guc_allocate_vma(guc, guc_log_size);
+ if (IS_ERR(vma)) {
+ ret = PTR_ERR(vma);
+ goto err;
+ }
+
+ log->vma = vma;
+
+ log->level = __get_default_log_level(log);
+ DRM_DEBUG_DRIVER("guc_log_level=%d (%s, verbose:%s, verbosity:%d)\n",
+ log->level, enableddisabled(log->level),
+ yesno(GUC_LOG_LEVEL_IS_VERBOSE(log->level)),
+ GUC_LOG_LEVEL_TO_VERBOSITY(log->level));
+
+ return 0;
+
+err:
+ DRM_ERROR("Failed to allocate GuC log buffer. %d\n", ret);
+ return ret;
+}
+
+void intel_guc_log_destroy(struct intel_guc_log *log)
+{
+ i915_vma_unpin_and_release(&log->vma, 0);
+}
+
+int intel_guc_log_set_level(struct intel_guc_log *log, u32 level)
+{
+ struct intel_guc *guc = log_to_guc(log);
+ struct drm_i915_private *dev_priv = guc_to_gt(guc)->i915;
+ intel_wakeref_t wakeref;
+ int ret = 0;
+
+ BUILD_BUG_ON(GUC_LOG_VERBOSITY_MIN != 0);
+ GEM_BUG_ON(!log->vma);
+
+ /*
+ * GuC recognizes log levels from 0 to max; we use 0 to indicate
+ * that logging should be disabled.
+ */
+ if (level < GUC_LOG_LEVEL_DISABLED || level > GUC_LOG_LEVEL_MAX)
+ return -EINVAL;
+
+ mutex_lock(&dev_priv->drm.struct_mutex);
+
+ if (log->level == level)
+ goto out_unlock;
+
+ with_intel_runtime_pm(&dev_priv->runtime_pm, wakeref)
+ ret = guc_action_control_log(guc,
+ GUC_LOG_LEVEL_IS_VERBOSE(level),
+ GUC_LOG_LEVEL_IS_ENABLED(level),
+ GUC_LOG_LEVEL_TO_VERBOSITY(level));
+ if (ret) {
+ DRM_DEBUG_DRIVER("guc_log_control action failed %d\n", ret);
+ goto out_unlock;
+ }
+
+ log->level = level;
+
+out_unlock:
+ mutex_unlock(&dev_priv->drm.struct_mutex);
+
+ return ret;
+}
+
+bool intel_guc_log_relay_created(const struct intel_guc_log *log)
+{
+ return log->relay.buf_addr;
+}
+
+int intel_guc_log_relay_open(struct intel_guc_log *log)
+{
+ int ret;
+
+ if (!log->vma)
+ return -ENODEV;
+
+ mutex_lock(&log->relay.lock);
+
+ if (intel_guc_log_relay_created(log)) {
+ ret = -EEXIST;
+ goto out_unlock;
+ }
+
+ /*
+ * We require SSE 4.1 for fast reads from the GuC log buffer and
+ * it should be present on the chipsets supporting GuC based
+ * submissions.
+ */
+ if (!i915_has_memcpy_from_wc()) {
+ ret = -ENXIO;
+ goto out_unlock;
+ }
+
+ ret = guc_log_relay_create(log);
+ if (ret)
+ goto out_unlock;
+
+ ret = guc_log_map(log);
+ if (ret)
+ goto out_relay;
+
+ mutex_unlock(&log->relay.lock);
+
+ return 0;
+
+out_relay:
+ guc_log_relay_destroy(log);
+out_unlock:
+ mutex_unlock(&log->relay.lock);
+
+ return ret;
+}
+
+int intel_guc_log_relay_start(struct intel_guc_log *log)
+{
+ if (log->relay.started)
+ return -EEXIST;
+
+ guc_log_enable_flush_events(log);
+
+ /*
+ * When GuC is logging without us relaying to userspace, we're ignoring
+ * the flush notification. This means that we need to unconditionally
+ * flush on relay enabling, since GuC only notifies us once.
+ */
+ queue_work(system_highpri_wq, &log->relay.flush_work);
+
+ log->relay.started = true;
+
+ return 0;
+}
+
+void intel_guc_log_relay_flush(struct intel_guc_log *log)
+{
+ struct intel_guc *guc = log_to_guc(log);
+ intel_wakeref_t wakeref;
+
+ if (!log->relay.started)
+ return;
+
+ /*
+ * Before initiating the forceful flush, wait for any pending/ongoing
+ * flush to complete, otherwise the forceful flush may not actually happen.
+ */
+ flush_work(&log->relay.flush_work);
+
+ with_intel_runtime_pm(guc_to_gt(guc)->uncore->rpm, wakeref)
+ guc_action_flush_log(guc);
+
+	/* GuC would have updated the log buffer by now, so capture it */
+ guc_log_capture_logs(log);
+}
+
+/*
+ * Stops the relay log. Called from intel_guc_log_relay_close(), so no
+ * possibility of race with start/flush since relay_write cannot race
+ * relay_close.
+ */
+static void guc_log_relay_stop(struct intel_guc_log *log)
+{
+ struct intel_guc *guc = log_to_guc(log);
+ struct drm_i915_private *i915 = guc_to_gt(guc)->i915;
+
+ if (!log->relay.started)
+ return;
+
+ guc_log_disable_flush_events(log);
+ intel_synchronize_irq(i915);
+
+ flush_work(&log->relay.flush_work);
+
+ log->relay.started = false;
+}
+
+void intel_guc_log_relay_close(struct intel_guc_log *log)
+{
+ guc_log_relay_stop(log);
+
+ mutex_lock(&log->relay.lock);
+ GEM_BUG_ON(!intel_guc_log_relay_created(log));
+ guc_log_unmap(log);
+ guc_log_relay_destroy(log);
+ mutex_unlock(&log->relay.lock);
+}
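+
+/*
+ * Illustrative lifecycle of the relay interface above:
+ *
+ *	err = intel_guc_log_relay_open(log);	create and map the relay
+ *	err = intel_guc_log_relay_start(log);	enable flush events
+ *	intel_guc_log_relay_flush(log);		force a capture on demand
+ *	intel_guc_log_relay_close(log);		stop, unmap and destroy
+ */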
+
+void intel_guc_log_handle_flush_event(struct intel_guc_log *log)
+{
+ queue_work(system_highpri_wq, &log->relay.flush_work);
+}
diff --git a/drivers/gpu/drm/i915/gt/uc/intel_guc_log.h b/drivers/gpu/drm/i915/gt/uc/intel_guc_log.h
new file mode 100644
index 000000000000..c252c022c5fc
--- /dev/null
+++ b/drivers/gpu/drm/i915/gt/uc/intel_guc_log.h
@@ -0,0 +1,82 @@
+/* SPDX-License-Identifier: MIT */
+/*
+ * Copyright © 2014-2019 Intel Corporation
+ */
+
+#ifndef _INTEL_GUC_LOG_H_
+#define _INTEL_GUC_LOG_H_
+
+#include <linux/mutex.h>
+#include <linux/relay.h>
+#include <linux/workqueue.h>
+
+#include "intel_guc_fwif.h"
+#include "i915_gem.h"
+
+struct intel_guc;
+
+#ifdef CONFIG_DRM_I915_DEBUG_GUC
+#define CRASH_BUFFER_SIZE SZ_2M
+#define DPC_BUFFER_SIZE SZ_8M
+#define ISR_BUFFER_SIZE SZ_8M
+#else
+#define CRASH_BUFFER_SIZE SZ_8K
+#define DPC_BUFFER_SIZE SZ_32K
+#define ISR_BUFFER_SIZE SZ_32K
+#endif
+
+/*
+ * While we're using a plain log level in i915, GuC controls are much more...
+ * "elaborate"? We have a couple of bits for verbosity, a separate bit for
+ * actual log enabling, and a separate bit for default logging - which
+ * "conveniently" ignores the enable bit.
+ */
+#define GUC_LOG_LEVEL_DISABLED 0
+#define GUC_LOG_LEVEL_NON_VERBOSE 1
+#define GUC_LOG_LEVEL_IS_ENABLED(x) ((x) > GUC_LOG_LEVEL_DISABLED)
+#define GUC_LOG_LEVEL_IS_VERBOSE(x) ((x) > GUC_LOG_LEVEL_NON_VERBOSE)
+#define GUC_LOG_LEVEL_TO_VERBOSITY(x) ({ \
+ typeof(x) _x = (x); \
+ GUC_LOG_LEVEL_IS_VERBOSE(_x) ? _x - 2 : 0; \
+})
+#define GUC_VERBOSITY_TO_LOG_LEVEL(x) ((x) + 2)
+#define GUC_LOG_LEVEL_MAX GUC_VERBOSITY_TO_LOG_LEVEL(GUC_LOG_VERBOSITY_MAX)
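+
+/*
+ * Worked mapping of the macros above: level 0 is disabled, level 1 is
+ * enabled but non-verbose, and levels 2..GUC_LOG_LEVEL_MAX correspond to
+ * GuC verbosities 0..GUC_LOG_VERBOSITY_MAX:
+ *
+ *	GUC_LOG_LEVEL_TO_VERBOSITY(2) == 0
+ *	GUC_LOG_LEVEL_TO_VERBOSITY(3) == 1
+ *	GUC_VERBOSITY_TO_LOG_LEVEL(1) == 3
+ */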
+
+struct intel_guc_log {
+ u32 level;
+ struct i915_vma *vma;
+ struct {
+ void *buf_addr;
+ bool started;
+ struct work_struct flush_work;
+ struct rchan *channel;
+ struct mutex lock;
+ u32 full_count;
+ } relay;
+ /* logging related stats */
+ struct {
+ u32 sampled_overflow;
+ u32 overflow;
+ u32 flush;
+ } stats[GUC_MAX_LOG_BUFFER];
+};
+
+void intel_guc_log_init_early(struct intel_guc_log *log);
+int intel_guc_log_create(struct intel_guc_log *log);
+void intel_guc_log_destroy(struct intel_guc_log *log);
+
+int intel_guc_log_set_level(struct intel_guc_log *log, u32 level);
+bool intel_guc_log_relay_created(const struct intel_guc_log *log);
+int intel_guc_log_relay_open(struct intel_guc_log *log);
+int intel_guc_log_relay_start(struct intel_guc_log *log);
+void intel_guc_log_relay_flush(struct intel_guc_log *log);
+void intel_guc_log_relay_close(struct intel_guc_log *log);
+
+void intel_guc_log_handle_flush_event(struct intel_guc_log *log);
+
+static inline u32 intel_guc_log_get_level(struct intel_guc_log *log)
+{
+ return log->level;
+}
+
+#endif
diff --git a/drivers/gpu/drm/i915/gt/uc/intel_guc_reg.h b/drivers/gpu/drm/i915/gt/uc/intel_guc_reg.h
new file mode 100644
index 000000000000..1949346e714e
--- /dev/null
+++ b/drivers/gpu/drm/i915/gt/uc/intel_guc_reg.h
@@ -0,0 +1,146 @@
+/* SPDX-License-Identifier: MIT */
+/*
+ * Copyright © 2014-2019 Intel Corporation
+ */
+
+#ifndef _INTEL_GUC_REG_H_
+#define _INTEL_GUC_REG_H_
+
+#include <linux/compiler.h>
+#include <linux/types.h>
+
+#include "i915_reg.h"
+
+/* Definitions of GuC H/W registers, bits, etc */
+
+#define GUC_STATUS _MMIO(0xc000)
+#define GS_RESET_SHIFT 0
+#define GS_MIA_IN_RESET (0x01 << GS_RESET_SHIFT)
+#define GS_BOOTROM_SHIFT 1
+#define GS_BOOTROM_MASK (0x7F << GS_BOOTROM_SHIFT)
+#define GS_BOOTROM_RSA_FAILED (0x50 << GS_BOOTROM_SHIFT)
+#define GS_BOOTROM_JUMP_PASSED (0x76 << GS_BOOTROM_SHIFT)
+#define GS_UKERNEL_SHIFT 8
+#define GS_UKERNEL_MASK (0xFF << GS_UKERNEL_SHIFT)
+#define GS_UKERNEL_LAPIC_DONE (0x30 << GS_UKERNEL_SHIFT)
+#define GS_UKERNEL_DPC_ERROR (0x60 << GS_UKERNEL_SHIFT)
+#define GS_UKERNEL_EXCEPTION (0x70 << GS_UKERNEL_SHIFT)
+#define GS_UKERNEL_READY (0xF0 << GS_UKERNEL_SHIFT)
+#define GS_MIA_SHIFT 16
+#define GS_MIA_MASK (0x07 << GS_MIA_SHIFT)
+#define GS_MIA_CORE_STATE (0x01 << GS_MIA_SHIFT)
+#define GS_MIA_HALT_REQUESTED (0x02 << GS_MIA_SHIFT)
+#define GS_MIA_ISR_ENTRY (0x04 << GS_MIA_SHIFT)
+#define GS_AUTH_STATUS_SHIFT 30
+#define GS_AUTH_STATUS_MASK (0x03 << GS_AUTH_STATUS_SHIFT)
+#define GS_AUTH_STATUS_BAD (0x01 << GS_AUTH_STATUS_SHIFT)
+#define GS_AUTH_STATUS_GOOD (0x02 << GS_AUTH_STATUS_SHIFT)
+
+#define SOFT_SCRATCH(n) _MMIO(0xc180 + (n) * 4)
+#define SOFT_SCRATCH_COUNT 16
+
+#define GEN11_SOFT_SCRATCH(n) _MMIO(0x190240 + (n) * 4)
+#define GEN11_SOFT_SCRATCH_COUNT 4
+
+#define UOS_RSA_SCRATCH(i) _MMIO(0xc200 + (i) * 4)
+#define UOS_RSA_SCRATCH_COUNT 64
+
+#define DMA_ADDR_0_LOW _MMIO(0xc300)
+#define DMA_ADDR_0_HIGH _MMIO(0xc304)
+#define DMA_ADDR_1_LOW _MMIO(0xc308)
+#define DMA_ADDR_1_HIGH _MMIO(0xc30c)
+#define DMA_ADDRESS_SPACE_WOPCM (7 << 16)
+#define DMA_ADDRESS_SPACE_GTT (8 << 16)
+#define DMA_COPY_SIZE _MMIO(0xc310)
+#define DMA_CTRL _MMIO(0xc314)
+#define HUC_UKERNEL (1<<9)
+#define UOS_MOVE (1<<4)
+#define START_DMA (1<<0)
+#define DMA_GUC_WOPCM_OFFSET _MMIO(0xc340)
+#define GUC_WOPCM_OFFSET_VALID (1<<0)
+#define HUC_LOADING_AGENT_VCR (0<<1)
+#define HUC_LOADING_AGENT_GUC (1<<1)
+#define GUC_WOPCM_OFFSET_SHIFT 14
+#define GUC_WOPCM_OFFSET_MASK (0x3ffff << GUC_WOPCM_OFFSET_SHIFT)
+#define GUC_MAX_IDLE_COUNT _MMIO(0xC3E4)
+
+#define HUC_STATUS2 _MMIO(0xD3B0)
+#define HUC_FW_VERIFIED (1<<7)
+
+#define GEN11_HUC_KERNEL_LOAD_INFO _MMIO(0xC1DC)
+#define HUC_LOAD_SUCCESSFUL (1 << 0)
+
+#define GUC_WOPCM_SIZE _MMIO(0xc050)
+#define GUC_WOPCM_SIZE_LOCKED (1<<0)
+#define GUC_WOPCM_SIZE_SHIFT 12
+#define GUC_WOPCM_SIZE_MASK (0xfffff << GUC_WOPCM_SIZE_SHIFT)
+
+#define GEN8_GT_PM_CONFIG _MMIO(0x138140)
+#define GEN9LP_GT_PM_CONFIG _MMIO(0x138140)
+#define GEN9_GT_PM_CONFIG _MMIO(0x13816c)
+#define GT_DOORBELL_ENABLE (1<<0)
+
+#define GEN8_GTCR _MMIO(0x4274)
+#define GEN8_GTCR_INVALIDATE (1<<0)
+
+#define GEN12_GUC_TLB_INV_CR _MMIO(0xcee8)
+#define GEN12_GUC_TLB_INV_CR_INVALIDATE (1 << 0)
+
+#define GUC_ARAT_C6DIS _MMIO(0xA178)
+
+#define GUC_SHIM_CONTROL _MMIO(0xc064)
+#define GUC_DISABLE_SRAM_INIT_TO_ZEROES (1<<0)
+#define GUC_ENABLE_READ_CACHE_LOGIC (1<<1)
+#define GUC_ENABLE_MIA_CACHING (1<<2)
+#define GUC_GEN10_MSGCH_ENABLE (1<<4)
+#define GUC_ENABLE_READ_CACHE_FOR_SRAM_DATA (1<<9)
+#define GUC_ENABLE_READ_CACHE_FOR_WOPCM_DATA (1<<10)
+#define GUC_ENABLE_MIA_CLOCK_GATING (1<<15)
+#define GUC_GEN10_SHIM_WC_ENABLE (1<<21)
+
+#define GUC_SEND_INTERRUPT _MMIO(0xc4c8)
+#define GUC_SEND_TRIGGER (1<<0)
+#define GEN11_GUC_HOST_INTERRUPT _MMIO(0x1901f0)
+
+#define GUC_NUM_DOORBELLS 256
+
+/* format of the HW-monitored doorbell cacheline */
+struct guc_doorbell_info {
+ u32 db_status;
+#define GUC_DOORBELL_DISABLED 0
+#define GUC_DOORBELL_ENABLED 1
+
+ u32 cookie;
+ u32 reserved[14];
+} __packed;
+
+#define GEN8_DRBREGL(x) _MMIO(0x1000 + (x) * 8)
+#define GEN8_DRB_VALID (1<<0)
+#define GEN8_DRBREGU(x) _MMIO(0x1000 + (x) * 8 + 4)
+
+#define DE_GUCRMR _MMIO(0x44054)
+
+#define GUC_BCS_RCS_IER _MMIO(0xC550)
+#define GUC_VCS2_VCS1_IER _MMIO(0xC554)
+#define GUC_WD_VECS_IER _MMIO(0xC558)
+#define GUC_PM_P24C_IER _MMIO(0xC55C)
+
+/* GuC Interrupt Vector */
+#define GUC_INTR_GUC2HOST BIT(15)
+#define GUC_INTR_EXEC_ERROR BIT(14)
+#define GUC_INTR_DISPLAY_EVENT BIT(13)
+#define GUC_INTR_SEM_SIG BIT(12)
+#define GUC_INTR_IOMMU2GUC BIT(11)
+#define GUC_INTR_DOORBELL_RANG BIT(10)
+#define GUC_INTR_DMA_DONE BIT(9)
+#define GUC_INTR_FATAL_ERROR BIT(8)
+#define GUC_INTR_NOTIF_ERROR BIT(7)
+#define GUC_INTR_SW_INT_6 BIT(6)
+#define GUC_INTR_SW_INT_5 BIT(5)
+#define GUC_INTR_SW_INT_4 BIT(4)
+#define GUC_INTR_SW_INT_3 BIT(3)
+#define GUC_INTR_SW_INT_2 BIT(2)
+#define GUC_INTR_SW_INT_1 BIT(1)
+#define GUC_INTR_SW_INT_0 BIT(0)
+
+#endif
diff --git a/drivers/gpu/drm/i915/gt/uc/intel_guc_submission.c b/drivers/gpu/drm/i915/gt/uc/intel_guc_submission.c
new file mode 100644
index 000000000000..9e42324fdecd
--- /dev/null
+++ b/drivers/gpu/drm/i915/gt/uc/intel_guc_submission.c
@@ -0,0 +1,682 @@
+// SPDX-License-Identifier: MIT
+/*
+ * Copyright © 2014 Intel Corporation
+ */
+
+#include <linux/circ_buf.h>
+
+#include "gem/i915_gem_context.h"
+#include "gt/intel_context.h"
+#include "gt/intel_engine_pm.h"
+#include "gt/intel_gt.h"
+#include "gt/intel_gt_pm.h"
+#include "gt/intel_lrc_reg.h"
+#include "gt/intel_ring.h"
+
+#include "intel_guc_submission.h"
+
+#include "i915_drv.h"
+#include "i915_trace.h"
+
+/**
+ * DOC: GuC-based command submission
+ *
+ * IMPORTANT NOTE: GuC submission is currently not supported in i915. The GuC
+ * firmware is moving to an updated submission interface and we plan to
+ * turn submission back on when that lands. The below documentation (and related
+ * code) matches the old submission model and will be updated as part of the
+ * upgrade to the new flow.
+ *
+ * GuC stage descriptor:
+ * During initialization, the driver allocates a static pool of 1024 such
+ * descriptors, and shares them with the GuC. Currently, we only use one
+ * descriptor. This stage descriptor lets the GuC know about the workqueue and
+ * process descriptor. Theoretically, it also lets the GuC know about our HW
+ * contexts (context ID, etc...), but we actually employ a kind of submission
+ * where the GuC uses the LRCA sent via the work item instead. This is called
+ * a "proxy" submission.
+ *
+ * The Scratch registers:
+ * There are 16 MMIO-based registers starting from 0xC180. The kernel driver writes
+ * a value to the action register (SOFT_SCRATCH_0) along with any data. It then
+ * triggers an interrupt on the GuC via another register write (0xC4C8).
+ * Firmware writes a success/fail code back to the action register after
+ * processing the request. The kernel driver polls waiting for this update and
+ * then proceeds.
+ *
+ * Work Items:
+ * There are several types of work items that the host may place into a
+ * workqueue, each with its own requirements and limitations. Currently only
+ * WQ_TYPE_INORDER is needed to support legacy submission via GuC, which
+ * represents an in-order queue. The kernel driver packs the ring tail pointer
+ * and an ELSP context descriptor dword into the work item.
+ * See guc_add_request().
+ *
+ */
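+
+/*
+ * Illustrative sketch of the MMIO send protocol described above (the real
+ * send path lives in intel_guc_send(); register names are those defined in
+ * intel_guc_reg.h):
+ *
+ *	intel_uncore_write(uncore, SOFT_SCRATCH(0), action);
+ *	intel_uncore_write(uncore, SOFT_SCRATCH(1), data);
+ *	intel_uncore_write(uncore, GUC_SEND_INTERRUPT, GUC_SEND_TRIGGER);
+ *
+ * and then poll SOFT_SCRATCH(0) for the firmware's success/fail code.
+ */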
+
+static inline struct i915_priolist *to_priolist(struct rb_node *rb)
+{
+ return rb_entry(rb, struct i915_priolist, node);
+}
+
+static struct guc_stage_desc *__get_stage_desc(struct intel_guc *guc, u32 id)
+{
+ struct guc_stage_desc *base = guc->stage_desc_pool_vaddr;
+
+ return &base[id];
+}
+
+static int guc_workqueue_create(struct intel_guc *guc)
+{
+ return intel_guc_allocate_and_map_vma(guc, GUC_WQ_SIZE, &guc->workqueue,
+ &guc->workqueue_vaddr);
+}
+
+static void guc_workqueue_destroy(struct intel_guc *guc)
+{
+ i915_vma_unpin_and_release(&guc->workqueue, I915_VMA_RELEASE_MAP);
+}
+
+/*
+ * Initialise the process descriptor shared with the GuC firmware.
+ */
+static int guc_proc_desc_create(struct intel_guc *guc)
+{
+ const u32 size = PAGE_ALIGN(sizeof(struct guc_process_desc));
+
+ return intel_guc_allocate_and_map_vma(guc, size, &guc->proc_desc,
+ &guc->proc_desc_vaddr);
+}
+
+static void guc_proc_desc_destroy(struct intel_guc *guc)
+{
+ i915_vma_unpin_and_release(&guc->proc_desc, I915_VMA_RELEASE_MAP);
+}
+
+static void guc_proc_desc_init(struct intel_guc *guc)
+{
+ struct guc_process_desc *desc;
+
+ desc = memset(guc->proc_desc_vaddr, 0, sizeof(*desc));
+
+ /*
+ * XXX: pDoorbell and WQVBaseAddress are pointers in process address
+ * space for ring3 clients (set them as in mmap_ioctl) or kernel
+ * space for kernel clients (map on demand instead? May make debug
+ * easier to have it mapped).
+ */
+ desc->wq_base_addr = 0;
+ desc->db_base_addr = 0;
+
+ desc->wq_size_bytes = GUC_WQ_SIZE;
+ desc->wq_status = WQ_STATUS_ACTIVE;
+ desc->priority = GUC_CLIENT_PRIORITY_KMD_NORMAL;
+}
+
+static void guc_proc_desc_fini(struct intel_guc *guc)
+{
+ memset(guc->proc_desc_vaddr, 0, sizeof(struct guc_process_desc));
+}
+
+static int guc_stage_desc_pool_create(struct intel_guc *guc)
+{
+ u32 size = PAGE_ALIGN(sizeof(struct guc_stage_desc) *
+ GUC_MAX_STAGE_DESCRIPTORS);
+
+ return intel_guc_allocate_and_map_vma(guc, size, &guc->stage_desc_pool,
+ &guc->stage_desc_pool_vaddr);
+}
+
+static void guc_stage_desc_pool_destroy(struct intel_guc *guc)
+{
+ i915_vma_unpin_and_release(&guc->stage_desc_pool, I915_VMA_RELEASE_MAP);
+}
+
+/*
+ * Initialise/clear the stage descriptor shared with the GuC firmware.
+ *
+ * This descriptor tells the GuC where (in GGTT space) to find the important
+ * data structures related to work submission (process descriptor, write queue,
+ * etc).
+ */
+static void guc_stage_desc_init(struct intel_guc *guc)
+{
+ struct guc_stage_desc *desc;
+
+ /* we only use 1 stage desc, so hardcode it to 0 */
+ desc = __get_stage_desc(guc, 0);
+ memset(desc, 0, sizeof(*desc));
+
+ desc->attribute = GUC_STAGE_DESC_ATTR_ACTIVE |
+ GUC_STAGE_DESC_ATTR_KERNEL;
+
+ desc->stage_id = 0;
+ desc->priority = GUC_CLIENT_PRIORITY_KMD_NORMAL;
+
+ desc->process_desc = intel_guc_ggtt_offset(guc, guc->proc_desc);
+ desc->wq_addr = intel_guc_ggtt_offset(guc, guc->workqueue);
+ desc->wq_size = GUC_WQ_SIZE;
+}
+
+static void guc_stage_desc_fini(struct intel_guc *guc)
+{
+ struct guc_stage_desc *desc;
+
+ desc = __get_stage_desc(guc, 0);
+ memset(desc, 0, sizeof(*desc));
+}
+
+/* Construct a Work Item and append it to the GuC's Work Queue */
+static void guc_wq_item_append(struct intel_guc *guc,
+ u32 target_engine, u32 context_desc,
+ u32 ring_tail, u32 fence_id)
+{
+ /* wqi_len is in DWords, and does not include the one-word header */
+ const size_t wqi_size = sizeof(struct guc_wq_item);
+ const u32 wqi_len = wqi_size / sizeof(u32) - 1;
+ struct guc_process_desc *desc = guc->proc_desc_vaddr;
+ struct guc_wq_item *wqi;
+ u32 wq_off;
+
+ lockdep_assert_held(&guc->wq_lock);
+
+	/* For now the workqueue item is 4 DWs and the workqueue buffer is
+	 * 2 pages, so a wqi structure can neither cross a page boundary nor
+	 * wrap to the beginning. This simplifies the implementation below.
+	 *
+	 * XXX: if that ever changes, we need to save the data to a temp wqi
+	 * and copy it to the workqueue buffer dw by dw.
+	 */
+ BUILD_BUG_ON(wqi_size != 16);
+
+ /* We expect the WQ to be active if we're appending items to it */
+ GEM_BUG_ON(desc->wq_status != WQ_STATUS_ACTIVE);
+
+ /* Free space is guaranteed. */
+ wq_off = READ_ONCE(desc->tail);
+ GEM_BUG_ON(CIRC_SPACE(wq_off, READ_ONCE(desc->head),
+ GUC_WQ_SIZE) < wqi_size);
+ GEM_BUG_ON(wq_off & (wqi_size - 1));
+
+ wqi = guc->workqueue_vaddr + wq_off;
+
+ /* Now fill in the 4-word work queue item */
+ wqi->header = WQ_TYPE_INORDER |
+ (wqi_len << WQ_LEN_SHIFT) |
+ (target_engine << WQ_TARGET_SHIFT) |
+ WQ_NO_WCFLUSH_WAIT;
+ wqi->context_desc = context_desc;
+ wqi->submit_element_info = ring_tail << WQ_RING_TAIL_SHIFT;
+ GEM_BUG_ON(ring_tail > WQ_RING_TAIL_MAX);
+ wqi->fence_id = fence_id;
+
+ /* Make the update visible to GuC */
+ WRITE_ONCE(desc->tail, (wq_off + wqi_size) & (GUC_WQ_SIZE - 1));
+}
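+
+/*
+ * Worked example of the tail update above (assuming 4K pages, i.e. an 8K
+ * workqueue): GUC_WQ_SIZE is a power of two, so masking with
+ * (GUC_WQ_SIZE - 1) implements the wrap-around. Appending a 16-byte item
+ * at the last slot gives
+ *
+ *	(8176 + 16) & (8192 - 1) == 0
+ *
+ * i.e. the tail wraps back to the start of the buffer.
+ */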
+
+static void guc_add_request(struct intel_guc *guc, struct i915_request *rq)
+{
+ struct intel_engine_cs *engine = rq->engine;
+ u32 ctx_desc = lower_32_bits(rq->context->lrc_desc);
+ u32 ring_tail = intel_ring_set_tail(rq->ring, rq->tail) / sizeof(u64);
+
+ guc_wq_item_append(guc, engine->guc_id, ctx_desc,
+ ring_tail, rq->fence.seqno);
+}
+
+/*
+ * When we're doing submissions using the regular execlists backend, writing to
+ * ELSP from the CPU side is enough to make sure that writes to ringbuffer pages
+ * pinned in the mappable aperture portion of the GGTT are visible to the
+ * command streamer. Writes done by the GuC on our behalf do not guarantee such
+ * ordering; therefore, to ensure the flush, we issue a POSTING_READ.
+ */
+static void flush_ggtt_writes(struct i915_vma *vma)
+{
+ if (i915_vma_is_map_and_fenceable(vma))
+ intel_uncore_posting_read_fw(vma->vm->gt->uncore,
+ GUC_STATUS);
+}
+
+static void guc_submit(struct intel_engine_cs *engine,
+ struct i915_request **out,
+ struct i915_request **end)
+{
+ struct intel_guc *guc = &engine->gt->uc.guc;
+
+ spin_lock(&guc->wq_lock);
+
+ do {
+ struct i915_request *rq = *out++;
+
+ flush_ggtt_writes(rq->ring->vma);
+ guc_add_request(guc, rq);
+ } while (out != end);
+
+ spin_unlock(&guc->wq_lock);
+}
+
+static inline int rq_prio(const struct i915_request *rq)
+{
+ return rq->sched.attr.priority | __NO_PREEMPTION;
+}
+
+static struct i915_request *schedule_in(struct i915_request *rq, int idx)
+{
+ trace_i915_request_in(rq, idx);
+
+ /*
+ * Currently we are not tracking the rq->context being inflight
+ * (ce->inflight = rq->engine). It is only used by the execlists
+ * backend at the moment, a similar counting strategy would be
+ * required if we generalise the inflight tracking.
+ */
+
+ __intel_gt_pm_get(rq->engine->gt);
+ return i915_request_get(rq);
+}
+
+static void schedule_out(struct i915_request *rq)
+{
+ trace_i915_request_out(rq);
+
+ intel_gt_pm_put_async(rq->engine->gt);
+ i915_request_put(rq);
+}
+
+static void __guc_dequeue(struct intel_engine_cs *engine)
+{
+ struct intel_engine_execlists * const execlists = &engine->execlists;
+ struct i915_request **first = execlists->inflight;
+ struct i915_request ** const last_port = first + execlists->port_mask;
+ struct i915_request *last = first[0];
+ struct i915_request **port;
+ bool submit = false;
+ struct rb_node *rb;
+
+ lockdep_assert_held(&engine->active.lock);
+
+ if (last) {
+ if (*++first)
+ return;
+
+ last = NULL;
+ }
+
+ /*
+ * We write directly into the execlists->inflight queue and don't use
+ * the execlists->pending queue, as we don't have a distinct switch
+ * event.
+ */
+ port = first;
+ while ((rb = rb_first_cached(&execlists->queue))) {
+ struct i915_priolist *p = to_priolist(rb);
+ struct i915_request *rq, *rn;
+ int i;
+
+ priolist_for_each_request_consume(rq, rn, p, i) {
+ if (last && rq->context != last->context) {
+ if (port == last_port)
+ goto done;
+
+ *port = schedule_in(last,
+ port - execlists->inflight);
+ port++;
+ }
+
+ list_del_init(&rq->sched.link);
+ __i915_request_submit(rq);
+ submit = true;
+ last = rq;
+ }
+
+ rb_erase_cached(&p->node, &execlists->queue);
+ i915_priolist_free(p);
+ }
+done:
+ execlists->queue_priority_hint =
+ rb ? to_priolist(rb)->priority : INT_MIN;
+ if (submit) {
+ *port = schedule_in(last, port - execlists->inflight);
+ *++port = NULL;
+ guc_submit(engine, first, port);
+ }
+ execlists->active = execlists->inflight;
+}
+
+static void guc_submission_tasklet(unsigned long data)
+{
+ struct intel_engine_cs * const engine = (struct intel_engine_cs *)data;
+ struct intel_engine_execlists * const execlists = &engine->execlists;
+ struct i915_request **port, *rq;
+ unsigned long flags;
+
+ spin_lock_irqsave(&engine->active.lock, flags);
+
+ for (port = execlists->inflight; (rq = *port); port++) {
+ if (!i915_request_completed(rq))
+ break;
+
+ schedule_out(rq);
+ }
+ if (port != execlists->inflight) {
+ int idx = port - execlists->inflight;
+ int rem = ARRAY_SIZE(execlists->inflight) - idx;
+ memmove(execlists->inflight, port, rem * sizeof(*port));
+ }
+
+ __guc_dequeue(engine);
+
+ spin_unlock_irqrestore(&engine->active.lock, flags);
+}
+
+static void guc_reset_prepare(struct intel_engine_cs *engine)
+{
+ struct intel_engine_execlists * const execlists = &engine->execlists;
+
+ ENGINE_TRACE(engine, "\n");
+
+ /*
+ * Prevent request submission to the hardware until we have
+ * completed the reset in i915_gem_reset_finish(). If a request
+ * is completed by one engine, it may then queue a request
+ * to a second via its execlists->tasklet *just* as we are
+ * calling engine->init_hw() and also writing the ELSP.
+ * Turning off the execlists->tasklet until the reset is over
+ * prevents the race.
+ */
+ __tasklet_disable_sync_once(&execlists->tasklet);
+}
+
+static void
+cancel_port_requests(struct intel_engine_execlists * const execlists)
+{
+ struct i915_request * const *port, *rq;
+
+ /* Note we are only using the inflight and not the pending queue */
+
+ for (port = execlists->active; (rq = *port); port++)
+ schedule_out(rq);
+ execlists->active =
+ memset(execlists->inflight, 0, sizeof(execlists->inflight));
+}
+
+static void guc_reset_rewind(struct intel_engine_cs *engine, bool stalled)
+{
+ struct intel_engine_execlists * const execlists = &engine->execlists;
+ struct i915_request *rq;
+ unsigned long flags;
+
+ spin_lock_irqsave(&engine->active.lock, flags);
+
+ cancel_port_requests(execlists);
+
+ /* Push back any incomplete requests for replay after the reset. */
+ rq = execlists_unwind_incomplete_requests(execlists);
+ if (!rq)
+ goto out_unlock;
+
+ if (!i915_request_started(rq))
+ stalled = false;
+
+ __i915_request_reset(rq, stalled);
+ intel_lr_context_reset(engine, rq->context, rq->head, stalled);
+
+out_unlock:
+ spin_unlock_irqrestore(&engine->active.lock, flags);
+}
+
+static void guc_reset_cancel(struct intel_engine_cs *engine)
+{
+ struct intel_engine_execlists * const execlists = &engine->execlists;
+ struct i915_request *rq, *rn;
+ struct rb_node *rb;
+ unsigned long flags;
+
+ ENGINE_TRACE(engine, "\n");
+
+ /*
+ * Before we call engine->cancel_requests(), we should have exclusive
+ * access to the submission state. This is arranged for us by the
+ * caller disabling the interrupt generation, the tasklet and other
+ * threads that may then access the same state, giving us a free hand
+ * to reset state. However, we still need to let lockdep be aware that
+ * we know this state may be accessed in hardirq context, so we
+ * disable the irq around this manipulation and we want to keep
+ * the spinlock focused on its duties and not accidentally conflate
+ * coverage to the submission's irq state. (Similarly, although we
+ * shouldn't need to disable irq around the manipulation of the
+ * submission's irq state, we also wish to remind ourselves that
+ * it is irq state.)
+ */
+ spin_lock_irqsave(&engine->active.lock, flags);
+
+ /* Cancel the requests on the HW and clear the ELSP tracker. */
+ cancel_port_requests(execlists);
+
+ /* Mark all executing requests as skipped. */
+ list_for_each_entry(rq, &engine->active.requests, sched.link) {
+ if (!i915_request_signaled(rq))
+ dma_fence_set_error(&rq->fence, -EIO);
+
+ i915_request_mark_complete(rq);
+ }
+
+ /* Flush the queued requests to the timeline list (for retiring). */
+ while ((rb = rb_first_cached(&execlists->queue))) {
+ struct i915_priolist *p = to_priolist(rb);
+ int i;
+
+ priolist_for_each_request_consume(rq, rn, p, i) {
+ list_del_init(&rq->sched.link);
+ __i915_request_submit(rq);
+ dma_fence_set_error(&rq->fence, -EIO);
+ i915_request_mark_complete(rq);
+ }
+
+ rb_erase_cached(&p->node, &execlists->queue);
+ i915_priolist_free(p);
+ }
+
+ /* Remaining _unready_ requests will be nop'ed when submitted */
+
+ execlists->queue_priority_hint = INT_MIN;
+ execlists->queue = RB_ROOT_CACHED;
+
+ spin_unlock_irqrestore(&engine->active.lock, flags);
+}
+
+static void guc_reset_finish(struct intel_engine_cs *engine)
+{
+ struct intel_engine_execlists * const execlists = &engine->execlists;
+
+ if (__tasklet_enable(&execlists->tasklet))
+ /* And kick in case we missed a new request submission. */
+ tasklet_hi_schedule(&execlists->tasklet);
+
+ ENGINE_TRACE(engine, "depth->%d\n",
+ atomic_read(&execlists->tasklet.count));
+}
+
+/*
+ * Everything below here is concerned with setup & teardown, and is
+ * therefore not part of the somewhat time-critical batch-submission
+ * path of guc_submit() above.
+ */
+
+/*
+ * Set up the memory resources to be shared with the GuC (via the GGTT)
+ * at firmware loading time.
+ */
+int intel_guc_submission_init(struct intel_guc *guc)
+{
+ int ret;
+
+ if (guc->stage_desc_pool)
+ return 0;
+
+ ret = guc_stage_desc_pool_create(guc);
+ if (ret)
+ return ret;
+ /*
+ * Keep static analysers happy, let them know that we allocated the
+ * vma after testing that it didn't exist earlier.
+ */
+ GEM_BUG_ON(!guc->stage_desc_pool);
+
+ ret = guc_workqueue_create(guc);
+ if (ret)
+ goto err_pool;
+
+ ret = guc_proc_desc_create(guc);
+ if (ret)
+ goto err_workqueue;
+
+ spin_lock_init(&guc->wq_lock);
+
+ return 0;
+
+err_workqueue:
+ guc_workqueue_destroy(guc);
+err_pool:
+ guc_stage_desc_pool_destroy(guc);
+ return ret;
+}
+
+void intel_guc_submission_fini(struct intel_guc *guc)
+{
+ if (guc->stage_desc_pool) {
+ guc_proc_desc_destroy(guc);
+ guc_workqueue_destroy(guc);
+ guc_stage_desc_pool_destroy(guc);
+ }
+}
+
+static void guc_interrupts_capture(struct intel_gt *gt)
+{
+ struct intel_uncore *uncore = gt->uncore;
+ u32 irqs = GT_CONTEXT_SWITCH_INTERRUPT;
+ u32 dmask = irqs << 16 | irqs;
+
+ GEM_BUG_ON(INTEL_GEN(gt->i915) < 11);
+
+ /* Don't handle the ctx switch interrupt in GuC submission mode */
+ intel_uncore_rmw(uncore, GEN11_RENDER_COPY_INTR_ENABLE, dmask, 0);
+ intel_uncore_rmw(uncore, GEN11_VCS_VECS_INTR_ENABLE, dmask, 0);
+}
+
+static void guc_interrupts_release(struct intel_gt *gt)
+{
+ struct intel_uncore *uncore = gt->uncore;
+ u32 irqs = GT_CONTEXT_SWITCH_INTERRUPT;
+ u32 dmask = irqs << 16 | irqs;
+
+ GEM_BUG_ON(INTEL_GEN(gt->i915) < 11);
+
+ /* Handle ctx switch interrupts again */
+ intel_uncore_rmw(uncore, GEN11_RENDER_COPY_INTR_ENABLE, 0, dmask);
+ intel_uncore_rmw(uncore, GEN11_VCS_VECS_INTR_ENABLE, 0, dmask);
+}
+
+static void guc_set_default_submission(struct intel_engine_cs *engine)
+{
+ /*
+ * We inherit a bunch of functions from execlists that we'd like
+ * to keep using:
+ *
+ * engine->submit_request = execlists_submit_request;
+ * engine->cancel_requests = execlists_cancel_requests;
+ * engine->schedule = execlists_schedule;
+ *
+ * But we need to override the actual submission backend in order
+ * to talk to the GuC.
+ */
+ intel_execlists_set_default_submission(engine);
+
+ engine->execlists.tasklet.func = guc_submission_tasklet;
+
+ /* do not use execlists park/unpark */
+ engine->park = engine->unpark = NULL;
+
+ engine->reset.prepare = guc_reset_prepare;
+ engine->reset.rewind = guc_reset_rewind;
+ engine->reset.cancel = guc_reset_cancel;
+ engine->reset.finish = guc_reset_finish;
+
+ engine->flags &= ~I915_ENGINE_SUPPORTS_STATS;
+ engine->flags |= I915_ENGINE_NEEDS_BREADCRUMB_TASKLET;
+
+ /*
+ * For the breadcrumb irq to work we need the interrupts to stay
+ * enabled. However, on all platforms on which we'll have support for
+ * GuC submission we don't allow disabling the interrupts at runtime, so
+ * we're always safe with the current flow.
+ */
+ GEM_BUG_ON(engine->irq_enable || engine->irq_disable);
+}
+
+void intel_guc_submission_enable(struct intel_guc *guc)
+{
+ struct intel_gt *gt = guc_to_gt(guc);
+ struct intel_engine_cs *engine;
+ enum intel_engine_id id;
+
+ /*
+ * We're using GuC work items for submitting work through GuC. Since
+ * we're coalescing multiple requests from a single context into a
+ * single work item prior to assigning it to execlist_port, we can
+ * never have more work items than the total number of ports (for all
+	 * engines). The GuC firmware controls the HEAD of the work queue,
+	 * and it is guaranteed to remove the work item from the queue
+	 * before our request is completed.
+ */
+ BUILD_BUG_ON(ARRAY_SIZE(engine->execlists.inflight) *
+ sizeof(struct guc_wq_item) *
+ I915_NUM_ENGINES > GUC_WQ_SIZE);
+
+ guc_proc_desc_init(guc);
+ guc_stage_desc_init(guc);
+
+ /* Take over from manual control of ELSP (execlists) */
+ guc_interrupts_capture(gt);
+
+ for_each_engine(engine, gt, id) {
+ engine->set_default_submission = guc_set_default_submission;
+ engine->set_default_submission(engine);
+ }
+}
+
+void intel_guc_submission_disable(struct intel_guc *guc)
+{
+ struct intel_gt *gt = guc_to_gt(guc);
+
+ GEM_BUG_ON(gt->awake); /* GT should be parked first */
+
+ /* Note: By the time we're here, GuC may have already been reset */
+
+ guc_interrupts_release(gt);
+
+ guc_stage_desc_fini(guc);
+ guc_proc_desc_fini(guc);
+}
+
+static bool __guc_submission_support(struct intel_guc *guc)
+{
+ /* XXX: GuC submission is unavailable for now */
+ return false;
+
+ if (!intel_guc_is_supported(guc))
+ return false;
+
+ return i915_modparams.enable_guc & ENABLE_GUC_SUBMISSION;
+}
+
+void intel_guc_submission_init_early(struct intel_guc *guc)
+{
+ guc->submission_supported = __guc_submission_support(guc);
+}
+
+bool intel_engine_in_guc_submission_mode(const struct intel_engine_cs *engine)
+{
+ return engine->set_default_submission == guc_set_default_submission;
+}
diff --git a/drivers/gpu/drm/i915/gt/uc/intel_guc_submission.h b/drivers/gpu/drm/i915/gt/uc/intel_guc_submission.h
new file mode 100644
index 000000000000..e402a2932592
--- /dev/null
+++ b/drivers/gpu/drm/i915/gt/uc/intel_guc_submission.h
@@ -0,0 +1,23 @@
+/* SPDX-License-Identifier: MIT */
+/*
+ * Copyright © 2014-2019 Intel Corporation
+ */
+
+#ifndef _INTEL_GUC_SUBMISSION_H_
+#define _INTEL_GUC_SUBMISSION_H_
+
+#include <linux/types.h>
+
+struct intel_guc;
+struct intel_engine_cs;
+
+void intel_guc_submission_init_early(struct intel_guc *guc);
+int intel_guc_submission_init(struct intel_guc *guc);
+void intel_guc_submission_enable(struct intel_guc *guc);
+void intel_guc_submission_disable(struct intel_guc *guc);
+void intel_guc_submission_fini(struct intel_guc *guc);
+int intel_guc_preempt_work_create(struct intel_guc *guc);
+void intel_guc_preempt_work_destroy(struct intel_guc *guc);
+bool intel_engine_in_guc_submission_mode(const struct intel_engine_cs *engine);
+
+#endif
diff --git a/drivers/gpu/drm/i915/gt/uc/intel_huc.c b/drivers/gpu/drm/i915/gt/uc/intel_huc.c
new file mode 100644
index 000000000000..32a069841c14
--- /dev/null
+++ b/drivers/gpu/drm/i915/gt/uc/intel_huc.c
@@ -0,0 +1,219 @@
+// SPDX-License-Identifier: MIT
+/*
+ * Copyright © 2016-2019 Intel Corporation
+ */
+
+#include <linux/types.h>
+
+#include "gt/intel_gt.h"
+#include "intel_huc.h"
+#include "i915_drv.h"
+
+/**
+ * DOC: HuC
+ *
+ * The HuC is a dedicated microcontroller for usage in media HEVC (High
+ * Efficiency Video Coding) operations. Userspace can directly use the firmware
+ * capabilities by adding HuC specific commands to batch buffers.
+ *
+ * The kernel driver is only responsible for loading the HuC firmware and
+ * triggering its security authentication, which is performed by the GuC. For
+ * The GuC to correctly perform the authentication, the HuC binary must be
+ * loaded before the GuC one. Loading the HuC is optional; however, not using
+ * the HuC might negatively impact power usage and/or performance of media
+ * workloads, depending on the use-cases.
+ *
+ * See https://github.com/intel/media-driver for the latest details on HuC
+ * functionality.
+ */
+
+/**
+ * DOC: HuC Memory Management
+ *
+ * Similarly to the GuC, the HuC can't do any memory allocations on its own,
+ * with the difference being that the allocations for HuC usage are handled by
+ * the userspace driver instead of the kernel one. The HuC accesses the memory
+ * via the PPGTT belonging to the context loaded on the VCS executing the
+ * HuC-specific commands.
+ */
+
+void intel_huc_init_early(struct intel_huc *huc)
+{
+ struct drm_i915_private *i915 = huc_to_gt(huc)->i915;
+
+ intel_huc_fw_init_early(huc);
+
+ if (INTEL_GEN(i915) >= 11) {
+ huc->status.reg = GEN11_HUC_KERNEL_LOAD_INFO;
+ huc->status.mask = HUC_LOAD_SUCCESSFUL;
+ huc->status.value = HUC_LOAD_SUCCESSFUL;
+ } else {
+ huc->status.reg = HUC_STATUS2;
+ huc->status.mask = HUC_FW_VERIFIED;
+ huc->status.value = HUC_FW_VERIFIED;
+ }
+}
+
+static int intel_huc_rsa_data_create(struct intel_huc *huc)
+{
+ struct intel_gt *gt = huc_to_gt(huc);
+ struct intel_guc *guc = &gt->uc.guc;
+ struct i915_vma *vma;
+ size_t copied;
+ void *vaddr;
+ int err;
+
+ err = i915_inject_probe_error(gt->i915, -ENXIO);
+ if (err)
+ return err;
+
+ /*
+ * HuC firmware will sit above GUC_GGTT_TOP and will not map
+	 * through GTT. Unfortunately, this means GuC cannot perform
+	 * the HuC authentication, as the RSA offset now falls within the
+	 * GuC-inaccessible range. We resort to perma-pinning an additional
+	 * vma within the accessible range that only contains the RSA
+ * signature. The GuC can use this extra pinning to perform
+ * the authentication since its GGTT offset will be GuC
+ * accessible.
+ */
+ GEM_BUG_ON(huc->fw.rsa_size > PAGE_SIZE);
+ vma = intel_guc_allocate_vma(guc, PAGE_SIZE);
+ if (IS_ERR(vma))
+ return PTR_ERR(vma);
+
+ vaddr = i915_gem_object_pin_map(vma->obj, I915_MAP_WB);
+ if (IS_ERR(vaddr)) {
+ i915_vma_unpin_and_release(&vma, 0);
+ return PTR_ERR(vaddr);
+ }
+
+ copied = intel_uc_fw_copy_rsa(&huc->fw, vaddr, vma->size);
+ GEM_BUG_ON(copied < huc->fw.rsa_size);
+
+ i915_gem_object_unpin_map(vma->obj);
+
+ huc->rsa_data = vma;
+
+ return 0;
+}
+
+static void intel_huc_rsa_data_destroy(struct intel_huc *huc)
+{
+ i915_vma_unpin_and_release(&huc->rsa_data, 0);
+}
+
+int intel_huc_init(struct intel_huc *huc)
+{
+ struct drm_i915_private *i915 = huc_to_gt(huc)->i915;
+ int err;
+
+ err = intel_uc_fw_init(&huc->fw);
+ if (err)
+ goto out;
+
+ /*
+	 * The HuC firmware image is outside the GuC-accessible range.
+	 * Copy the RSA signature out of the image into
+	 * a perma-pinned region set aside for it.
+ */
+ err = intel_huc_rsa_data_create(huc);
+ if (err)
+ goto out_fini;
+
+ return 0;
+
+out_fini:
+ intel_uc_fw_fini(&huc->fw);
+out:
+ intel_uc_fw_cleanup_fetch(&huc->fw);
+ DRM_DEV_DEBUG_DRIVER(i915->drm.dev, "failed with %d\n", err);
+ return err;
+}
+
+void intel_huc_fini(struct intel_huc *huc)
+{
+ if (!intel_uc_fw_is_available(&huc->fw))
+ return;
+
+ intel_huc_rsa_data_destroy(huc);
+ intel_uc_fw_fini(&huc->fw);
+}
+
+/**
+ * intel_huc_auth() - Authenticate HuC uCode
+ * @huc: intel_huc structure
+ *
+ * Called after HuC and GuC firmware loading during intel_uc_init_hw().
+ *
+ * This function invokes the GuC action to authenticate the HuC firmware,
+ * passing the offset of the RSA signature to intel_guc_auth_huc(). It then
+ * waits for up to 50ms for the firmware verification ACK.
+ */
+int intel_huc_auth(struct intel_huc *huc)
+{
+ struct intel_gt *gt = huc_to_gt(huc);
+ struct intel_guc *guc = &gt->uc.guc;
+ int ret;
+
+ GEM_BUG_ON(intel_huc_is_authenticated(huc));
+
+ if (!intel_uc_fw_is_loaded(&huc->fw))
+ return -ENOEXEC;
+
+ ret = i915_inject_probe_error(gt->i915, -ENXIO);
+ if (ret)
+ goto fail;
+
+ ret = intel_guc_auth_huc(guc,
+ intel_guc_ggtt_offset(guc, huc->rsa_data));
+ if (ret) {
+ DRM_ERROR("HuC: GuC did not ack Auth request %d\n", ret);
+ goto fail;
+ }
+
+ /* Check authentication status, it should be done by now */
+ ret = __intel_wait_for_register(gt->uncore,
+ huc->status.reg,
+ huc->status.mask,
+ huc->status.value,
+ 2, 50, NULL);
+ if (ret) {
+ DRM_ERROR("HuC: Firmware not verified %d\n", ret);
+ goto fail;
+ }
+
+ intel_uc_fw_change_status(&huc->fw, INTEL_UC_FIRMWARE_RUNNING);
+ return 0;
+
+fail:
+ i915_probe_error(gt->i915, "HuC: Authentication failed %d\n", ret);
+ intel_uc_fw_change_status(&huc->fw, INTEL_UC_FIRMWARE_FAIL);
+ return ret;
+}
+
+/**
+ * intel_huc_check_status() - check HuC status
+ * @huc: intel_huc structure
+ *
+ * This function reads the status register to verify whether the HuC
+ * firmware was successfully loaded.
+ *
+ * Returns: 1 if HuC firmware is loaded and verified,
+ * 0 if HuC firmware is not loaded, and -ENODEV if HuC
+ * is not present on this platform.
+ */
+int intel_huc_check_status(struct intel_huc *huc)
+{
+ struct intel_gt *gt = huc_to_gt(huc);
+ intel_wakeref_t wakeref;
+ u32 status = 0;
+
+ if (!intel_huc_is_supported(huc))
+ return -ENODEV;
+
+ with_intel_runtime_pm(gt->uncore->rpm, wakeref)
+ status = intel_uncore_read(gt->uncore, huc->status.reg);
+
+ return (status & huc->status.mask) == huc->status.value;
+}
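+
+/*
+ * Usage sketch (illustrative): the return value is a tri-state, so a caller
+ * must treat the error case separately from the "not loaded" case:
+ *
+ *	int status = intel_huc_check_status(huc);
+ *
+ *	if (status < 0)
+ *		return status;
+ *	if (!status)
+ *		return -ENOEXEC;
+ */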
diff --git a/drivers/gpu/drm/i915/gt/uc/intel_huc.h b/drivers/gpu/drm/i915/gt/uc/intel_huc.h
new file mode 100644
index 000000000000..644c059fe01d
--- /dev/null
+++ b/drivers/gpu/drm/i915/gt/uc/intel_huc.h
@@ -0,0 +1,54 @@
+/* SPDX-License-Identifier: MIT */
+/*
+ * Copyright © 2014-2019 Intel Corporation
+ */
+
+#ifndef _INTEL_HUC_H_
+#define _INTEL_HUC_H_
+
+#include "i915_reg.h"
+#include "intel_uc_fw.h"
+#include "intel_huc_fw.h"
+
+struct intel_huc {
+ /* Generic uC firmware management */
+ struct intel_uc_fw fw;
+
+ /* HuC-specific additions */
+ struct i915_vma *rsa_data;
+
+ struct {
+ i915_reg_t reg;
+ u32 mask;
+ u32 value;
+ } status;
+};
+
+void intel_huc_init_early(struct intel_huc *huc);
+int intel_huc_init(struct intel_huc *huc);
+void intel_huc_fini(struct intel_huc *huc);
+int intel_huc_auth(struct intel_huc *huc);
+int intel_huc_check_status(struct intel_huc *huc);
+
+static inline int intel_huc_sanitize(struct intel_huc *huc)
+{
+ intel_uc_fw_sanitize(&huc->fw);
+ return 0;
+}
+
+static inline bool intel_huc_is_supported(struct intel_huc *huc)
+{
+ return intel_uc_fw_is_supported(&huc->fw);
+}
+
+static inline bool intel_huc_is_enabled(struct intel_huc *huc)
+{
+ return intel_uc_fw_is_enabled(&huc->fw);
+}
+
+static inline bool intel_huc_is_authenticated(struct intel_huc *huc)
+{
+ return intel_uc_fw_is_running(&huc->fw);
+}
+
+#endif
diff --git a/drivers/gpu/drm/i915/gt/uc/intel_huc_fw.c b/drivers/gpu/drm/i915/gt/uc/intel_huc_fw.c
new file mode 100644
index 000000000000..eee193bf2cc4
--- /dev/null
+++ b/drivers/gpu/drm/i915/gt/uc/intel_huc_fw.c
@@ -0,0 +1,43 @@
+// SPDX-License-Identifier: MIT
+/*
+ * Copyright © 2014-2019 Intel Corporation
+ */
+
+#include "gt/intel_gt.h"
+#include "intel_huc_fw.h"
+#include "i915_drv.h"
+
+/**
+ * intel_huc_fw_init_early() - initializes HuC firmware struct
+ * @huc: intel_huc struct
+ *
+ * On platforms with a HuC, this selects the firmware for uploading.
+ */
+void intel_huc_fw_init_early(struct intel_huc *huc)
+{
+ struct intel_gt *gt = huc_to_gt(huc);
+ struct intel_uc *uc = &gt->uc;
+ struct drm_i915_private *i915 = gt->i915;
+
+ intel_uc_fw_init_early(&huc->fw, INTEL_UC_FW_TYPE_HUC,
+ intel_uc_uses_guc(uc),
+ INTEL_INFO(i915)->platform, INTEL_REVID(i915));
+}
+
+/**
+ * intel_huc_fw_upload() - load HuC uCode to device
+ * @huc: intel_huc structure
+ *
+ * Called from intel_uc_init_hw() during driver load, resume from sleep and
+ * after a GPU reset. Note that HuC must be loaded before GuC.
+ *
+ * The firmware image should have already been fetched into memory, so only
+ * check that fetch succeeded, and then transfer the image to the h/w.
+ *
+ * Return: non-zero code on error
+ */
+int intel_huc_fw_upload(struct intel_huc *huc)
+{
+ /* HW doesn't look at destination address for HuC, so set it to 0 */
+ return intel_uc_fw_upload(&huc->fw, 0, HUC_UKERNEL);
+}
diff --git a/drivers/gpu/drm/i915/gt/uc/intel_huc_fw.h b/drivers/gpu/drm/i915/gt/uc/intel_huc_fw.h
new file mode 100644
index 000000000000..b791269ce923
--- /dev/null
+++ b/drivers/gpu/drm/i915/gt/uc/intel_huc_fw.h
@@ -0,0 +1,14 @@
+/* SPDX-License-Identifier: MIT */
+/*
+ * Copyright © 2014-2019 Intel Corporation
+ */
+
+#ifndef _INTEL_HUC_FW_H_
+#define _INTEL_HUC_FW_H_
+
+struct intel_huc;
+
+void intel_huc_fw_init_early(struct intel_huc *huc);
+int intel_huc_fw_upload(struct intel_huc *huc);
+
+#endif
diff --git a/drivers/gpu/drm/i915/gt/uc/intel_uc.c b/drivers/gpu/drm/i915/gt/uc/intel_uc.c
new file mode 100644
index 000000000000..64934a876a50
--- /dev/null
+++ b/drivers/gpu/drm/i915/gt/uc/intel_uc.c
@@ -0,0 +1,620 @@
+// SPDX-License-Identifier: MIT
+/*
+ * Copyright © 2016-2019 Intel Corporation
+ */
+
+#include "gt/intel_gt.h"
+#include "gt/intel_reset.h"
+#include "intel_guc.h"
+#include "intel_guc_ads.h"
+#include "intel_guc_submission.h"
+#include "intel_uc.h"
+
+#include "i915_drv.h"
+
+static const struct intel_uc_ops uc_ops_off;
+static const struct intel_uc_ops uc_ops_on;
+
+/*
+ * Reset GuC providing us with fresh state for both GuC and HuC.
+ */
+static int __intel_uc_reset_hw(struct intel_uc *uc)
+{
+ struct intel_gt *gt = uc_to_gt(uc);
+ int ret;
+ u32 guc_status;
+
+ ret = i915_inject_probe_error(gt->i915, -ENXIO);
+ if (ret)
+ return ret;
+
+ ret = intel_reset_guc(gt);
+ if (ret) {
+ DRM_ERROR("Failed to reset GuC, ret = %d\n", ret);
+ return ret;
+ }
+
+ guc_status = intel_uncore_read(gt->uncore, GUC_STATUS);
+ WARN(!(guc_status & GS_MIA_IN_RESET),
+ "GuC status: 0x%x, MIA core expected to be in reset\n",
+ guc_status);
+
+ return ret;
+}
+
+static void __confirm_options(struct intel_uc *uc)
+{
+ struct drm_i915_private *i915 = uc_to_gt(uc)->i915;
+
+ DRM_DEV_DEBUG_DRIVER(i915->drm.dev,
+ "enable_guc=%d (guc:%s submission:%s huc:%s)\n",
+ i915_modparams.enable_guc,
+ yesno(intel_uc_uses_guc(uc)),
+ yesno(intel_uc_uses_guc_submission(uc)),
+ yesno(intel_uc_uses_huc(uc)));
+
+ if (i915_modparams.enable_guc == -1)
+ return;
+
+ if (i915_modparams.enable_guc == 0) {
+ GEM_BUG_ON(intel_uc_uses_guc(uc));
+ GEM_BUG_ON(intel_uc_uses_guc_submission(uc));
+ GEM_BUG_ON(intel_uc_uses_huc(uc));
+ return;
+ }
+
+ if (!intel_uc_supports_guc(uc))
+ dev_info(i915->drm.dev,
+ "Incompatible option enable_guc=%d - %s\n",
+ i915_modparams.enable_guc, "GuC is not supported!");
+
+ if (i915_modparams.enable_guc & ENABLE_GUC_LOAD_HUC &&
+ !intel_uc_supports_huc(uc))
+ dev_info(i915->drm.dev,
+ "Incompatible option enable_guc=%d - %s\n",
+ i915_modparams.enable_guc, "HuC is not supported!");
+
+ if (i915_modparams.enable_guc & ENABLE_GUC_SUBMISSION &&
+ !intel_uc_supports_guc_submission(uc))
+ dev_info(i915->drm.dev,
+ "Incompatible option enable_guc=%d - %s\n",
+ i915_modparams.enable_guc, "GuC submission is N/A");
+
+ if (i915_modparams.enable_guc & ~(ENABLE_GUC_SUBMISSION |
+ ENABLE_GUC_LOAD_HUC))
+ dev_info(i915->drm.dev,
+ "Incompatible option enable_guc=%d - %s\n",
+ i915_modparams.enable_guc, "undocumented flag");
+}
+
+void intel_uc_init_early(struct intel_uc *uc)
+{
+ intel_guc_init_early(&uc->guc);
+ intel_huc_init_early(&uc->huc);
+
+ __confirm_options(uc);
+
+ if (intel_uc_uses_guc(uc))
+ uc->ops = &uc_ops_on;
+ else
+ uc->ops = &uc_ops_off;
+}
+
+void intel_uc_driver_late_release(struct intel_uc *uc)
+{
+}
+
+/**
+ * intel_uc_init_mmio - set up uC MMIO access
+ * @uc: the intel_uc structure
+ *
+ * Set up the minimal state necessary for MMIO accesses later in the
+ * initialization sequence.
+ */
+void intel_uc_init_mmio(struct intel_uc *uc)
+{
+ intel_guc_init_send_regs(&uc->guc);
+}
+
+static void __uc_capture_load_err_log(struct intel_uc *uc)
+{
+ struct intel_guc *guc = &uc->guc;
+
+ if (guc->log.vma && !uc->load_err_log)
+ uc->load_err_log = i915_gem_object_get(guc->log.vma->obj);
+}
+
+static void __uc_free_load_err_log(struct intel_uc *uc)
+{
+ struct drm_i915_gem_object *log = fetch_and_zero(&uc->load_err_log);
+
+ if (log)
+ i915_gem_object_put(log);
+}
+
+static inline bool guc_communication_enabled(struct intel_guc *guc)
+{
+ return intel_guc_ct_enabled(&guc->ct);
+}
+
+/*
+ * Events triggered while CT buffers are disabled are logged in the SCRATCH_15
+ * register using the same bits used in the CT message payload. Since our
+ * communication channel with the GuC is turned off at this point, we can save
+ * the message and handle it after we turn it back on.
+ */
+static void guc_clear_mmio_msg(struct intel_guc *guc)
+{
+ intel_uncore_write(guc_to_gt(guc)->uncore, SOFT_SCRATCH(15), 0);
+}
+
+static void guc_get_mmio_msg(struct intel_guc *guc)
+{
+ u32 val;
+
+ spin_lock_irq(&guc->irq_lock);
+
+ val = intel_uncore_read(guc_to_gt(guc)->uncore, SOFT_SCRATCH(15));
+ guc->mmio_msg |= val & guc->msg_enabled_mask;
+
+ /*
+ * clear all events, including the ones we're not currently servicing,
+ * to make sure we don't try to process a stale message if we enable
+ * handling of more events later.
+ */
+ guc_clear_mmio_msg(guc);
+
+ spin_unlock_irq(&guc->irq_lock);
+}
+
+static void guc_handle_mmio_msg(struct intel_guc *guc)
+{
+ struct drm_i915_private *i915 = guc_to_gt(guc)->i915;
+
+ /* we need communication to be enabled to reply to GuC */
+ GEM_BUG_ON(!guc_communication_enabled(guc));
+
+ if (!guc->mmio_msg)
+ return;
+
+ spin_lock_irq(&i915->irq_lock);
+ intel_guc_to_host_process_recv_msg(guc, &guc->mmio_msg, 1);
+ spin_unlock_irq(&i915->irq_lock);
+
+ guc->mmio_msg = 0;
+}
+
+static void guc_reset_interrupts(struct intel_guc *guc)
+{
+ guc->interrupts.reset(guc);
+}
+
+static void guc_enable_interrupts(struct intel_guc *guc)
+{
+ guc->interrupts.enable(guc);
+}
+
+static void guc_disable_interrupts(struct intel_guc *guc)
+{
+ guc->interrupts.disable(guc);
+}
+
+static int guc_enable_communication(struct intel_guc *guc)
+{
+ struct drm_i915_private *i915 = guc_to_gt(guc)->i915;
+ int ret;
+
+ GEM_BUG_ON(guc_communication_enabled(guc));
+
+ ret = i915_inject_probe_error(i915, -ENXIO);
+ if (ret)
+ return ret;
+
+ ret = intel_guc_ct_enable(&guc->ct);
+ if (ret)
+ return ret;
+
+ /* check for mmio messages received before/during the CT enable */
+ guc_get_mmio_msg(guc);
+ guc_handle_mmio_msg(guc);
+
+ guc_enable_interrupts(guc);
+
+ /* check for CT messages received before we enabled interrupts */
+ spin_lock_irq(&i915->irq_lock);
+ intel_guc_ct_event_handler(&guc->ct);
+ spin_unlock_irq(&i915->irq_lock);
+
+ DRM_INFO("GuC communication enabled\n");
+
+ return 0;
+}
+
+static void guc_disable_communication(struct intel_guc *guc)
+{
+ /*
+	 * Events generated during or after CT disable are logged by the GuC
+	 * via MMIO. Make sure the register is clear before disabling CT since
+ * all events we cared about have already been processed via CT.
+ */
+ guc_clear_mmio_msg(guc);
+
+ guc_disable_interrupts(guc);
+
+ intel_guc_ct_disable(&guc->ct);
+
+ /*
+ * Check for messages received during/after the CT disable. We do not
+ * expect any messages to have arrived via CT between the interrupt
+ * disable and the CT disable because GuC should've been idle until we
+ * triggered the CT disable protocol.
+ */
+ guc_get_mmio_msg(guc);
+
+ DRM_INFO("GuC communication disabled\n");
+}
+
+static void __uc_fetch_firmwares(struct intel_uc *uc)
+{
+ int err;
+
+ GEM_BUG_ON(!intel_uc_uses_guc(uc));
+
+ err = intel_uc_fw_fetch(&uc->guc.fw);
+ if (err)
+ return;
+
+ if (intel_uc_uses_huc(uc))
+ intel_uc_fw_fetch(&uc->huc.fw);
+}
+
+static void __uc_cleanup_firmwares(struct intel_uc *uc)
+{
+ intel_uc_fw_cleanup_fetch(&uc->huc.fw);
+ intel_uc_fw_cleanup_fetch(&uc->guc.fw);
+}
+
+static void __uc_init(struct intel_uc *uc)
+{
+ struct intel_guc *guc = &uc->guc;
+ struct intel_huc *huc = &uc->huc;
+ int ret;
+
+ GEM_BUG_ON(!intel_uc_uses_guc(uc));
+
+ /* XXX: GuC submission is unavailable for now */
+ GEM_BUG_ON(intel_uc_supports_guc_submission(uc));
+
+ ret = intel_guc_init(guc);
+ if (ret) {
+ intel_uc_fw_cleanup_fetch(&huc->fw);
+ return;
+ }
+
+ if (intel_uc_uses_huc(uc))
+ intel_huc_init(huc);
+}
+
+static void __uc_fini(struct intel_uc *uc)
+{
+ intel_huc_fini(&uc->huc);
+ intel_guc_fini(&uc->guc);
+
+ __uc_free_load_err_log(uc);
+}
+
+static int __uc_sanitize(struct intel_uc *uc)
+{
+ struct intel_guc *guc = &uc->guc;
+ struct intel_huc *huc = &uc->huc;
+
+ GEM_BUG_ON(!intel_uc_supports_guc(uc));
+
+ intel_huc_sanitize(huc);
+ intel_guc_sanitize(guc);
+
+ return __intel_uc_reset_hw(uc);
+}
+
+/* Initialize and verify the uC regs related to uC positioning in WOPCM */
+static int uc_init_wopcm(struct intel_uc *uc)
+{
+ struct intel_gt *gt = uc_to_gt(uc);
+ struct intel_uncore *uncore = gt->uncore;
+ u32 base = intel_wopcm_guc_base(&gt->i915->wopcm);
+ u32 size = intel_wopcm_guc_size(&gt->i915->wopcm);
+ u32 huc_agent = intel_uc_uses_huc(uc) ? HUC_LOADING_AGENT_GUC : 0;
+ u32 mask;
+ int err;
+
+ if (unlikely(!base || !size)) {
+ i915_probe_error(gt->i915, "Unsuccessful WOPCM partitioning\n");
+ return -E2BIG;
+ }
+
+ GEM_BUG_ON(!intel_uc_supports_guc(uc));
+ GEM_BUG_ON(!(base & GUC_WOPCM_OFFSET_MASK));
+ GEM_BUG_ON(base & ~GUC_WOPCM_OFFSET_MASK);
+ GEM_BUG_ON(!(size & GUC_WOPCM_SIZE_MASK));
+ GEM_BUG_ON(size & ~GUC_WOPCM_SIZE_MASK);
+
+ err = i915_inject_probe_error(gt->i915, -ENXIO);
+ if (err)
+ return err;
+
+ mask = GUC_WOPCM_SIZE_MASK | GUC_WOPCM_SIZE_LOCKED;
+ err = intel_uncore_write_and_verify(uncore, GUC_WOPCM_SIZE, size, mask,
+ size | GUC_WOPCM_SIZE_LOCKED);
+ if (err)
+ goto err_out;
+
+ mask = GUC_WOPCM_OFFSET_MASK | GUC_WOPCM_OFFSET_VALID | huc_agent;
+ err = intel_uncore_write_and_verify(uncore, DMA_GUC_WOPCM_OFFSET,
+ base | huc_agent, mask,
+ base | huc_agent |
+ GUC_WOPCM_OFFSET_VALID);
+ if (err)
+ goto err_out;
+
+ return 0;
+
+err_out:
+ i915_probe_error(gt->i915, "Failed to init uC WOPCM registers!\n");
+ i915_probe_error(gt->i915, "%s(%#x)=%#x\n", "DMA_GUC_WOPCM_OFFSET",
+ i915_mmio_reg_offset(DMA_GUC_WOPCM_OFFSET),
+ intel_uncore_read(uncore, DMA_GUC_WOPCM_OFFSET));
+ i915_probe_error(gt->i915, "%s(%#x)=%#x\n", "GUC_WOPCM_SIZE",
+ i915_mmio_reg_offset(GUC_WOPCM_SIZE),
+ intel_uncore_read(uncore, GUC_WOPCM_SIZE));
+
+ return err;
+}
+
+static bool uc_is_wopcm_locked(struct intel_uc *uc)
+{
+ struct intel_gt *gt = uc_to_gt(uc);
+ struct intel_uncore *uncore = gt->uncore;
+
+ return (intel_uncore_read(uncore, GUC_WOPCM_SIZE) & GUC_WOPCM_SIZE_LOCKED) ||
+ (intel_uncore_read(uncore, DMA_GUC_WOPCM_OFFSET) & GUC_WOPCM_OFFSET_VALID);
+}
+
+static int __uc_check_hw(struct intel_uc *uc)
+{
+ if (!intel_uc_supports_guc(uc))
+ return 0;
+
+ /*
+ * We can silently continue without GuC only if it was never enabled
+	 * before on this system after reboot; otherwise we risk GPU hangs.
+	 * To check if GuC was loaded before, we look at the WOPCM registers.
+ */
+ if (uc_is_wopcm_locked(uc))
+ return -EIO;
+
+ return 0;
+}
+
+static int __uc_init_hw(struct intel_uc *uc)
+{
+ struct drm_i915_private *i915 = uc_to_gt(uc)->i915;
+ struct intel_guc *guc = &uc->guc;
+ struct intel_huc *huc = &uc->huc;
+ int ret, attempts;
+
+ GEM_BUG_ON(!intel_uc_supports_guc(uc));
+ GEM_BUG_ON(!intel_uc_uses_guc(uc));
+
+ if (!intel_uc_fw_is_available(&guc->fw)) {
+ ret = __uc_check_hw(uc) ||
+ intel_uc_fw_is_overridden(&guc->fw) ||
+ intel_uc_supports_guc_submission(uc) ?
+ intel_uc_fw_status_to_error(guc->fw.status) : 0;
+ goto err_out;
+ }
+
+ ret = uc_init_wopcm(uc);
+ if (ret)
+ goto err_out;
+
+ guc_reset_interrupts(guc);
+
+ /* WaEnableuKernelHeaderValidFix:skl */
+ /* WaEnableGuCBootHashCheckNotSet:skl,bxt,kbl */
+ if (IS_GEN(i915, 9))
+ attempts = 3;
+ else
+ attempts = 1;
+
+ while (attempts--) {
+ /*
+ * Always reset the GuC just before (re)loading, so
+ * that the state and timing are fairly predictable
+ */
+ ret = __uc_sanitize(uc);
+ if (ret)
+ goto err_out;
+
+ intel_huc_fw_upload(huc);
+ intel_guc_ads_reset(guc);
+ intel_guc_write_params(guc);
+ ret = intel_guc_fw_upload(guc);
+ if (ret == 0)
+ break;
+
+ DRM_DEBUG_DRIVER("GuC fw load failed: %d; will reset and "
+ "retry %d more time(s)\n", ret, attempts);
+ }
+
+	/* Did we succeed or run out of retries? */
+ if (ret)
+ goto err_log_capture;
+
+ ret = guc_enable_communication(guc);
+ if (ret)
+ goto err_log_capture;
+
+ intel_huc_auth(huc);
+
+ ret = intel_guc_sample_forcewake(guc);
+ if (ret)
+ goto err_communication;
+
+ if (intel_uc_supports_guc_submission(uc))
+ intel_guc_submission_enable(guc);
+
+ dev_info(i915->drm.dev, "%s firmware %s version %u.%u %s:%s\n",
+ intel_uc_fw_type_repr(INTEL_UC_FW_TYPE_GUC), guc->fw.path,
+ guc->fw.major_ver_found, guc->fw.minor_ver_found,
+ "submission",
+ enableddisabled(intel_uc_supports_guc_submission(uc)));
+
+ if (intel_uc_uses_huc(uc)) {
+ dev_info(i915->drm.dev, "%s firmware %s version %u.%u %s:%s\n",
+ intel_uc_fw_type_repr(INTEL_UC_FW_TYPE_HUC),
+ huc->fw.path,
+ huc->fw.major_ver_found, huc->fw.minor_ver_found,
+ "authenticated",
+ yesno(intel_huc_is_authenticated(huc)));
+ }
+
+ return 0;
+
+ /*
+ * We've failed to load the firmware :(
+ */
+err_communication:
+ guc_disable_communication(guc);
+err_log_capture:
+ __uc_capture_load_err_log(uc);
+err_out:
+ __uc_sanitize(uc);
+
+ if (!ret) {
+ dev_notice(i915->drm.dev, "GuC is uninitialized\n");
+ /* We want to run without GuC submission */
+ return 0;
+ }
+
+ i915_probe_error(i915, "GuC initialization failed %d\n", ret);
+
+ /* We want to keep KMS alive */
+ return -EIO;
+}
+
+static void __uc_fini_hw(struct intel_uc *uc)
+{
+ struct intel_guc *guc = &uc->guc;
+
+ if (!intel_guc_is_running(guc))
+ return;
+
+ if (intel_uc_supports_guc_submission(uc))
+ intel_guc_submission_disable(guc);
+
+ if (guc_communication_enabled(guc))
+ guc_disable_communication(guc);
+
+ __uc_sanitize(uc);
+}
+
+/**
+ * intel_uc_reset_prepare - Prepare for reset
+ * @uc: the intel_uc structure
+ *
+ * Prepare for a full GPU reset.
+ */
+void intel_uc_reset_prepare(struct intel_uc *uc)
+{
+ struct intel_guc *guc = &uc->guc;
+
+ if (!intel_guc_is_running(guc))
+ return;
+
+ guc_disable_communication(guc);
+ __uc_sanitize(uc);
+}
+
+void intel_uc_runtime_suspend(struct intel_uc *uc)
+{
+ struct intel_guc *guc = &uc->guc;
+ int err;
+
+ if (!intel_guc_is_running(guc))
+ return;
+
+ err = intel_guc_suspend(guc);
+ if (err)
+ DRM_DEBUG_DRIVER("Failed to suspend GuC, err=%d", err);
+
+ guc_disable_communication(guc);
+}
+
+void intel_uc_suspend(struct intel_uc *uc)
+{
+ struct intel_guc *guc = &uc->guc;
+ intel_wakeref_t wakeref;
+
+ if (!intel_guc_is_running(guc))
+ return;
+
+ with_intel_runtime_pm(uc_to_gt(uc)->uncore->rpm, wakeref)
+ intel_uc_runtime_suspend(uc);
+}
+
+static int __uc_resume(struct intel_uc *uc, bool enable_communication)
+{
+ struct intel_guc *guc = &uc->guc;
+ int err;
+
+ if (!intel_guc_is_running(guc))
+ return 0;
+
+ /* Make sure we enable communication if and only if it's disabled */
+ GEM_BUG_ON(enable_communication == guc_communication_enabled(guc));
+
+ if (enable_communication)
+ guc_enable_communication(guc);
+
+ err = intel_guc_resume(guc);
+ if (err) {
+ DRM_DEBUG_DRIVER("Failed to resume GuC, err=%d", err);
+ return err;
+ }
+
+ return 0;
+}
+
+int intel_uc_resume(struct intel_uc *uc)
+{
+ /*
+ * When coming out of S3/S4 we sanitize and re-init the HW, so
+ * communication is already re-enabled at this point.
+ */
+ return __uc_resume(uc, false);
+}
+
+int intel_uc_runtime_resume(struct intel_uc *uc)
+{
+ /*
+ * During runtime resume we don't sanitize, so we need to re-init
+ * communication as well.
+ */
+ return __uc_resume(uc, true);
+}
+
+static const struct intel_uc_ops uc_ops_off = {
+ .init_hw = __uc_check_hw,
+};
+
+static const struct intel_uc_ops uc_ops_on = {
+ .sanitize = __uc_sanitize,
+
+ .init_fw = __uc_fetch_firmwares,
+ .fini_fw = __uc_cleanup_firmwares,
+
+ .init = __uc_init,
+ .fini = __uc_fini,
+
+ .init_hw = __uc_init_hw,
+ .fini_hw = __uc_fini_hw,
+};
diff --git a/drivers/gpu/drm/i915/gt/uc/intel_uc.h b/drivers/gpu/drm/i915/gt/uc/intel_uc.h
new file mode 100644
index 000000000000..49c913524686
--- /dev/null
+++ b/drivers/gpu/drm/i915/gt/uc/intel_uc.h
@@ -0,0 +1,89 @@
+/* SPDX-License-Identifier: MIT */
+/*
+ * Copyright © 2014-2019 Intel Corporation
+ */
+
+#ifndef _INTEL_UC_H_
+#define _INTEL_UC_H_
+
+#include "intel_guc.h"
+#include "intel_huc.h"
+#include "i915_params.h"
+
+struct intel_uc;
+
+struct intel_uc_ops {
+ int (*sanitize)(struct intel_uc *uc);
+ void (*init_fw)(struct intel_uc *uc);
+ void (*fini_fw)(struct intel_uc *uc);
+ void (*init)(struct intel_uc *uc);
+ void (*fini)(struct intel_uc *uc);
+ int (*init_hw)(struct intel_uc *uc);
+ void (*fini_hw)(struct intel_uc *uc);
+};
+
+struct intel_uc {
+ struct intel_uc_ops const *ops;
+ struct intel_guc guc;
+ struct intel_huc huc;
+
+ /* Snapshot of GuC log from last failed load */
+ struct drm_i915_gem_object *load_err_log;
+};
+
+void intel_uc_init_early(struct intel_uc *uc);
+void intel_uc_driver_late_release(struct intel_uc *uc);
+void intel_uc_init_mmio(struct intel_uc *uc);
+void intel_uc_reset_prepare(struct intel_uc *uc);
+void intel_uc_suspend(struct intel_uc *uc);
+void intel_uc_runtime_suspend(struct intel_uc *uc);
+int intel_uc_resume(struct intel_uc *uc);
+int intel_uc_runtime_resume(struct intel_uc *uc);
+
+static inline bool intel_uc_supports_guc(struct intel_uc *uc)
+{
+ return intel_guc_is_supported(&uc->guc);
+}
+
+static inline bool intel_uc_uses_guc(struct intel_uc *uc)
+{
+ return intel_guc_is_enabled(&uc->guc);
+}
+
+static inline bool intel_uc_supports_guc_submission(struct intel_uc *uc)
+{
+ return intel_guc_is_submission_supported(&uc->guc);
+}
+
+static inline bool intel_uc_uses_guc_submission(struct intel_uc *uc)
+{
+ return intel_guc_is_submission_supported(&uc->guc);
+}
+
+static inline bool intel_uc_supports_huc(struct intel_uc *uc)
+{
+ return intel_uc_supports_guc(uc);
+}
+
+static inline bool intel_uc_uses_huc(struct intel_uc *uc)
+{
+ return intel_huc_is_enabled(&uc->huc);
+}
+
+#define intel_uc_ops_function(_NAME, _OPS, _TYPE, _RET) \
+static inline _TYPE intel_uc_##_NAME(struct intel_uc *uc) \
+{ \
+ if (uc->ops->_OPS) \
+ return uc->ops->_OPS(uc); \
+ return _RET; \
+}
+intel_uc_ops_function(sanitize, sanitize, int, 0);
+intel_uc_ops_function(fetch_firmwares, init_fw, void, );
+intel_uc_ops_function(cleanup_firmwares, fini_fw, void, );
+intel_uc_ops_function(init, init, void, );
+intel_uc_ops_function(fini, fini, void, );
+intel_uc_ops_function(init_hw, init_hw, int, 0);
+intel_uc_ops_function(fini_hw, fini_hw, void, );
+#undef intel_uc_ops_function
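+
+/*
+ * For reference, intel_uc_ops_function(init_hw, init_hw, int, 0) above
+ * expands to:
+ *
+ *	static inline int intel_uc_init_hw(struct intel_uc *uc)
+ *	{
+ *		if (uc->ops->init_hw)
+ *			return uc->ops->init_hw(uc);
+ *		return 0;
+ *	}
+ */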
+
+#endif
diff --git a/drivers/gpu/drm/i915/gt/uc/intel_uc_fw.c b/drivers/gpu/drm/i915/gt/uc/intel_uc_fw.c
new file mode 100644
index 000000000000..8ee0a0c7f447
--- /dev/null
+++ b/drivers/gpu/drm/i915/gt/uc/intel_uc_fw.c
@@ -0,0 +1,604 @@
+// SPDX-License-Identifier: MIT
+/*
+ * Copyright © 2016-2019 Intel Corporation
+ */
+
+#include <linux/bitfield.h>
+#include <linux/firmware.h>
+#include <drm/drm_print.h>
+
+#include "intel_uc_fw.h"
+#include "intel_uc_fw_abi.h"
+#include "i915_drv.h"
+
+static inline struct intel_gt *__uc_fw_to_gt(struct intel_uc_fw *uc_fw)
+{
+ GEM_BUG_ON(uc_fw->status == INTEL_UC_FIRMWARE_UNINITIALIZED);
+ if (uc_fw->type == INTEL_UC_FW_TYPE_GUC)
+ return container_of(uc_fw, struct intel_gt, uc.guc.fw);
+
+ GEM_BUG_ON(uc_fw->type != INTEL_UC_FW_TYPE_HUC);
+ return container_of(uc_fw, struct intel_gt, uc.huc.fw);
+}
+
+#ifdef CONFIG_DRM_I915_DEBUG_GUC
+void intel_uc_fw_change_status(struct intel_uc_fw *uc_fw,
+ enum intel_uc_fw_status status)
+{
+ uc_fw->__status = status;
+ DRM_DEV_DEBUG_DRIVER(__uc_fw_to_gt(uc_fw)->i915->drm.dev,
+ "%s firmware -> %s\n",
+ intel_uc_fw_type_repr(uc_fw->type),
+ status == INTEL_UC_FIRMWARE_SELECTED ?
+ uc_fw->path : intel_uc_fw_status_repr(status));
+}
+#endif
+
+/*
+ * List of required GuC and HuC binaries per-platform.
+ * Must be ordered based on platform + revid, from newer to older.
+ *
+ * TGL 35.2 is interface-compatible with the 33.0 releases used on previous
+ * Gens; the deltas between 33.0 and 35.2 are only new additions to support
+ * new Gen12 features.
+ */
+#define INTEL_UC_FIRMWARE_DEFS(fw_def, guc_def, huc_def) \
+ fw_def(TIGERLAKE, 0, guc_def(tgl, 35, 2, 0), huc_def(tgl, 7, 0, 3)) \
+ fw_def(ELKHARTLAKE, 0, guc_def(ehl, 33, 0, 4), huc_def(ehl, 9, 0, 0)) \
+ fw_def(ICELAKE, 0, guc_def(icl, 33, 0, 0), huc_def(icl, 9, 0, 0)) \
+ fw_def(COFFEELAKE, 5, guc_def(cml, 33, 0, 0), huc_def(cml, 4, 0, 0)) \
+ fw_def(COFFEELAKE, 0, guc_def(kbl, 33, 0, 0), huc_def(kbl, 4, 0, 0)) \
+ fw_def(GEMINILAKE, 0, guc_def(glk, 33, 0, 0), huc_def(glk, 4, 0, 0)) \
+ fw_def(KABYLAKE, 0, guc_def(kbl, 33, 0, 0), huc_def(kbl, 4, 0, 0)) \
+ fw_def(BROXTON, 0, guc_def(bxt, 33, 0, 0), huc_def(bxt, 2, 0, 0)) \
+ fw_def(SKYLAKE, 0, guc_def(skl, 33, 0, 0), huc_def(skl, 2, 0, 0))
+
+#define __MAKE_UC_FW_PATH(prefix_, name_, major_, minor_, patch_) \
+ "i915/" \
+ __stringify(prefix_) name_ \
+ __stringify(major_) "." \
+ __stringify(minor_) "." \
+ __stringify(patch_) ".bin"
+
+#define MAKE_GUC_FW_PATH(prefix_, major_, minor_, patch_) \
+ __MAKE_UC_FW_PATH(prefix_, "_guc_", major_, minor_, patch_)
+
+#define MAKE_HUC_FW_PATH(prefix_, major_, minor_, bld_num_) \
+ __MAKE_UC_FW_PATH(prefix_, "_huc_", major_, minor_, bld_num_)
+
+/* All blobs need to be declared via MODULE_FIRMWARE() */
+#define INTEL_UC_MODULE_FW(platform_, revid_, guc_, huc_) \
+ MODULE_FIRMWARE(guc_); \
+ MODULE_FIRMWARE(huc_);
+
+INTEL_UC_FIRMWARE_DEFS(INTEL_UC_MODULE_FW, MAKE_GUC_FW_PATH, MAKE_HUC_FW_PATH)
+
+/* The below structs and macros are used to iterate across the list of blobs */
+struct __packed uc_fw_blob {
+ u8 major;
+ u8 minor;
+ const char *path;
+};
+
+#define UC_FW_BLOB(major_, minor_, path_) \
+ { .major = major_, .minor = minor_, .path = path_ }
+
+#define GUC_FW_BLOB(prefix_, major_, minor_, patch_) \
+ UC_FW_BLOB(major_, minor_, \
+ MAKE_GUC_FW_PATH(prefix_, major_, minor_, patch_))
+
+#define HUC_FW_BLOB(prefix_, major_, minor_, bld_num_) \
+ UC_FW_BLOB(major_, minor_, \
+ MAKE_HUC_FW_PATH(prefix_, major_, minor_, bld_num_))
+
+struct __packed uc_fw_platform_requirement {
+ enum intel_platform p;
+ u8 rev; /* first platform rev using this FW */
+ const struct uc_fw_blob blobs[INTEL_UC_FW_NUM_TYPES];
+};
+
+#define MAKE_FW_LIST(platform_, revid_, guc_, huc_) \
+{ \
+ .p = INTEL_##platform_, \
+ .rev = revid_, \
+ .blobs[INTEL_UC_FW_TYPE_GUC] = guc_, \
+ .blobs[INTEL_UC_FW_TYPE_HUC] = huc_, \
+},
+
+static void
+__uc_fw_auto_select(struct intel_uc_fw *uc_fw, enum intel_platform p, u8 rev)
+{
+ static const struct uc_fw_platform_requirement fw_blobs[] = {
+ INTEL_UC_FIRMWARE_DEFS(MAKE_FW_LIST, GUC_FW_BLOB, HUC_FW_BLOB)
+ };
+ int i;
+
+ for (i = 0; i < ARRAY_SIZE(fw_blobs) && p <= fw_blobs[i].p; i++) {
+ if (p == fw_blobs[i].p && rev >= fw_blobs[i].rev) {
+ const struct uc_fw_blob *blob =
+ &fw_blobs[i].blobs[uc_fw->type];
+ uc_fw->path = blob->path;
+ uc_fw->major_ver_wanted = blob->major;
+ uc_fw->minor_ver_wanted = blob->minor;
+ break;
+ }
+ }
+
+ /* make sure the list is ordered as expected */
+ if (IS_ENABLED(CONFIG_DRM_I915_SELFTEST)) {
+ for (i = 1; i < ARRAY_SIZE(fw_blobs); i++) {
+ if (fw_blobs[i].p < fw_blobs[i - 1].p)
+ continue;
+
+ if (fw_blobs[i].p == fw_blobs[i - 1].p &&
+ fw_blobs[i].rev < fw_blobs[i - 1].rev)
+ continue;
+
+ pr_err("invalid FW blob order: %s r%u comes before %s r%u\n",
+ intel_platform_name(fw_blobs[i - 1].p),
+ fw_blobs[i - 1].rev,
+ intel_platform_name(fw_blobs[i].p),
+ fw_blobs[i].rev);
+
+ uc_fw->path = NULL;
+ }
+ }
+
+ /* We don't want to enable GuC/HuC on pre-Gen11 by default */
+ if (i915_modparams.enable_guc == -1 && p < INTEL_ICELAKE)
+ uc_fw->path = NULL;
+}
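+
+/*
+ * Example: for INTEL_ICELAKE rev 0 and a GuC fw, the table above selects
+ * "i915/icl_guc_33.0.0.bin" with wanted version 33.0.
+ */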
+
+static const char *__override_guc_firmware_path(void)
+{
+ if (i915_modparams.enable_guc & (ENABLE_GUC_SUBMISSION |
+ ENABLE_GUC_LOAD_HUC))
+ return i915_modparams.guc_firmware_path;
+ return "";
+}
+
+static const char *__override_huc_firmware_path(void)
+{
+ if (i915_modparams.enable_guc & ENABLE_GUC_LOAD_HUC)
+ return i915_modparams.huc_firmware_path;
+ return "";
+}
+
+static void __uc_fw_user_override(struct intel_uc_fw *uc_fw)
+{
+ const char *path = NULL;
+
+ switch (uc_fw->type) {
+ case INTEL_UC_FW_TYPE_GUC:
+ path = __override_guc_firmware_path();
+ break;
+ case INTEL_UC_FW_TYPE_HUC:
+ path = __override_huc_firmware_path();
+ break;
+ }
+
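+ /* NULL keeps the auto-selected blob; an empty string disables the uC */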
+ if (unlikely(path)) {
+ uc_fw->path = path;
+ uc_fw->user_overridden = true;
+ }
+}
+
+/**
+ * intel_uc_fw_init_early - initialize the uC object and select the firmware
+ * @uc_fw: uC firmware
+ * @type: type of uC
+ * @supported: is uC support possible
+ * @platform: platform identifier
+ * @rev: hardware revision
+ *
+ * Initialize the state of our uC object and relevant tracking and select the
+ * firmware to fetch and load.
+ */
+void intel_uc_fw_init_early(struct intel_uc_fw *uc_fw,
+ enum intel_uc_fw_type type, bool supported,
+ enum intel_platform platform, u8 rev)
+{
+ /*
+ * We use FIRMWARE_UNINITIALIZED to detect checks against uc_fw->status
+ * made before we've looked at the HW caps to see if we have uc support.
+ */
+ BUILD_BUG_ON(INTEL_UC_FIRMWARE_UNINITIALIZED);
+ GEM_BUG_ON(uc_fw->status);
+ GEM_BUG_ON(uc_fw->path);
+
+ uc_fw->type = type;
+
+ if (supported) {
+ __uc_fw_auto_select(uc_fw, platform, rev);
+ __uc_fw_user_override(uc_fw);
+ }
+
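+ /*
+ * A NULL path means the platform has no uC support at all, an empty
+ * path means support is present but the uC is disabled, and any other
+ * path means a blob has been selected for loading.
+ */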
+ intel_uc_fw_change_status(uc_fw, uc_fw->path ? *uc_fw->path ?
+ INTEL_UC_FIRMWARE_SELECTED :
+ INTEL_UC_FIRMWARE_DISABLED :
+ INTEL_UC_FIRMWARE_NOT_SUPPORTED);
+}
+
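+/*
+ * Note: each i915_inject_probe_error() call below is a distinct fault
+ * injection point, so probe attempts with successively larger fault
+ * injection counts are assumed to exercise each simulated fetch failure
+ * in turn.
+ */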
+static void __force_fw_fetch_failures(struct intel_uc_fw *uc_fw, int e)
+{
+ struct drm_i915_private *i915 = __uc_fw_to_gt(uc_fw)->i915;
+ bool user = e == -EINVAL;
+
+ if (i915_inject_probe_error(i915, e)) {
+ /* non-existing blob */
+ uc_fw->path = "<invalid>";
+ uc_fw->user_overridden = user;
+ } else if (i915_inject_probe_error(i915, e)) {
+ /* require next major version */
+ uc_fw->major_ver_wanted += 1;
+ uc_fw->minor_ver_wanted = 0;
+ uc_fw->user_overridden = user;
+ } else if (i915_inject_probe_error(i915, e)) {
+ /* require next minor version */
+ uc_fw->minor_ver_wanted += 1;
+ uc_fw->user_overridden = user;
+ } else if (uc_fw->major_ver_wanted &&
+ i915_inject_probe_error(i915, e)) {
+ /* require prev major version */
+ uc_fw->major_ver_wanted -= 1;
+ uc_fw->minor_ver_wanted = 0;
+ uc_fw->user_overridden = user;
+ } else if (uc_fw->minor_ver_wanted &&
+ i915_inject_probe_error(i915, e)) {
+ /* require prev minor version - hey, this should work! */
+ uc_fw->minor_ver_wanted -= 1;
+ uc_fw->user_overridden = user;
+ } else if (user && i915_inject_probe_error(i915, e)) {
+ /* officially unsupported platform */
+ uc_fw->major_ver_wanted = 0;
+ uc_fw->minor_ver_wanted = 0;
+ uc_fw->user_overridden = true;
+ }
+}
+
+/**
+ * intel_uc_fw_fetch - fetch uC firmware
+ * @uc_fw: uC firmware
+ *
+ * Fetch uC firmware into GEM obj.
+ *
+ * Return: 0 on success, a negative errno code on failure.
+ */
+int intel_uc_fw_fetch(struct intel_uc_fw *uc_fw)
+{
+ struct drm_i915_private *i915 = __uc_fw_to_gt(uc_fw)->i915;
+ struct device *dev = i915->drm.dev;
+ struct drm_i915_gem_object *obj;
+ const struct firmware *fw = NULL;
+ struct uc_css_header *css;
+ size_t size;
+ int err;
+
+ GEM_BUG_ON(!i915->wopcm.size);
+ GEM_BUG_ON(!intel_uc_fw_is_enabled(uc_fw));
+
+ err = i915_inject_probe_error(i915, -ENXIO);
+ if (err)
+ return err;
+
+ __force_fw_fetch_failures(uc_fw, -EINVAL);
+ __force_fw_fetch_failures(uc_fw, -ESTALE);
+
+ err = request_firmware(&fw, uc_fw->path, dev);
+ if (err)
+ goto fail;
+
+ /* Check the size of the blob before examining buffer contents */
+ if (unlikely(fw->size < sizeof(struct uc_css_header))) {
+ dev_warn(dev, "%s firmware %s: invalid size: %zu < %zu\n",
+ intel_uc_fw_type_repr(uc_fw->type), uc_fw->path,
+ fw->size, sizeof(struct uc_css_header));
+ err = -ENODATA;
+ goto fail;
+ }
+
+ css = (struct uc_css_header *)fw->data;
+
+ /* Check integrity of size values inside CSS header */
+ size = (css->header_size_dw - css->key_size_dw - css->modulus_size_dw -
+ css->exponent_size_dw) * sizeof(u32);
+ if (unlikely(size != sizeof(struct uc_css_header))) {
+ dev_warn(dev,
+ "%s firmware %s: unexpected header size: %zu != %zu\n",
+ intel_uc_fw_type_repr(uc_fw->type), uc_fw->path,
+ size, sizeof(struct uc_css_header));
+ err = -EPROTO;
+ goto fail;
+ }
+
+ /* uCode size must be calculated from the other size fields */
+ uc_fw->ucode_size = (css->size_dw - css->header_size_dw) * sizeof(u32);
+
+ /* now RSA */
+ if (unlikely(css->key_size_dw != UOS_RSA_SCRATCH_COUNT)) {
+ dev_warn(dev, "%s firmware %s: unexpected key size: %u != %u\n",
+ intel_uc_fw_type_repr(uc_fw->type), uc_fw->path,
+ css->key_size_dw, UOS_RSA_SCRATCH_COUNT);
+ err = -EPROTO;
+ goto fail;
+ }
+ uc_fw->rsa_size = css->key_size_dw * sizeof(u32);
+
+ /* At a minimum, the blob must contain the header, uCode and RSA key */
+ size = sizeof(struct uc_css_header) + uc_fw->ucode_size + uc_fw->rsa_size;
+ if (unlikely(fw->size < size)) {
+ dev_warn(dev, "%s firmware %s: invalid size: %zu < %zu\n",
+ intel_uc_fw_type_repr(uc_fw->type), uc_fw->path,
+ fw->size, size);
+ err = -ENOEXEC;
+ goto fail;
+ }
+
+ /* Sanity check that the fw doesn't exceed the whole of WOPCM */
+ size = __intel_uc_fw_get_upload_size(uc_fw);
+ if (unlikely(size >= i915->wopcm.size)) {
+ dev_warn(dev, "%s firmware %s: invalid size: %zu > %zu\n",
+ intel_uc_fw_type_repr(uc_fw->type), uc_fw->path,
+ size, (size_t)i915->wopcm.size);
+ err = -E2BIG;
+ goto fail;
+ }
+
+ /* Get version numbers from the CSS header */
+ uc_fw->major_ver_found = FIELD_GET(CSS_SW_VERSION_UC_MAJOR,
+ css->sw_version);
+ uc_fw->minor_ver_found = FIELD_GET(CSS_SW_VERSION_UC_MINOR,
+ css->sw_version);
+
+ if (uc_fw->major_ver_found != uc_fw->major_ver_wanted ||
+ uc_fw->minor_ver_found < uc_fw->minor_ver_wanted) {
+ dev_notice(dev, "%s firmware %s: unexpected version: %u.%u != %u.%u\n",
+ intel_uc_fw_type_repr(uc_fw->type), uc_fw->path,
+ uc_fw->major_ver_found, uc_fw->minor_ver_found,
+ uc_fw->major_ver_wanted, uc_fw->minor_ver_wanted);
+ if (!intel_uc_fw_is_overridden(uc_fw)) {
+ err = -ENOEXEC;
+ goto fail;
+ }
+ }
+
+ obj = i915_gem_object_create_shmem_from_data(i915, fw->data, fw->size);
+ if (IS_ERR(obj)) {
+ err = PTR_ERR(obj);
+ goto fail;
+ }
+
+ uc_fw->obj = obj;
+ uc_fw->size = fw->size;
+ intel_uc_fw_change_status(uc_fw, INTEL_UC_FIRMWARE_AVAILABLE);
+
+ release_firmware(fw);
+ return 0;
+
+fail:
+ intel_uc_fw_change_status(uc_fw, err == -ENOENT ?
+ INTEL_UC_FIRMWARE_MISSING :
+ INTEL_UC_FIRMWARE_ERROR);
+
+ dev_notice(dev, "%s firmware %s: fetch failed with error %d\n",
+ intel_uc_fw_type_repr(uc_fw->type), uc_fw->path, err);
+ dev_info(dev, "%s firmware(s) can be downloaded from %s\n",
+ intel_uc_fw_type_repr(uc_fw->type), INTEL_UC_FIRMWARE_URL);
+
+ release_firmware(fw); /* OK even if fw is NULL */
+ return err;
+}
+
+static u32 uc_fw_ggtt_offset(struct intel_uc_fw *uc_fw)
+{
+ struct i915_ggtt *ggtt = __uc_fw_to_gt(uc_fw)->ggtt;
+ struct drm_mm_node *node = &ggtt->uc_fw;
+
+ GEM_BUG_ON(!drm_mm_node_allocated(node));
+ GEM_BUG_ON(upper_32_bits(node->start));
+ GEM_BUG_ON(upper_32_bits(node->start + node->size - 1));
+
+ return lower_32_bits(node->start);
+}
+
+static void uc_fw_bind_ggtt(struct intel_uc_fw *uc_fw)
+{
+ struct drm_i915_gem_object *obj = uc_fw->obj;
+ struct i915_ggtt *ggtt = __uc_fw_to_gt(uc_fw)->ggtt;
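+ /*
+ * A throwaway vma on the stack is sufficient here: insert_entries()
+ * only consumes the node coordinates and the backing pages, so no real
+ * vma needs to be allocated or pinned for the reserved uc_fw range.
+ */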
+ struct i915_vma dummy = {
+ .node.start = uc_fw_ggtt_offset(uc_fw),
+ .node.size = obj->base.size,
+ .pages = obj->mm.pages,
+ .vm = &ggtt->vm,
+ };
+
+ GEM_BUG_ON(!i915_gem_object_has_pinned_pages(obj));
+ GEM_BUG_ON(dummy.node.size > ggtt->uc_fw.size);
+
+ /* uc_fw->obj cache domains were not controlled across suspend */
+ drm_clflush_sg(dummy.pages);
+
+ ggtt->vm.insert_entries(&ggtt->vm, &dummy, I915_CACHE_NONE, 0);
+}
+
+static void uc_fw_unbind_ggtt(struct intel_uc_fw *uc_fw)
+{
+ struct drm_i915_gem_object *obj = uc_fw->obj;
+ struct i915_ggtt *ggtt = __uc_fw_to_gt(uc_fw)->ggtt;
+ u64 start = uc_fw_ggtt_offset(uc_fw);
+
+ ggtt->vm.clear_range(&ggtt->vm, start, obj->base.size);
+}
+
+static int uc_fw_xfer(struct intel_uc_fw *uc_fw, u32 dst_offset, u32 dma_flags)
+{
+ struct intel_gt *gt = __uc_fw_to_gt(uc_fw);
+ struct intel_uncore *uncore = gt->uncore;
+ u64 offset;
+ int ret;
+
+ ret = i915_inject_probe_error(gt->i915, -ETIMEDOUT);
+ if (ret)
+ return ret;
+
+ intel_uncore_forcewake_get(uncore, FORCEWAKE_ALL);
+
+ /* Set the source address for the uCode */
+ offset = uc_fw_ggtt_offset(uc_fw);
+ GEM_BUG_ON(upper_32_bits(offset) & 0xFFFF0000);
+ intel_uncore_write_fw(uncore, DMA_ADDR_0_LOW, lower_32_bits(offset));
+ intel_uncore_write_fw(uncore, DMA_ADDR_0_HIGH, upper_32_bits(offset));
+
+ /* Set the DMA destination */
+ intel_uncore_write_fw(uncore, DMA_ADDR_1_LOW, dst_offset);
+ intel_uncore_write_fw(uncore, DMA_ADDR_1_HIGH, DMA_ADDRESS_SPACE_WOPCM);
+
+ /*
+ * Set the transfer size. The header plus uCode will be copied to WOPCM
+ * via DMA, excluding any other components
+ */
+ intel_uncore_write_fw(uncore, DMA_COPY_SIZE,
+ sizeof(struct uc_css_header) + uc_fw->ucode_size);
+
+ /* Start the DMA */
+ intel_uncore_write_fw(uncore, DMA_CTRL,
+ _MASKED_BIT_ENABLE(dma_flags | START_DMA));
+
+ /* Wait for DMA to finish */
+ ret = intel_wait_for_register_fw(uncore, DMA_CTRL, START_DMA, 0, 100);
+ if (ret)
+ dev_err(gt->i915->drm.dev, "DMA for %s fw failed, DMA_CTRL=%u\n",
+ intel_uc_fw_type_repr(uc_fw->type),
+ intel_uncore_read_fw(uncore, DMA_CTRL));
+
+ /* Disable the bits once DMA is over */
+ intel_uncore_write_fw(uncore, DMA_CTRL, _MASKED_BIT_DISABLE(dma_flags));
+
+ intel_uncore_forcewake_put(uncore, FORCEWAKE_ALL);
+
+ return ret;
+}
+
+/**
+ * intel_uc_fw_upload - load uC firmware using custom loader
+ * @uc_fw: uC firmware
+ * @dst_offset: destination offset
+ * @dma_flags: flags for DMA ctrl
+ *
+ * Loads uC firmware and updates internal flags.
+ *
+ * Return: 0 on success, non-zero on failure.
+ */
+int intel_uc_fw_upload(struct intel_uc_fw *uc_fw, u32 dst_offset, u32 dma_flags)
+{
+ struct intel_gt *gt = __uc_fw_to_gt(uc_fw);
+ int err;
+
+ /* make sure the status was cleared the last time we reset the uc */
+ GEM_BUG_ON(intel_uc_fw_is_loaded(uc_fw));
+
+ err = i915_inject_probe_error(gt->i915, -ENOEXEC);
+ if (err)
+ return err;
+
+ if (!intel_uc_fw_is_available(uc_fw))
+ return -ENOEXEC;
+
+ /* Call custom loader */
+ uc_fw_bind_ggtt(uc_fw);
+ err = uc_fw_xfer(uc_fw, dst_offset, dma_flags);
+ uc_fw_unbind_ggtt(uc_fw);
+ if (err)
+ goto fail;
+
+ intel_uc_fw_change_status(uc_fw, INTEL_UC_FIRMWARE_TRANSFERRED);
+ return 0;
+
+fail:
+ i915_probe_error(gt->i915, "Failed to load %s firmware %s (%d)\n",
+ intel_uc_fw_type_repr(uc_fw->type), uc_fw->path,
+ err);
+ intel_uc_fw_change_status(uc_fw, INTEL_UC_FIRMWARE_FAIL);
+ return err;
+}
+
+int intel_uc_fw_init(struct intel_uc_fw *uc_fw)
+{
+ int err;
+
+ /* this should happen before the load! */
+ GEM_BUG_ON(intel_uc_fw_is_loaded(uc_fw));
+
+ if (!intel_uc_fw_is_available(uc_fw))
+ return -ENOEXEC;
+
+ err = i915_gem_object_pin_pages(uc_fw->obj);
+ if (err) {
+ DRM_DEBUG_DRIVER("%s fw pin-pages err=%d\n",
+ intel_uc_fw_type_repr(uc_fw->type), err);
+ intel_uc_fw_change_status(uc_fw, INTEL_UC_FIRMWARE_FAIL);
+ }
+
+ return err;
+}
+
+void intel_uc_fw_fini(struct intel_uc_fw *uc_fw)
+{
+ intel_uc_fw_cleanup_fetch(uc_fw);
+}
+
+/**
+ * intel_uc_fw_cleanup_fetch - cleanup uC firmware
+ * @uc_fw: uC firmware
+ *
+ * Cleans up uC firmware by releasing the firmware GEM obj.
+ */
+void intel_uc_fw_cleanup_fetch(struct intel_uc_fw *uc_fw)
+{
+ if (!intel_uc_fw_is_available(uc_fw))
+ return;
+
+ i915_gem_object_put(fetch_and_zero(&uc_fw->obj));
+
+ intel_uc_fw_change_status(uc_fw, INTEL_UC_FIRMWARE_SELECTED);
+}
+
+/**
+ * intel_uc_fw_copy_rsa - copy fw RSA to buffer
+ *
+ * @uc_fw: uC firmware
+ * @dst: dst buffer
+ * @max_len: max number of bytes to copy
+ *
+ * Return: number of copied bytes.
+ */
+size_t intel_uc_fw_copy_rsa(struct intel_uc_fw *uc_fw, void *dst, u32 max_len)
+{
+ struct sg_table *pages = uc_fw->obj->mm.pages;
+ u32 size = min_t(u32, uc_fw->rsa_size, max_len);
+ u32 offset = sizeof(struct uc_css_header) + uc_fw->ucode_size;
+
+ GEM_BUG_ON(!intel_uc_fw_is_available(uc_fw));
+
+ return sg_pcopy_to_buffer(pages->sgl, pages->nents, dst, size, offset);
+}
+
+/**
+ * intel_uc_fw_dump - dump information about uC firmware
+ * @uc_fw: uC firmware
+ * @p: the &drm_printer
+ *
+ * Pretty printer for uC firmware.
+ */
+void intel_uc_fw_dump(const struct intel_uc_fw *uc_fw, struct drm_printer *p)
+{
+ drm_printf(p, "%s firmware: %s\n",
+ intel_uc_fw_type_repr(uc_fw->type), uc_fw->path);
+ drm_printf(p, "\tstatus: %s\n",
+ intel_uc_fw_status_repr(uc_fw->status));
+ drm_printf(p, "\tversion: wanted %u.%u, found %u.%u\n",
+ uc_fw->major_ver_wanted, uc_fw->minor_ver_wanted,
+ uc_fw->major_ver_found, uc_fw->minor_ver_found);
+ drm_printf(p, "\tuCode: %u bytes\n", uc_fw->ucode_size);
+ drm_printf(p, "\tRSA: %u bytes\n", uc_fw->rsa_size);
+}
diff --git a/drivers/gpu/drm/i915/gt/uc/intel_uc_fw.h b/drivers/gpu/drm/i915/gt/uc/intel_uc_fw.h
new file mode 100644
index 000000000000..1f30543d0d2d
--- /dev/null
+++ b/drivers/gpu/drm/i915/gt/uc/intel_uc_fw.h
@@ -0,0 +1,240 @@
+/* SPDX-License-Identifier: MIT */
+/*
+ * Copyright © 2014-2019 Intel Corporation
+ */
+
+#ifndef _INTEL_UC_FW_H_
+#define _INTEL_UC_FW_H_
+
+#include <linux/types.h>
+#include "intel_uc_fw_abi.h"
+#include "intel_device_info.h"
+#include "i915_gem.h"
+
+struct drm_printer;
+struct drm_i915_private;
+struct intel_gt;
+
+/* Home of GuC, HuC and DMC firmwares */
+#define INTEL_UC_FIRMWARE_URL "https://git.kernel.org/pub/scm/linux/kernel/git/firmware/linux-firmware.git/tree/i915"
+
+/*
+ * +------------+---------------------------------------------------+
+ * | PHASE | FIRMWARE STATUS TRANSITIONS |
+ * +============+===================================================+
+ * | | UNINITIALIZED |
+ * +------------+- / | \ -+
+ * | | DISABLED <--/ | \--> NOT_SUPPORTED |
+ * | init_early | V |
+ * | | SELECTED |
+ * +------------+- / | \ -+
+ * | | MISSING <--/ | \--> ERROR |
+ * | fetch | | |
+ * | | /------> AVAILABLE <---<-----------\ |
+ * +------------+- \ / \ \ \ -+
+ * | | FAIL <--< \--> TRANSFERRED \ |
+ * | upload | \ / \ / |
+ * | | \---------/ \--> RUNNING |
+ * +------------+---------------------------------------------------+
+ */
+
+enum intel_uc_fw_status {
+ INTEL_UC_FIRMWARE_NOT_SUPPORTED = -1, /* no uc HW */
+ INTEL_UC_FIRMWARE_UNINITIALIZED = 0, /* used to catch checks done too early */
+ INTEL_UC_FIRMWARE_DISABLED, /* disabled */
+ INTEL_UC_FIRMWARE_SELECTED, /* selected the blob we want to load */
+ INTEL_UC_FIRMWARE_MISSING, /* blob not found on the system */
+ INTEL_UC_FIRMWARE_ERROR, /* invalid format or version */
+ INTEL_UC_FIRMWARE_AVAILABLE, /* blob found and copied in mem */
+ INTEL_UC_FIRMWARE_FAIL, /* failed to xfer or init/auth the fw */
+ INTEL_UC_FIRMWARE_TRANSFERRED, /* dma xfer done */
+ INTEL_UC_FIRMWARE_RUNNING /* init/auth done */
+};
+
+enum intel_uc_fw_type {
+ INTEL_UC_FW_TYPE_GUC = 0,
+ INTEL_UC_FW_TYPE_HUC
+};
+#define INTEL_UC_FW_NUM_TYPES 2
+
+/*
+ * This structure encapsulates all the data needed during the process
+ * of fetching, caching, and loading the firmware image into the uC.
+ */
+struct intel_uc_fw {
+ enum intel_uc_fw_type type;
+ union {
+ const enum intel_uc_fw_status status;
+ enum intel_uc_fw_status __status; /* no accidental overwrites */
+ };
+ const char *path;
+ bool user_overridden;
+ size_t size;
+ struct drm_i915_gem_object *obj;
+
+ /*
+ * The firmware build process generates a version header file with the
+ * major and minor versions defined. Those versions are built into the CSS
+ * header of the firmware. The i915 driver sets the minimal firmware
+ * version required per platform.
+ */
+ u16 major_ver_wanted;
+ u16 minor_ver_wanted;
+ u16 major_ver_found;
+ u16 minor_ver_found;
+
+ u32 rsa_size;
+ u32 ucode_size;
+};
+
+#ifdef CONFIG_DRM_I915_DEBUG_GUC
+void intel_uc_fw_change_status(struct intel_uc_fw *uc_fw,
+ enum intel_uc_fw_status status);
+#else
+static inline void intel_uc_fw_change_status(struct intel_uc_fw *uc_fw,
+ enum intel_uc_fw_status status)
+{
+ uc_fw->__status = status;
+}
+#endif
+
+static inline
+const char *intel_uc_fw_status_repr(enum intel_uc_fw_status status)
+{
+ switch (status) {
+ case INTEL_UC_FIRMWARE_NOT_SUPPORTED:
+ return "N/A";
+ case INTEL_UC_FIRMWARE_UNINITIALIZED:
+ return "UNINITIALIZED";
+ case INTEL_UC_FIRMWARE_DISABLED:
+ return "DISABLED";
+ case INTEL_UC_FIRMWARE_SELECTED:
+ return "SELECTED";
+ case INTEL_UC_FIRMWARE_MISSING:
+ return "MISSING";
+ case INTEL_UC_FIRMWARE_ERROR:
+ return "ERROR";
+ case INTEL_UC_FIRMWARE_AVAILABLE:
+ return "AVAILABLE";
+ case INTEL_UC_FIRMWARE_FAIL:
+ return "FAIL";
+ case INTEL_UC_FIRMWARE_TRANSFERRED:
+ return "TRANSFERRED";
+ case INTEL_UC_FIRMWARE_RUNNING:
+ return "RUNNING";
+ }
+ return "<invalid>";
+}
+
+static inline int intel_uc_fw_status_to_error(enum intel_uc_fw_status status)
+{
+ switch (status) {
+ case INTEL_UC_FIRMWARE_NOT_SUPPORTED:
+ return -ENODEV;
+ case INTEL_UC_FIRMWARE_UNINITIALIZED:
+ return -EACCES;
+ case INTEL_UC_FIRMWARE_DISABLED:
+ return -EPERM;
+ case INTEL_UC_FIRMWARE_MISSING:
+ return -ENOENT;
+ case INTEL_UC_FIRMWARE_ERROR:
+ return -ENOEXEC;
+ case INTEL_UC_FIRMWARE_FAIL:
+ return -EIO;
+ case INTEL_UC_FIRMWARE_SELECTED:
+ return -ESTALE;
+ case INTEL_UC_FIRMWARE_AVAILABLE:
+ case INTEL_UC_FIRMWARE_TRANSFERRED:
+ case INTEL_UC_FIRMWARE_RUNNING:
+ return 0;
+ }
+ return -EINVAL;
+}
+
+static inline const char *intel_uc_fw_type_repr(enum intel_uc_fw_type type)
+{
+ switch (type) {
+ case INTEL_UC_FW_TYPE_GUC:
+ return "GuC";
+ case INTEL_UC_FW_TYPE_HUC:
+ return "HuC";
+ }
+ return "uC";
+}
+
+static inline enum intel_uc_fw_status
+__intel_uc_fw_status(struct intel_uc_fw *uc_fw)
+{
+ /* shouldn't call this before checking hw/blob availability */
+ GEM_BUG_ON(uc_fw->status == INTEL_UC_FIRMWARE_UNINITIALIZED);
+ return uc_fw->status;
+}
+
+static inline bool intel_uc_fw_is_supported(struct intel_uc_fw *uc_fw)
+{
+ return __intel_uc_fw_status(uc_fw) != INTEL_UC_FIRMWARE_NOT_SUPPORTED;
+}
+
+static inline bool intel_uc_fw_is_enabled(struct intel_uc_fw *uc_fw)
+{
+ return __intel_uc_fw_status(uc_fw) > INTEL_UC_FIRMWARE_DISABLED;
+}
+
+static inline bool intel_uc_fw_is_available(struct intel_uc_fw *uc_fw)
+{
+ return __intel_uc_fw_status(uc_fw) >= INTEL_UC_FIRMWARE_AVAILABLE;
+}
+
+static inline bool intel_uc_fw_is_loaded(struct intel_uc_fw *uc_fw)
+{
+ return __intel_uc_fw_status(uc_fw) >= INTEL_UC_FIRMWARE_TRANSFERRED;
+}
+
+static inline bool intel_uc_fw_is_running(struct intel_uc_fw *uc_fw)
+{
+ return __intel_uc_fw_status(uc_fw) == INTEL_UC_FIRMWARE_RUNNING;
+}
+
+static inline bool intel_uc_fw_is_overridden(const struct intel_uc_fw *uc_fw)
+{
+ return uc_fw->user_overridden;
+}
+
+static inline void intel_uc_fw_sanitize(struct intel_uc_fw *uc_fw)
+{
+ if (intel_uc_fw_is_loaded(uc_fw))
+ intel_uc_fw_change_status(uc_fw, INTEL_UC_FIRMWARE_AVAILABLE);
+}
+
+static inline u32 __intel_uc_fw_get_upload_size(struct intel_uc_fw *uc_fw)
+{
+ return sizeof(struct uc_css_header) + uc_fw->ucode_size;
+}
+
+/**
+ * intel_uc_fw_get_upload_size() - Get size of firmware needed to be uploaded.
+ * @uc_fw: uC firmware.
+ *
+ * Get the size of the firmware and header that will be uploaded to WOPCM.
+ *
+ * Return: Upload firmware size, or zero on firmware fetch failure.
+ */
+static inline u32 intel_uc_fw_get_upload_size(struct intel_uc_fw *uc_fw)
+{
+ if (!intel_uc_fw_is_available(uc_fw))
+ return 0;
+
+ return __intel_uc_fw_get_upload_size(uc_fw);
+}
+
+void intel_uc_fw_init_early(struct intel_uc_fw *uc_fw,
+ enum intel_uc_fw_type type, bool supported,
+ enum intel_platform platform, u8 rev);
+int intel_uc_fw_fetch(struct intel_uc_fw *uc_fw);
+void intel_uc_fw_cleanup_fetch(struct intel_uc_fw *uc_fw);
+int intel_uc_fw_upload(struct intel_uc_fw *uc_fw, u32 offset, u32 dma_flags);
+int intel_uc_fw_init(struct intel_uc_fw *uc_fw);
+void intel_uc_fw_fini(struct intel_uc_fw *uc_fw);
+size_t intel_uc_fw_copy_rsa(struct intel_uc_fw *uc_fw, void *dst, u32 max_len);
+void intel_uc_fw_dump(const struct intel_uc_fw *uc_fw, struct drm_printer *p);
+
+#endif
diff --git a/drivers/gpu/drm/i915/gt/uc/intel_uc_fw_abi.h b/drivers/gpu/drm/i915/gt/uc/intel_uc_fw_abi.h
new file mode 100644
index 000000000000..029214cdedd5
--- /dev/null
+++ b/drivers/gpu/drm/i915/gt/uc/intel_uc_fw_abi.h
@@ -0,0 +1,77 @@
+/* SPDX-License-Identifier: MIT */
+/*
+ * Copyright © 2019 Intel Corporation
+ */
+
+#ifndef _INTEL_UC_FW_ABI_H
+#define _INTEL_UC_FW_ABI_H
+
+#include <linux/types.h>
+#include <linux/build_bug.h>
+
+/**
+ * DOC: Firmware Layout
+ *
+ * The GuC/HuC firmware layout looks like this::
+ *
+ * +======================================================================+
+ * | Firmware blob |
+ * +===============+===============+============+============+============+
+ * | CSS header | uCode | RSA key | modulus | exponent |
+ * +===============+===============+============+============+============+
+ * <-header size-> <---header size continued ----------->
+ * <--- size ----------------------------------------------------------->
+ * <-key size->
+ * <-mod size->
+ * <-exp size->
+ *
+ * The firmware may or may not have modulus key and exponent data. The header,
+ * uCode and RSA signature are must-have components that will be used by the
+ * driver. The length of each component, in dwords, can be found in the header.
+ * If the modulus and exponent are not present in the fw, a.k.a. a truncated
+ * image, their length values still appear in the header.
+ *
+ * Driver will do some basic fw size validation based on the following rules:
+ *
+ * 1. Header, uCode and RSA are must-have components.
+ * 2. All firmware components, if present, are in the sequence illustrated in
+ * the layout table above.
+ * 3. The length of each component, in dwords, can be found in the header.
+ * 4. The modulus and exponent keys are not required by the driver and may be
+ * absent from the fw; the driver loads such a truncated firmware as-is.
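+ *
+ * In terms of the CSS fields below, the fetch code checks that
+ * (header_size_dw - key_size_dw - modulus_size_dw - exponent_size_dw)
+ * dwords equal sizeof(struct uc_css_header), and derives the uCode size
+ * as (size_dw - header_size_dw) dwords.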
+ */
+
+struct uc_css_header {
+ u32 module_type;
+ /*
+ * header_size includes all non-uCode bits, including css_header, rsa
+ * key, modulus key and exponent data.
+ */
+ u32 header_size_dw;
+ u32 header_version;
+ u32 module_id;
+ u32 module_vendor;
+ u32 date;
+#define CSS_DATE_DAY (0xFF << 0)
+#define CSS_DATE_MONTH (0xFF << 8)
+#define CSS_DATE_YEAR (0xFFFF << 16)
+ u32 size_dw; /* uCode plus header_size_dw */
+ u32 key_size_dw;
+ u32 modulus_size_dw;
+ u32 exponent_size_dw;
+ u32 time;
+#define CSS_TIME_HOUR (0xFF << 0)
+#define CSS_TIME_MIN (0xFF << 8)
+#define CSS_TIME_SEC (0xFFFF << 16)
+ char username[8];
+ char buildnumber[12];
+ u32 sw_version;
+#define CSS_SW_VERSION_UC_MAJOR (0xFF << 16)
+#define CSS_SW_VERSION_UC_MINOR (0xFF << 8)
+#define CSS_SW_VERSION_UC_PATCH (0xFF << 0)
+ u32 reserved[14];
+ u32 header_info;
+} __packed;
+static_assert(sizeof(struct uc_css_header) == 128);
+
+#endif /* _INTEL_UC_FW_ABI_H */